Blogged by Ujihisa. Standard methods of programming and thoughts including Clojure, Vim, LLVM, Haskell, Ruby and Mathematics written by a Japanese programmer. github/ujihisa

Thursday, November 3, 2011

Lossless Data Compressions

Run-length encoding

http://en.wikipedia.org/wiki/Run-length_encoding

One of the easiest encoding method to write.

An implementatin in Haskell:

import Data.List (group)
main = do
  print $ runlength "AAAAABBCBADDDDDDDD"

runlength = concatMap f . group
  where
    f xs@(x:_) = x : show (length xs)

"A5B2C1B1A1D8"

Huffman coding

http://en.wikipedia.org/wiki/Huffman_coding

An implementatin in Haskell:

import Data.List (group, sort, sortBy)
import Data.Function (on)
import Data.Map (fromList, lookup)
import Data.Maybe (fromJust)
import Prelude hiding (lookup)

main = print $ huffman "AAAAABBCBADDDDDDDD"

huffman s = flip concatMap s $ fromJust . flip lookup (huffmanMap s)

huffmanMap s =
  let x = map head . sortBy (compare `on` length) . group . sort $ s in
      fromList $ zipWith ((,)) x (bits $ length x)
bits :: Int -> [[Int]]
bits length = (take length $ repeat 1) : reverse (take (length - 1) $ iterate (1 :) [0])

[1,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0]

BPE: Byte Pair Encoding

http://en.wikipedia.org/wiki/Byte_pair_encoding

import Data.List (group, sort, sortBy, (\\))
import Data.Function (on)
import qualified Data.Map as M
import Data.Map (insert, empty, notMember)
import Data.Maybe (fromJust)
import Safe (fromJustDef)

main = do
  let orig = "ABCDCDABCDCDE"
  print orig
  let (encoded, table) = bpe orig
  print (encoded, table)
  print $ bpeDecode encoded table

bpe xs = bpe' xs empty
bpe' xs table =
  let x = head $ sortBy (flip compare `on` length) $ group $ sort $ zipWith ((,)) xs (tail xs) in
      if length x == 1
         then (xs, table)
         else let (a, b) = head x
                  new = pickupOther xs table in
                  bpe' (replace2 xs a b new) (insert new (a, b) table)

bpeDecode xs table = concatMap (replace (expand (M.map (\(a, b) -> [a, b]) table))) xs
  where
    replace :: M.Map Char String -> Char -> String
    replace expandedTable c = maybe [c] id $ M.lookup c expandedTable
    expand :: M.Map Char String -> M.Map Char String
    expand table = M.map (concatMap f) table
      where
        f :: Char -> String
        f c = maybe [c] (concatMap f) $ M.lookup c table

pickupOther xs table =
  head $ filter (flip notMember table) $ ['Z', 'Y'..'A'] \\ xs

replace2 (a:[]) _ _ _ = [a]
replace2 (a:b:xs) c d e
  | a == c && b == d = e : replace2 xs c d e
  | otherwise = a : replace2 (b : xs) c d e
  • "ABCDCDABCDCDE"
  • ("WWE",fromList [('W',('X','Z')),('X',('Y','Z')),('Y',('A','B')),('Z',('C','D'))])
  • "ABCDCDABCDCDE"

No comments:

Post a Comment

Followers