from easybits import Bits
import codecs

# Characters that get a short 4-bit code, in code order: the character at
# index i is written as the marker bit 1 followed by i as a 3-bit binary
# number. Every other character is written as its 8-bit ASCII code.
_COMMON_CHARS = ['e', 't', 'a', 'o', 'i', 'n', 's', 'h']


def encode(text):
    """Encode text to bytes, using a 4-bit code for eight common letters.

    Each of the characters e, t, a, o, i, n, s, h is written as a 1 bit
    followed by a 3-bit index into that list. Every other character is
    written as its 8-bit ASCII code. Characters that cannot be encoded as
    ASCII are replaced with '?', so the encoding is lossy for non-ASCII
    input.

    The original author quotes a compression rate of about 77 percent for
    this encoding; that figure is not verified here.

    Args:
        text: The string to encode.

    Returns:
        The encoded bit stream as returned by ``Bits.bytes``.
    """
    result = Bits()
    for char in text:
        if char in _COMMON_CHARS:
            index = _COMMON_CHARS.index(char)
            # Marker bit, then the table index as exactly three bits.
            result = result.concat(Bits('1')).concat(Bits(format(index, '03b')))
        else:
            try:
                literal = Bits(char, encoding='ascii')
            except UnicodeEncodeError:
                literal = Bits('?', encoding='ascii')
            result = result.concat(literal)
    return result.bytes


def decode(data):
    """Decode bytes produced by ``encode`` back into text.

    Reads the bit stream from the start. When the next bit is 1, the
    following 3 bits are looked up in the common-character table and 4 bits
    are consumed. Otherwise the next 8 bits are converted with ``Bits.ascii``
    and 8 bits are consumed.

    Args:
        data: The encoded bytes (anything accepted by ``bytes(data)``).

    Returns:
        The decoded string. Characters that were replaced with '?' during
        encoding are not recovered.
    """
    bits = Bits(bytes(data))
    patterns = [Bits(format(index, '03b')) for index in range(len(_COMMON_CHARS))]
    pieces = []
    total = len(bits)
    pos = 0
    while pos < total:
        if bits[pos] == 1:
            code = bits[pos + 1:pos + 4]
            # A truncated code (fewer than 3 bits left) matches no pattern
            # and is skipped without adding a character.
            for char, pattern in zip(_COMMON_CHARS, patterns):
                if code == pattern:
                    pieces.append(char)
                    break
            pos += 4
        else:
            if total - pos < 8:
                # Fewer than 8 bits left cannot hold an ASCII character.
                # Presumably this is the zero padding added when the bit
                # stream was rounded up to whole bytes (TODO confirm against
                # Bits.bytes); stop instead of decoding it as a character.
                break
            pieces.append(bits[pos:pos + 8].ascii)
            pos += 8
    return "".join(pieces)


def register_codec(encode, decode, name):
    """Register a codec so it can be used to encode or decode strings and bytes.

    Args:
        encode: Function taking a string and returning the encoded data.
        decode: Function taking encoded data and returning a string.
        name: The codec name to register; lookups for exactly this name
            return the codec.
    """
    # The codecs machinery passes ``errors`` as a second positional argument
    # when the caller supplies one, so both wrappers accept it. It is not
    # forwarded, because ``encode`` and ``decode`` take a single argument.
    def encode_wrapper(text, errors='strict'):
        return encode(text), len(text)

    def decode_wrapper(data, errors='strict'):
        return decode(data), len(data)

    def search_for_codec(query):
        if query == name:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        return None

    codecs.register(search_for_codec)


register_codec(encode, decode, "commonchars")