# Generated from mwc/lab_compression (57 lines, 1.6 KiB, Python).
import string
|
|
import codecs
|
|
from easybits import Bits
|
|
|
|
# Characters that encode() drops; note that 'y'/'Y' is treated as a vowel here.
vowels = 'aeiouyAEIOUY'
# Only ASCII letters and digits are candidates for being kept by encode().
allowed_chars = ''.join((string.ascii_letters, string.digits))
|
|
|
|
def encode(text):
    """Encode ``text`` by dropping vowels and packing the rest at 7 bits each.

    Cleaning step:
      * characters in ``allowed_chars`` that are not in ``vowels`` are kept;
      * a whitespace character is emitted as a single ``' '`` unless the
        previously *emitted* item was already a space;
      * every other character (vowels, punctuation, non-ASCII) is dropped.
        Dropped characters do not reset the space flag, so whitespace that is
        separated only by dropped characters still collapses to one space.

    Packing step: each kept character is turned into ``Bits`` using the
    ASCII encoding, its first bit is discarded (``b[1:]``), and the remainder
    is appended to the running result.
    NOTE(review): this presumably relies on ``Bits`` producing 8 bits per
    ASCII character with a leading 0 bit -- confirm against easybits.

    Returns:
        ``result.bytes`` of the concatenated bits.
    """
    kept_chars = []
    last_character_was_space = False
    for char in text:
        if char in allowed_chars and char not in vowels:
            kept_chars.append(char)
            last_character_was_space = False
        elif char in string.whitespace and not last_character_was_space:
            kept_chars.append(' ')
            last_character_was_space = True
    clean_text = ''.join(kept_chars)

    result = Bits()
    for char in clean_text:
        try:
            b = Bits(char, encoding='ascii')
        except Exception:
            # Best-effort fallback for anything that cannot be converted.
            # ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate.
            b = Bits('?')
        result = result.concat(b[1:])
    return result.bytes
|
|
|
|
def decode(data):
    """Decode 7-bit packed ``data`` back into text.

    ``data`` is converted with ``bytes(data)`` first, so any bytes-like
    object is accepted. The bits are read in groups of 7; each group is
    prefixed with a ``0`` bit and converted to a character through the
    ``.ascii`` property of ``Bits``.

    Only complete 7-bit groups are decoded. A byte string rarely holds a
    whole number of 7-bit groups, so the leftover bits at the end are byte
    padding rather than part of a character.

    Returns:
        The decoded text as a ``str``.
    """
    bits = Bits(bytes(data))
    # Ignore the trailing partial group: it can only be byte padding.
    usable_length = len(bits) - len(bits) % 7
    chars = []
    for i in range(0, usable_length, 7):
        byte = Bits('0').concat(bits[i:i+7])
        chars.append(Bits(byte).ascii)
    # When the padding happens to be exactly 7 bits long it forms one extra
    # full group. encode() never emits NUL, so trailing NULs are padding.
    # NOTE(review): assumes the padding bits are zeros -- confirm against
    # how easybits builds ``Bits.bytes``.
    return ''.join(chars).rstrip('\x00')
|
|
|
|
def register_codec(encode, decode, name):
    """Register a codec so that it can later be used to encode or decode
    strings and bytes, e.g. ``"text".encode(name)`` / ``data.decode(name)``.

    Args:
        encode: Callable taking the text and returning the encoded bytes.
        decode: Callable taking the encoded data and returning the text.
        name: Name under which the codec can be looked up. Matching is
            case-insensitive, and hyphens and spaces are treated like
            underscores, because the codec registry normalizes the
            requested name before calling search functions.

    The registered wrappers accept the optional ``errors`` argument that the
    codec machinery passes when a caller supplies one (for example
    ``"text".encode(name, "strict")``). The value is accepted and ignored,
    since ``encode`` and ``decode`` take a single argument.
    """
    def normalize(encoding_name):
        # Mirror the registry's normalization so mixed-case names still match.
        return encoding_name.lower().replace('-', '_').replace(' ', '_')

    normalized_name = normalize(name)

    def encode_wrapper(text, errors='strict'):
        # The codec protocol expects (output, number_of_input_items_consumed).
        return encode(text), len(text)

    def decode_wrapper(data, errors='strict'):
        return decode(data), len(data)

    def search_for_codec(query):
        if normalize(query) == normalized_name:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        # Returning None tells the registry to try other search functions.
        return None

    codecs.register(search_for_codec)
|
|
|
|
|
|
# Make the codec defined above available under the name "novow7" on import.
register_codec(encode=encode, decode=decode, name="novow7")
|
|
|
|
|