# Source: lab_compression/text_codecs/23comp.py
# Snapshot: 2026-05-10 16:47:39 -04:00 (47 lines, 1.4 KiB, Python)

from easybits import Bits
import codecs
def register_codec(encode, decode, name):
    """Register a custom codec so it can be used to encode or decode text.

    After registration, ``name`` can be passed as the encoding argument of
    ``str.encode`` and ``bytes.decode``.

    Args:
        encode: Callable taking the text to encode and returning the
            encoded data.
        decode: Callable taking the encoded data and returning the text.
        name: Encoding name under which the codec is looked up.
    """
    # The codec registry passes search functions a lower-cased encoding
    # name, so a mixed-case ``name`` must also match in lower-cased form.
    lookup_names = {name, name.lower()}

    def encode_wrapper(text, errors="strict"):
        # The codec protocol expects an (output, length_consumed) pair.
        # ``errors`` is accepted because ``str.encode(name, errors)`` passes
        # it through; the wrapped ``encode`` takes no such argument, so the
        # value is ignored.
        return encode(text), len(text)

    def decode_wrapper(data, errors="strict"):
        # Same contract as ``encode_wrapper``; ``errors`` is ignored.
        return decode(data), len(data)

    def search_for_codec(query):
        if query in lookup_names:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        # Returning None tells the registry to try the next search function.
        return None

    codecs.register(search_for_codec)
# Table of common words, each paired with a distinct negative integer code:
# the first word maps to -1, the second to -2, and so on down to -17.
com_neg = {
    word: -rank
    for rank, word in enumerate(
        (
            "the", "and", "this", "or", "not", "that",
            "to", "from", "in", "at", "of",
            "because", "but", "be",
            "now", "then", "while",
        ),
        start=1,
    )
}
def common_encode(commonword):
    """Look up the integer code stored for ``commonword`` in ``com_neg``.

    Returns the code, or ``None`` when the word has no entry in the table.
    """
    if commonword in com_neg:
        return com_neg[commonword]
    return None
def encode(text):
    """Encode ``text`` as a packed sequence of 7-bit character codes.

    Each character is converted to its ASCII bit pattern, the first bit is
    dropped, and the remaining bits are appended to one running bit
    sequence. Characters that cannot be encoded as ASCII are replaced with
    ``'?'`` so that non-ASCII input never makes the encoder fail.

    Args:
        text: The string to encode.

    Returns:
        The ``bytes`` value of the accumulated bit sequence.
    """
    # NOTE(review): common words are deliberately NOT replaced by their
    # ``com_neg`` codes here. Swapping the accumulator for the integer code
    # made ``.bytes`` fail (an int has no such attribute), and the output
    # stream only carries 7-bit character codes, which is all ``decode``
    # reads back. Word-level substitution would need a format that both
    # ``encode`` and ``decode`` agree on.
    result = Bits()
    for char in text:
        try:
            b = Bits(char, encoding='ascii')
        except UnicodeEncodeError:
            b = Bits('?')
        # Skip the first bit of the pattern; presumably it is always 0 for
        # ASCII, which is what makes 7 bits per character sufficient.
        result = result.concat(b[1:])
    return result.bytes
def decode(data):
    """Decode data produced by ``encode`` back into text.

    The input is read as one bit sequence and cut into consecutive 7-bit
    groups starting at the first bit. Each group is prefixed with a ``0``
    bit and converted to its ASCII character.

    Args:
        data: The encoded bytes (or a bytes-like buffer; it is copied into
            ``bytes`` before use).

    Returns:
        The decoded string; an empty string for empty input.
    """
    bits = Bits(bytes(data))
    chars = []
    # Groups must start at bit 0. A negative start would make the first
    # slices count from the end of the data and shift every later group off
    # its 7-bit boundary.
    # NOTE(review): if the encoded data carries padding bits at the end,
    # the final group may not be a real character; confirm how
    # ``Bits.bytes`` pads.
    for i in range(0, len(bits), 7):
        byte = Bits('0').concat(bits[i:i+7])
        chars.append(Bits(byte).ascii)
    return "".join(chars)
# Make the encoder/decoder pair defined above available under the
# encoding name "ascii7".
register_codec(encode=encode, decode=decode, name="ascii7")