lab_compression/text_codecs/ascii7.py

34 lines
1.1 KiB
Python

from custom_codecs.register import register_codec
from easybits import Bits
def encode(text):
    """Encode text as packed 7-bit ASCII.

    Each character is converted to its one-byte ASCII code; a character
    that cannot be encoded as ASCII is replaced with '?'. The first bit of
    every ASCII byte is 0, so that bit is dropped and only the remaining
    seven bits are written. The decoder puts the 0 back, so ASCII text is
    stored in roughly 7/8 of its usual size.

    Args:
        text: The string to encode.

    Returns:
        The concatenated 7-bit groups, as returned by ``Bits.bytes``.
        NOTE(review): when the number of characters is not a multiple of
        eight the bit count is not a whole number of bytes; presumably
        ``Bits.bytes`` zero-fills the final byte -- confirm.
    """
    result = Bits()
    for char in text:
        try:
            b = Bits(char, encoding='ascii')
        except UnicodeEncodeError:
            # Build the replacement exactly like a normal character so it
            # is always one ASCII byte and never depends on how Bits
            # interprets a bare string.
            b = Bits('?', encoding='ascii')
        # Skip the leading bit (always 0 for ASCII); keep the other seven.
        result = result.concat(b[1:])
    return result.bytes
def decode(data):
    """Decode data produced by the matching ascii7 encoder back into text.

    Reads the data seven bits at a time, puts a 0 bit on the front of each
    group to rebuild a full byte, and converts that byte into its ASCII
    character.

    Encoded data is a whole number of bytes, so it can end with bits that
    do not belong to any character. These are not decoded:

    * Fewer than seven leftover bits cannot form a character and are
      ignored.
    * When the total bit count is an exact multiple of seven, a final
      all-zero group is treated as byte-alignment filler and dropped.
      (Assumes the filler bits are zeros -- TODO confirm against
      ``Bits.bytes``.) The format cannot tell such filler apart from a
      real trailing NUL character, so a text whose length is a multiple of
      eight and which ends in NUL loses that final NUL.

    Args:
        data: The encoded bytes (anything accepted by ``bytes()``).

    Returns:
        The decoded text as a ``str``.
    """
    bits = Bits(bytes(data))
    total_bits = len(bits)
    # Only walk over complete 7-bit groups; a shorter tail is padding.
    complete_bits = (total_bits // 7) * 7
    chars = []
    for start in range(0, complete_bits, 7):
        byte = Bits('0').concat(bits[start:start + 7])
        chars.append(Bits(byte).ascii)
    # A whole group of filler only occurs when the bits divide evenly
    # into groups of seven; it then shows up as a spurious trailing NUL.
    if chars and total_bits % 7 == 0 and chars[-1] == '\x00':
        chars.pop()
    return "".join(chars)
# Hand the encode/decode pair to register_codec under the name "ascii7".
# NOTE(review): presumably this makes the codec selectable by that name
# (e.g. text.encode("ascii7")) -- confirm against custom_codecs.register.
register_codec(encode, decode, "ascii7")