generated from mwc/lab_compression
34 lines
1.1 KiB
Python
34 lines
1.1 KiB
Python
from custom_codecs.register import register_codec
|
|
from easybits import Bits
|
|
|
|
def encode(text):
    """Pack text into seven bits per character, ASCII only.

    Each character is converted to its one-byte ASCII form; a character
    that cannot be encoded as ASCII is replaced with '?'. The leading bit
    of every ASCII byte is 0, so it is dropped and only the remaining
    seven bits are written. The decoder puts the 0 back to rebuild the
    full byte, which is why ASCII text ends up at roughly 7/8 of its
    usual size.

    Args:
        text: The string to encode; it is processed one character at a
            time.

    Returns:
        The ``bytes`` attribute of the accumulated ``Bits`` value
        (presumably a ``bytes`` object padded to a whole number of bytes
        -- confirm against the easybits documentation).
    """
    packed = Bits()
    for character in text:
        try:
            full_byte = Bits(character, encoding='ascii')
        except UnicodeEncodeError:
            # Not representable in ASCII: substitute a question mark.
            full_byte = Bits('?')
        # Skip the first bit of the byte and keep the other seven.
        low_seven = full_byte[1:]
        packed = packed.concat(low_seven)
    return packed.bytes
|
|
|
|
def decode(data):
    """Decode bytes produced by the matching seven-bit ASCII encoder.

    Reads the bit stream seven bits at a time, puts a 0 bit on the front
    of each group to rebuild a full byte, and converts that byte into its
    ASCII character.

    The input is a whole number of bytes, so its bit length is usually not
    a multiple of seven. Leftover bits at the end that do not make up a
    complete seven-bit group are treated as padding and ignored, instead
    of being decoded as an extra (bogus) character.

    NOTE(review): when the padding happens to be exactly seven bits long
    (original text length of 7 mod 8) it cannot be told apart from a real
    encoded character, so one extra trailing character is still produced
    (presumably NUL, assuming the padding bits are zero -- confirm). The
    format carries no length information, so this cannot be resolved here
    without changing the encoding.

    Args:
        data: The encoded payload; it is passed through ``bytes(data)``
            before being turned into bits.

    Returns:
        The decoded text as a string.
    """
    bits = Bits(bytes(data))
    group_size = 7
    # Only start a group where a full seven bits remain; this skips the
    # trailing partial group made of padding bits.
    stop = len(bits) - group_size + 1
    chars = []
    for i in range(0, stop, group_size):
        # Restore the leading 0 bit that the encoder dropped.
        byte = Bits('0').concat(bits[i:i + group_size])
        chars.append(Bits(byte).ascii)
    # Join once at the end rather than growing a string inside the loop.
    return "".join(chars)
|
|
|
|
# Hand the encode/decode pair to register_codec under the name "ascii7".
# Presumably this makes the codec usable by that name -- see
# custom_codecs.register for what registration actually does.
register_codec(encode, decode, "ascii7")
|