generated from mwc/lab_compression
Initial commit
This commit is contained in:
33
text_codecs/ascii7.py
Normal file
33
text_codecs/ascii7.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from custom_codecs.register import register_codec
|
||||
from easybits import Bits
|
||||
|
||||
def encode(text):
    """Pack text into a 7-bits-per-character ASCII byte string.

    Each character is converted to bits with the 'ascii' encoding; any
    character that raises UnicodeEncodeError is replaced by '?'. The
    leading bit of each character's bits is dropped (for ASCII it is
    always 0) and the remaining bits are appended to the output, which
    is finally returned via the accumulated Bits object's ``bytes``
    attribute.

    NOTE(review): when the total number of bits is not a multiple of 8,
    ``.bytes`` presumably pads up to a whole byte -- confirm how the
    matching decoder treats those trailing bits.
    """
    packed = Bits()
    for ch in text:
        try:
            char_bits = Bits(ch, encoding='ascii')
        except UnicodeEncodeError:
            # Non-ASCII character: substitute a question mark.
            char_bits = Bits('?')
        # Skip the first bit; only the rest is stored.
        packed = packed.concat(char_bits[1:])
    return packed.bytes
|
||||
|
||||
def decode(data):
    """Unpack 7-bits-per-character data back into a string.

    The input is converted to bits and walked in steps of seven. Each
    slice is prefixed with ``Bits('0')`` and the result is turned into
    text through its ``ascii`` attribute; the pieces are joined in order.

    NOTE(review): ``Bits('0')`` is presumably a single 0 bit -- confirm
    it is not the 8-bit encoding of the character '0'.
    NOTE(review): if the bit length is not a multiple of 7, the last
    slice is shorter than seven bits (likely padding) -- verify what
    ``ascii`` produces for it.
    """
    stream = Bits(bytes(data))
    pieces = []
    for start in range(0, len(stream), 7):
        # Restore the leading bit that the encoder dropped.
        full_byte = Bits('0').concat(stream[start:start + 7])
        pieces.append(Bits(full_byte).ascii)
    return "".join(pieces)
|
||||
|
||||
# Hand the encode/decode pair to register_codec under the name "ascii7"
# (presumably makes the codec available by that name -- see custom_codecs.register).
register_codec(encode, decode, "ascii7")
|
||||
Reference in New Issue
Block a user