# lab_compression/text_codecs/alphanumeric.py
#
# 40 lines
# 1.2 KiB
# Python

import string
import codecs
from custom_codecs.register import register_codec
from easybits import Bits
# Characters that encode() keeps verbatim: ASCII letters (lowercase, then
# uppercase) followed by the decimal digits. Whitespace is handled separately
# inside encode(), so it is intentionally not part of this string.
allowed_characters = string.ascii_lowercase + string.ascii_uppercase + string.digits
def encode(text):
    """Lossily encode ``text``, keeping only ASCII letters, digits and spaces.

    Characters found in ``allowed_characters`` are kept unchanged. A
    whitespace character (anything in ``string.whitespace``, e.g. a tab or
    newline) is written as a single space, but only if no space has been
    written since the last kept letter or digit, so runs of whitespace
    collapse to one space. Every other character is dropped; dropped
    characters do not interrupt a whitespace run.

    Returns the ``bytes`` attribute of ``Bits`` built from the filtered text.
    """
    kept = []
    previous_was_space = False
    for symbol in text:
        if symbol in allowed_characters:
            kept.append(symbol)
            previous_was_space = False
            continue
        # Not a letter or digit: only a whitespace character that starts a
        # new run produces output; everything else is silently discarded.
        if previous_was_space or symbol not in string.whitespace:
            continue
        kept.append(' ')
        previous_was_space = True
    return Bits(''.join(kept)).bytes
def decode(data):
    """Decode a sequence of byte values back into a string.

    Each item of ``data`` is converted to one piece of text with
    ``Bits(byte, length=8).ascii`` and the pieces are concatenated in order.

    Args:
        data: An iterable of byte values (presumably the ``bytes`` produced
            by ``encode`` -- confirm against the caller).

    Returns:
        The decoded string. Only the string is returned; no consumed-length
        value accompanies it.

    Raises:
        ValueError: If converting a byte raises ``OverflowError``. The
            message names the offending value and its position, and the
            original error is chained as the cause.
    """
    pieces = []
    for i, byte in enumerate(data):
        try:
            piece = Bits(byte, length=8).ascii
        except OverflowError as err:
            # Report the failure to the caller instead of dropping into an
            # interactive debugger, which would hang or abort when the codec
            # runs without a terminal attached.
            raise ValueError(
                f"cannot decode byte {byte!r} at position {i}"
            ) from err
        pieces.append(piece)
    # Join once at the end rather than growing a string inside the loop.
    return ''.join(pieces)
# Module-level side effect: hand the encode/decode pair defined above to
# register_codec under the name "alphanumeric".
# NOTE(review): presumably this makes the codec available by that name through
# the codecs machinery -- confirm in custom_codecs.register.
register_codec(encode, decode, "alphanumeric")