import codecs
import logging
import string

from custom_codecs.register import register_codec
from easybits import Bits

_logger = logging.getLogger(__name__)

# Characters that survive encoding unchanged (whitespace is handled separately).
allowed_characters = string.ascii_letters + string.digits


def encode(text):
    """Lossily encode *text*, keeping only ASCII letters, digits and spaces.

    Every character in ``allowed_characters`` is kept as-is. Any character in
    ``string.whitespace`` (tabs, newlines, ...) is written as a single space,
    and a whitespace character seen while the previous emitted character was
    already a space is dropped. All other characters are discarded; note that
    a discarded character does not reset the "last was space" state, so
    ``"a ! b"`` yields ``"a b"``.

    Returns the ``.bytes`` value of a ``Bits`` built from the filtered text.
    """
    ascii_characters = []
    last_character_was_space = False
    for char in text:
        if char in allowed_characters:
            ascii_characters.append(char)
            last_character_was_space = False
        elif char in string.whitespace and not last_character_was_space:
            ascii_characters.append(' ')
            last_character_was_space = True
    ascii_text = ''.join(ascii_characters)
    return Bits(ascii_text).bytes


def decode(data):
    """Decode *data* one item at a time and return the resulting string.

    Each item yielded by iterating *data* is wrapped as an 8-bit ``Bits``
    value and its ``.ascii`` form is appended to the output. An item for
    which this raises ``OverflowError`` is skipped and reported through the
    module logger instead of aborting the whole decode.

    NOTE(review): an earlier docstring promised a ``(string, length)`` tuple,
    but the function has only ever returned the string. The return type is
    kept unchanged here; confirm what ``register_codec`` expects.
    """
    pieces = []
    for i, byte in enumerate(data):
        try:
            pieces.append(Bits(byte, length=8).ascii)
        except OverflowError as exc:
            # Previously this printed debug output and called breakpoint(),
            # which hangs non-interactive processes. Skip the item instead.
            _logger.warning(
                "alphanumeric decode: skipping item %r at index %d: %s",
                byte, i, exc,
            )
    # Join once at the end rather than growing a string with += in the loop.
    return ''.join(pieces)


register_codec(encode, decode, "alphanumeric")