"""A lossy "alphanumeric" text codec, registered with the ``codecs`` module."""

import codecs
import string

#from text_codecs.register import register_codec
from easybits import Bits

# Characters that survive encoding unchanged (whitespace is handled separately).
allowed_characters = string.ascii_letters + string.digits


def register_codec(encode, decode, name):
    """Registers a codec so that it can later be used to encode or
    decode strings and bytes.

    Args:
        encode: Callable taking the text to encode and returning the
            encoded data.
        decode: Callable taking the encoded data and returning text.
        name: Codec name the search function answers to.
    """

    # The codec machinery may call stateless encoders/decoders with a second
    # ``errors`` argument (e.g. ``codecs.encode(obj, name)`` always passes it).
    # It is accepted for compatibility and otherwise ignored.
    def encode_wrapper(text, errors="strict"):
        return encode(text), len(text)

    def decode_wrapper(data, errors="strict"):
        return decode(data), len(data)

    def search_for_codec(query):
        if query == name:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        # Not our codec: let the registry try the other search functions.
        return None

    codecs.register(search_for_codec)


def encode(text):
    """A (very) lossy encoder which only saves ASCII letters, numbers,
    and spaces. Everything else is discarded. All whitespace (e.g. tabs)
    is converted into spaces, and a run of whitespace yields one space.

    Returns the ``bytes`` attribute of a ``Bits`` built from the filtered
    text.
    """
    kept = []
    previous_was_space = False
    for char in text:
        if char in allowed_characters:
            kept.append(char)
            previous_was_space = False
            continue
        # Discarded (non-whitespace) characters leave the flag untouched, so
        # whitespace on both sides of them still collapses to a single space.
        if char in string.whitespace and not previous_was_space:
            kept.append(' ')
            previous_was_space = True
    return Bits(''.join(kept)).bytes


def decode(data):
    """A decoder which reads bytes and returns the decoded string.

    Each item of ``data`` is converted with ``Bits(item, length=8).ascii``.

    Raises:
        ValueError: If ``Bits`` raises ``OverflowError`` for an item; the
            original error is chained as the cause.
    """
    pieces = []
    for position, byte in enumerate(data):
        try:
            pieces.append(Bits(byte, length=8).ascii)
        except OverflowError as err:
            raise ValueError(
                f"cannot decode byte {byte!r} at position {position}"
            ) from err
    return "".join(pieces)


register_codec(encode, decode, "alphanumeric")