40 lines
1.2 KiB
Python
40 lines
1.2 KiB
Python
import string
|
|
import codecs
|
|
from custom_codecs.register import register_codec
|
|
from easybits import Bits
|
|
|
|
# Characters the encoder keeps verbatim: the ASCII letters followed by the
# ASCII digits. Spaces are not listed here; whitespace is handled separately
# inside encode().
allowed_characters = "".join((string.ascii_letters, string.digits))
|
|
|
|
def encode(text):
    """Lossily encode ``text``, keeping only ASCII letters, digits, and spaces.

    Characters found in ``allowed_characters`` are kept unchanged. A
    whitespace character (anything in ``string.whitespace``, e.g. a tab or a
    newline) is written as a single space, unless the most recently kept
    character was already such a space; discarded characters in between do
    not reset that state. Every other character is dropped.

    Returns the ``bytes`` attribute of a ``Bits`` object built from the
    filtered text.
    """
    kept = []
    just_wrote_space = False
    for symbol in text:
        if symbol in allowed_characters:
            kept.append(symbol)
            just_wrote_space = False
            continue
        if just_wrote_space or symbol not in string.whitespace:
            # Either an unsupported character, or whitespace directly
            # following a space that has already been emitted: drop it.
            continue
        kept.append(' ')
        just_wrote_space = True
    return Bits(''.join(kept)).bytes
|
|
|
|
def decode(data):
    """Decode ``data`` one byte at a time into text.

    Each item obtained by iterating ``data`` is wrapped as an 8-bit ``Bits``
    value, and that value's ``ascii`` attribute is appended to the result.

    Returns the decoded text as a single string. Note that only the text is
    returned, not a ``(string, length)`` pair.

    Items for which the conversion raises ``OverflowError`` are left out of
    the result instead of aborting the whole decode.
    """
    pieces = []
    for byte in data:
        try:
            pieces.append(Bits(byte, length=8).ascii)
        except OverflowError:
            # This byte has no ASCII representation. Skip it and carry on,
            # which is what the previous implementation did once its
            # debugging trap (prints + breakpoint()) was resumed. The trap
            # itself is removed: it blocks any non-interactive process.
            continue
    # Join once at the end rather than growing a string inside the loop.
    return "".join(pieces)
|
|
|
|
# Pass the encode/decode pair defined in this module to register_codec under
# the name "alphanumeric". This statement runs whenever the module is executed
# or imported. NOTE(review): presumably this makes the codec usable by that
# name elsewhere -- confirm against custom_codecs.register.
register_codec(encode, decode, "alphanumeric")
|