Files
lab_compression/text_codecs/alphanumeric.py
2026-03-24 22:43:22 -04:00

55 lines
1.6 KiB
Python

import string
import codecs
from easybits import Bits
# Characters the encoder keeps as-is: a-z, then A-Z, then 0-9.
allowed_characters = "".join((string.ascii_letters, string.digits))
def encode(text):
    """Lossily encode ``text``, keeping only ASCII letters, digits and spaces.

    Characters found in ``allowed_characters`` are kept unchanged. Any
    character in ``string.whitespace`` (tab, newline, ...) is written as a
    single space, unless the most recently kept character is already such a
    space, so runs of whitespace collapse. Every other character is dropped
    and does not interrupt a whitespace run.

    Returns the ``bytes`` attribute of a ``Bits`` built from the filtered text.
    """
    kept = []
    previous_was_space = False
    for char in text:
        if char in allowed_characters:
            kept.append(char)
            previous_was_space = False
            continue
        # Anything that is not whitespace is discarded; whitespace directly
        # after an emitted space is discarded too.
        if previous_was_space or char not in string.whitespace:
            continue
        kept.append(' ')
        previous_was_space = True
    return Bits(''.join(kept)).bytes
def decode(data):
    """Decode an iterable of byte values back into a string.

    Each value is wrapped as ``Bits(byte, length=8)`` and its ``.ascii``
    text is appended to the result (presumably one ASCII character per
    byte -- confirm against easybits).

    Only the decoded text is returned. The ``(text, length)`` pair that the
    ``codecs`` machinery expects is assembled by the wrapper created in
    ``register_codec``.

    Raises:
        ValueError: if ``Bits`` raises ``OverflowError`` for a value. The
            original error is chained as the cause, and the message reports
            the offending value and its position.
    """
    pieces = []
    for i, byte in enumerate(data):
        try:
            pieces.append(Bits(byte, length=8).ascii)
        except OverflowError as err:
            # Fail loudly instead of dropping into a debugger: a library
            # function must not block non-interactive callers.
            raise ValueError(
                f"cannot decode value {byte!r} at position {i}"
            ) from err
    # Join once at the end rather than growing a string inside the loop.
    return "".join(pieces)
def register_codec(encode, decode, name):
    """Register ``encode``/``decode`` with the ``codecs`` registry as ``name``.

    After this call, ``some_str.encode(name)`` and ``some_bytes.decode(name)``
    are routed through the supplied callables.

    Args:
        encode: callable taking the text and returning the encoded bytes.
        decode: callable taking the encoded data and returning the text.
        name: the encoding name the codec should answer to.

    Each call adds one more search function to the registry.
    """
    # The registry lower-cases the requested encoding name before handing it
    # to search functions, so also answer to the lower-cased form of ``name``;
    # otherwise a name containing upper-case letters could never be found.
    lookup_names = {name, name.lower()}

    def encode_wrapper(text, errors="strict"):
        # ``errors`` is passed positionally by the registry whenever the
        # caller supplies one. It is accepted for compatibility and ignored,
        # because the wrapped encoder takes only the text.
        return encode(text), len(text)

    def decode_wrapper(data, errors="strict"):
        # Same contract as above: return (decoded text, amount consumed).
        return decode(data), len(data)

    def search_for_codec(query):
        if query in lookup_names:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        # Not ours: let the registry try the next search function.
        return None

    codecs.register(search_for_codec)
# Importing this module makes the "alphanumeric" encoding available to
# str.encode / bytes.decode via the codecs registry.
register_codec(encode=encode, decode=decode, name="alphanumeric")