From 9d2b381fea5819ae2277f8ec52c7a4423472788a Mon Sep 17 00:00:00 2001 From: ambreenn Date: Wed, 15 Apr 2026 10:24:41 -0400 Subject: [PATCH] I created a text codec ascii5 similar to ascii7 but with different characters included. This reads/encodes with 5 bits instead of the original 7. --- .envrc | 1 + text_codecs/alphanumeric.py | 20 +++++++++++++++- text_codecs/ascii5.py | 46 +++++++++++++++++++++++++++++++++++++ text_codecs/ascii7.py | 20 +++++++++++++++- text_codecs/evaluate.py | 0 text_codecs/hello.txt | 1 + text_codecs/register.py | 0 7 files changed, 86 insertions(+), 2 deletions(-) create mode 100644 .envrc mode change 100644 => 100755 text_codecs/alphanumeric.py create mode 100755 text_codecs/ascii5.py mode change 100644 => 100755 text_codecs/ascii7.py mode change 100644 => 100755 text_codecs/evaluate.py create mode 100644 text_codecs/hello.txt mode change 100644 => 100755 text_codecs/register.py diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..4a96c22 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +source .venv/bin/activate \ No newline at end of file diff --git a/text_codecs/alphanumeric.py b/text_codecs/alphanumeric.py old mode 100644 new mode 100755 index e8cc2ea..b49c5cf --- a/text_codecs/alphanumeric.py +++ b/text_codecs/alphanumeric.py @@ -1,10 +1,28 @@ import string import codecs -from custom_codecs.register import register_codec +#from text_codecs.register import register_codec from easybits import Bits allowed_characters = string.ascii_letters + string.digits +import codecs + +def register_codec(encode, decode, name): + """Registers a codec so that it can later be used to encode + or decode strings and bytes. 
+ """ + def encode_wrapper(text): + return encode(text), len(text) + + def decode_wrapper(data): + return decode(data), len(data) + + def search_for_codec(query): + if query == name: + return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name) + + codecs.register(search_for_codec) + def encode(text): """A (very) lossy encoder which only saves ASCII letters, numbers, and spaces. Everything else is discarded. All whitespace (e.g. tabs) is converted into spaces. diff --git a/text_codecs/ascii5.py b/text_codecs/ascii5.py new file mode 100755 index 0000000..8c61a2e --- /dev/null +++ b/text_codecs/ascii5.py @@ -0,0 +1,46 @@ +from easybits import Bits + +import codecs + +def register_codec(encode, decode, name): + def encode_wrapper(text): + return encode(text), len(text) + + def decode_wrapper(data): + return decode(data), len(data) + + def search_for_codec(query): + if query == name: + return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name) + + codecs.register(search_for_codec) + +characters=['A','B','C','D','E','F','G','H','I','J','K','L','M', 'N', 'O','P','Q','R','S','T','U','V','W','X','Y','Z',' ','.',',','?','!'] + + +def encode(text): + """This encoder consists of the 26 uppercase letters and select punctuation. + Characters that are not defined are removed, which makes the encoding less accurate than ascii7. 
+ There are 5 bits for each character, so the function ascii5 compresses text into 5/8 of its original size.""" + result= Bits() + text = text.upper() + for character in text: + if character in characters: + index = characters.index(character) + b=Bits(bin(index)[2:].zfill(5)) + result = result.concat(b) + return result.bytes + +def decode(data): + """This function reads 5 bits and returns the text to match.""" + bits=Bits(bytes(data)) + text = '' + for i in range(0,len(bits),5): + s=bits[i:i+5] + index=int(s.bin,2) + text+= characters[index] + return text + + +register_codec(encode, decode, "ascii5") + diff --git a/text_codecs/ascii7.py b/text_codecs/ascii7.py old mode 100644 new mode 100755 index fb24cd0..fb47a5e --- a/text_codecs/ascii7.py +++ b/text_codecs/ascii7.py @@ -1,6 +1,24 @@ -from custom_codecs.register import register_codec +#from text_codecs.register import register_codec from easybits import Bits +import codecs + +def register_codec(encode, decode, name): + """Registers a codec so that it can later be used to encode + or decode strings and bytes. + """ + def encode_wrapper(text): + return encode(text), len(text) + + def decode_wrapper(data): + return decode(data), len(data) + + def search_for_codec(query): + if query == name: + return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name) + + codecs.register(search_for_codec) + def encode(text): """An encoder which only handles ASCII: non-ASCII characters are replaced with '?'. Once all the characters are ASCII, this encoder diff --git a/text_codecs/evaluate.py b/text_codecs/evaluate.py old mode 100644 new mode 100755 diff --git a/text_codecs/hello.txt b/text_codecs/hello.txt new file mode 100644 index 0000000..3462721 --- /dev/null +++ b/text_codecs/hello.txt @@ -0,0 +1 @@ +hello! \ No newline at end of file diff --git a/text_codecs/register.py b/text_codecs/register.py old mode 100644 new mode 100755