I created a text codec ascii5 similar to ascii7

but with a different set of characters included. This codec reads and encodes
each character with 5 bits instead of the original 7.
This commit is contained in:
ambreenn
2026-04-15 10:24:41 -04:00
parent fdca338c67
commit 9d2b381fea
7 changed files with 86 additions and 2 deletions

1
.envrc Normal file
View File

@@ -0,0 +1 @@
source .venv/bin/activate

20
text_codecs/alphanumeric.py Normal file → Executable file
View File

@@ -1,10 +1,28 @@
import string import string
import codecs import codecs
from custom_codecs.register import register_codec #from text_codecs.register import register_codec
from easybits import Bits from easybits import Bits
allowed_characters = string.ascii_letters + string.digits allowed_characters = string.ascii_letters + string.digits
import codecs
def register_codec(encode, decode, name):
    """Register a codec so it can later be used to encode or decode.

    After registration the codec is reachable through the standard
    machinery, e.g. ``"text".encode(name)`` and ``data.decode(name)``.

    Args:
        encode: Callable taking a ``str`` and returning the encoded bytes.
        decode: Callable taking a bytes-like object and returning a ``str``.
        name: Name under which the codec is looked up. Matching ignores
            case and treats hyphens, spaces and underscores alike, because
            the ``codecs`` module normalises names before searching.
    """
    def normalize(codec_name):
        # Mirror the normalisation applied by ``codecs.lookup`` so that a
        # name such as "My-Codec" still matches the query "my_codec".
        return codec_name.lower().replace("-", "_").replace(" ", "_")

    wanted = normalize(name)

    def encode_wrapper(text, errors="strict"):
        # ``errors`` is accepted because the codec machinery passes it when
        # the caller supplies one; the wrapped encoder does not use it.
        return encode(text), len(text)

    def decode_wrapper(data, errors="strict"):
        # See encode_wrapper for why ``errors`` is accepted and ignored.
        return decode(data), len(data)

    def search_for_codec(query):
        if normalize(query) == wanted:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        # Returning None tells ``codecs`` to try the next search function.
        return None

    codecs.register(search_for_codec)
def encode(text): def encode(text):
"""A (very) lossy encoder which only saves ASCII letters, numbers, and spaces. """A (very) lossy encoder which only saves ASCII letters, numbers, and spaces.
Everything else is discarded. All whitespace (e.g. tabs) is converted into spaces. Everything else is discarded. All whitespace (e.g. tabs) is converted into spaces.

46
text_codecs/ascii5.py Executable file
View File

@@ -0,0 +1,46 @@
from easybits import Bits
import codecs
def register_codec(encode, decode, name):
    """Register a codec so it can later be used to encode or decode.

    After registration the codec is reachable through the standard
    machinery, e.g. ``"text".encode(name)`` and ``data.decode(name)``.

    Args:
        encode: Callable taking a ``str`` and returning the encoded bytes.
        decode: Callable taking a bytes-like object and returning a ``str``.
        name: Name under which the codec is looked up. Matching ignores
            case and treats hyphens, spaces and underscores alike, because
            the ``codecs`` module normalises names before searching.
    """
    def normalize(codec_name):
        # Mirror the normalisation applied by ``codecs.lookup`` so that a
        # name such as "My-Codec" still matches the query "my_codec".
        return codec_name.lower().replace("-", "_").replace(" ", "_")

    wanted = normalize(name)

    def encode_wrapper(text, errors="strict"):
        # ``errors`` is accepted because the codec machinery passes it when
        # the caller supplies one; the wrapped encoder does not use it.
        return encode(text), len(text)

    def decode_wrapper(data, errors="strict"):
        # See encode_wrapper for why ``errors`` is accepted and ignored.
        return decode(data), len(data)

    def search_for_codec(query):
        if normalize(query) == wanted:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        # Returning None tells ``codecs`` to try the next search function.
        return None

    codecs.register(search_for_codec)
# Symbol table for the ascii5 codec: a symbol's position in this list is its
# 5-bit code. 26 uppercase letters followed by space and four punctuation
# marks give 31 entries, so the 5-bit value 31 has no symbol.
characters = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ .,?!")
def encode(text):
    """Encode text as a packed sequence of 5-bit character codes.

    The text is upper-cased first. Every symbol found in ``characters`` is
    written as its 5-bit index in that table; any other symbol is dropped,
    which makes this codec lossier than ascii7. Using 5 bits per character
    instead of 8 shrinks the text to roughly 5/8 of its original size.
    """
    packed = Bits()
    for symbol in text.upper():
        if symbol not in characters:
            # Symbols outside the table are discarded.
            continue
        code = characters.index(symbol)
        # Zero-padded, 5-digit binary representation of the table index.
        packed = packed.concat(Bits(format(code, "05b")))
    return packed.bytes
def decode(data):
    """Decode ascii5 data back into text.

    The data is read five bits at a time and each value is looked up in
    ``characters``.

    Bits left over at the end that do not form a complete 5-bit group are
    ignored: they are presumably padding added when the encoded bits were
    rounded up to whole bytes, and decoding them would append a spurious
    character. The value 31, which has no entry in the table, is skipped
    instead of raising ``IndexError``.

    NOTE(review): if the padding is five bits or longer, a full group of
    zero bits remains and cannot be told apart from a real trailing 'A'.
    Fixing that needs a change to the encoded format.
    """
    bits = Bits(bytes(data))
    symbols = []
    # Stopping at len(bits) - 4 visits only the starts of complete groups.
    for start in range(0, len(bits) - 4, 5):
        index = int(bits[start:start + 5].bin, 2)
        if index < len(characters):
            symbols.append(characters[index])
    return "".join(symbols)
# Register the codec under the name "ascii5" when this module is imported.
register_codec(encode, decode, "ascii5")

20
text_codecs/ascii7.py Normal file → Executable file
View File

@@ -1,6 +1,24 @@
from custom_codecs.register import register_codec #from text_codecs.register import register_codec
from easybits import Bits from easybits import Bits
import codecs
def register_codec(encode, decode, name):
    """Register a codec so it can later be used to encode or decode.

    After registration the codec is reachable through the standard
    machinery, e.g. ``"text".encode(name)`` and ``data.decode(name)``.

    Args:
        encode: Callable taking a ``str`` and returning the encoded bytes.
        decode: Callable taking a bytes-like object and returning a ``str``.
        name: Name under which the codec is looked up. Matching ignores
            case and treats hyphens, spaces and underscores alike, because
            the ``codecs`` module normalises names before searching.
    """
    def normalize(codec_name):
        # Mirror the normalisation applied by ``codecs.lookup`` so that a
        # name such as "My-Codec" still matches the query "my_codec".
        return codec_name.lower().replace("-", "_").replace(" ", "_")

    wanted = normalize(name)

    def encode_wrapper(text, errors="strict"):
        # ``errors`` is accepted because the codec machinery passes it when
        # the caller supplies one; the wrapped encoder does not use it.
        return encode(text), len(text)

    def decode_wrapper(data, errors="strict"):
        # See encode_wrapper for why ``errors`` is accepted and ignored.
        return decode(data), len(data)

    def search_for_codec(query):
        if normalize(query) == wanted:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        # Returning None tells ``codecs`` to try the next search function.
        return None

    codecs.register(search_for_codec)
def encode(text): def encode(text):
"""An encoder which only handles ASCII: non-ASCII characters """An encoder which only handles ASCII: non-ASCII characters
are replaced with '?'. Once all the characters are ASCII, this encoder are replaced with '?'. Once all the characters are ASCII, this encoder

0
text_codecs/evaluate.py Normal file → Executable file
View File

1
text_codecs/hello.txt Normal file
View File

@@ -0,0 +1 @@
hello!

0
text_codecs/register.py Normal file → Executable file
View File