Implemented encoding/decoding in commonchars.py

This commit is contained in:
owengavi2
2026-04-07 18:37:03 -04:00
parent 96add86f83
commit 999aae570f
6 changed files with 146 additions and 4 deletions

1
.envrc Normal file
View File

@@ -0,0 +1 @@
source .venv/bin/activate

1
hello.txt Normal file
View File

@@ -0,0 +1 @@
hello!

View File

@@ -1,6 +1,5 @@
import string
import codecs
from custom_codecs.register import register_codec
from easybits import Bits
allowed_characters = string.ascii_letters + string.digits
@@ -36,4 +35,20 @@ def decode(data):
breakpoint()
return text
def register_codec(encode, decode, name):
    """Register an encode/decode pair with the ``codecs`` module under ``name``.

    Once registered, the pair can be used through the normal codec entry
    points, for example ``text.encode(name)`` and ``data.decode(name)``.

    Args:
        encode: Callable that takes the text to encode and returns the
            encoded result.
        decode: Callable that takes the encoded data and returns text.
        name: Encoding name that the registered search function answers to.
    """
    def encode_wrapper(text, errors="strict"):
        # The codec machinery passes ``errors`` as a second positional
        # argument whenever the caller names an error handler, e.g.
        # ``text.encode(name, "strict")``. The wrapped function has no
        # error-handling modes, so the value is accepted and ignored.
        # The second tuple item is the amount of input consumed.
        return encode(text), len(text)

    def decode_wrapper(data, errors="strict"):
        # Same contract as encode_wrapper, in the decoding direction.
        return decode(data), len(data)

    def search_for_codec(query):
        # Search functions return None for names they do not handle so
        # that the lookup can move on to the next registered function.
        if query == name:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        return None

    codecs.register(search_for_codec)
# Register this module's encode/decode pair under the name "alphanumeric".
register_codec(encode, decode, "alphanumeric")

View File

@@ -1,6 +1,5 @@
from custom_codecs.register import register_codec
from easybits import Bits
import codecs
def encode(text):
"""An encoder which only handles ASCII: non-ASCII characters
are replaced with '?'. Once all the characters are ASCII, this encoder
@@ -30,4 +29,21 @@ def decode(data):
text += Bits(byte).ascii
return text
def register_codec(encode, decode, name):
    """Register an encode/decode pair with the ``codecs`` module under ``name``.

    Once registered, the pair can be used through the normal codec entry
    points, for example ``text.encode(name)`` and ``data.decode(name)``.

    Args:
        encode: Callable that takes the text to encode and returns the
            encoded result.
        decode: Callable that takes the encoded data and returns text.
        name: Encoding name that the registered search function answers to.
    """
    def encode_wrapper(text, errors="strict"):
        # The codec machinery passes ``errors`` as a second positional
        # argument whenever the caller names an error handler, e.g.
        # ``text.encode(name, "strict")``. The wrapped function has no
        # error-handling modes, so the value is accepted and ignored.
        # The second tuple item is the amount of input consumed.
        return encode(text), len(text)

    def decode_wrapper(data, errors="strict"):
        # Same contract as encode_wrapper, in the decoding direction.
        return decode(data), len(data)

    def search_for_codec(query):
        # Search functions return None for names they do not handle so
        # that the lookup can move on to the next registered function.
        if query == name:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        return None

    codecs.register(search_for_codec)
# Register this module's encode/decode pair under the name "ascii7".
register_codec(encode, decode, "ascii7")

View File

@@ -0,0 +1,93 @@
from easybits import Bits
import codecs
def encode(text):
    """Encode text with a variable-length code that favors common letters.

    Each of the eight letters e, t, a, o, i, n, s, h is written as a 1 bit
    followed by the 3-bit binary index of the letter in that list (4 bits
    in total). Every other character is written as its ASCII code.
    Characters that cannot be encoded as ASCII are replaced with '?', so
    the encoding is lossy for non-ASCII text.

    Args:
        text: The string to encode.

    Returns:
        The accumulated bits converted with ``Bits.bytes``.
    """
    commonchars = ['e', 't', 'a', 'o', 'i', 'n', 's', 'h']
    # Map each common letter to the 3-bit binary form of its list index
    # ('e' -> '000', 't' -> '001', ..., 'h' -> '111').
    index_bits = {char: format(i, '03b') for i, char in enumerate(commonchars)}
    result = Bits()
    for char in text:
        code = index_bits.get(char)
        if code is not None:
            # Leading 1 marks a short code; the matching decoder relies on
            # regular characters starting with a 0 bit.
            # NOTE(review): assumes Bits(char, encoding='ascii') yields
            # 8 bits with a leading 0 -- confirm against easybits.
            result = result.concat(Bits('1')).concat(Bits(code))
            continue
        try:
            b = Bits(char, encoding='ascii')
        except UnicodeEncodeError:
            b = Bits('?', encoding='ascii')
        result = result.concat(b)
    # NOTE(review): codes are 4 or 8 bits, so the total may not be a whole
    # number of bytes -- confirm how Bits.bytes pads in that case.
    return result.bytes
def decode(data):
    """Decode data produced by the matching encoder back into text.

    The bits are read from left to right. A leading 1 bit marks a short
    code: the next 3 bits are the index of one of the common letters
    e, t, a, o, i, n, s, h, and 4 bits are consumed. A leading 0 bit marks
    a regular character: 8 bits are consumed and converted with
    ``Bits.ascii``.

    Because codes are 4 or 8 bits long, the encoded stream can end in the
    middle of a byte. A trailing run of fewer than 8 bits that starts with
    0 cannot be a full character, so it is treated as padding and ignored
    rather than decoded into a spurious extra character.

    Args:
        data: The encoded bytes-like object.

    Returns:
        The decoded string.
    """
    # Copy into a bytes object first so any bytes-like input is accepted.
    bits = Bits(bytes(data))
    commonchars = ['e', 't', 'a', 'o', 'i', 'n', 's', 'h']
    # 3-bit patterns in list order: '000' for 'e' up to '111' for 'h'.
    codes = [Bits(format(n, '03b')) for n in range(len(commonchars))]
    pieces = []
    total = len(bits)
    i = 0
    while i < total:
        if bits[i] == 1:
            chunk = bits[i+1:i+4]
            # A truncated chunk matches no pattern and adds nothing.
            for code, char in zip(codes, commonchars):
                if chunk == code:
                    pieces.append(char)
                    break
            i += 4
        else:
            if total - i < 8:
                # Not enough bits left for a full character: padding.
                break
            pieces.append(bits[i:i+8].ascii)
            i += 8
    return "".join(pieces)
def register_codec(encode, decode, name):
    """Register an encode/decode pair with the ``codecs`` module under ``name``.

    Once registered, the pair can be used through the normal codec entry
    points, for example ``text.encode(name)`` and ``data.decode(name)``.

    Args:
        encode: Callable that takes the text to encode and returns the
            encoded result.
        decode: Callable that takes the encoded data and returns text.
        name: Encoding name that the registered search function answers to.
    """
    def encode_wrapper(text, errors="strict"):
        # The codec machinery passes ``errors`` as a second positional
        # argument whenever the caller names an error handler, e.g.
        # ``text.encode(name, "strict")``. The wrapped function has no
        # error-handling modes, so the value is accepted and ignored.
        # The second tuple item is the amount of input consumed.
        return encode(text), len(text)

    def decode_wrapper(data, errors="strict"):
        # Same contract as encode_wrapper, in the decoding direction.
        return decode(data), len(data)

    def search_for_codec(query):
        # Search functions return None for names they do not handle so
        # that the lookup can move on to the next registered function.
        if query == name:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        return None

    codecs.register(search_for_codec)
# Register this module's encode/decode pair under the name "commonchars".
register_codec(encode, decode, "commonchars")

View File

@@ -73,3 +73,19 @@ if args.encodings:
if args.inspect:
print(inspect_encoded_text(args.inspect, args.text))
def register_codec(encode, decode, name):
    """Register an encode/decode pair with the ``codecs`` module under ``name``.

    Once registered, the pair can be used through the normal codec entry
    points, for example ``text.encode(name)`` and ``data.decode(name)``.

    Args:
        encode: Callable that takes the text to encode and returns the
            encoded result.
        decode: Callable that takes the encoded data and returns text.
        name: Encoding name that the registered search function answers to.
    """
    def encode_wrapper(text, errors="strict"):
        # The codec machinery passes ``errors`` as a second positional
        # argument whenever the caller names an error handler, e.g.
        # ``text.encode(name, "strict")``. The wrapped function has no
        # error-handling modes, so the value is accepted and ignored.
        # The second tuple item is the amount of input consumed.
        return encode(text), len(text)

    def decode_wrapper(data, errors="strict"):
        # Same contract as encode_wrapper, in the decoding direction.
        return decode(data), len(data)

    def search_for_codec(query):
        # Search functions return None for names they do not handle so
        # that the lookup can move on to the next registered function.
        if query == name:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        return None

    # Register exactly once. ``search_for_codec`` only exists inside this
    # function, so it cannot be registered from module level.
    codecs.register(search_for_codec)