generated from mwc/lab_compression
I created a text codec, ascii5, similar to ascii7
but with a different set of characters included. It reads/encodes each character with 5 bits instead of the original 7.
This commit is contained in:
20
text_codecs/alphanumeric.py
Normal file → Executable file
20
text_codecs/alphanumeric.py
Normal file → Executable file
@@ -1,10 +1,28 @@
|
|||||||
import string
|
import string
|
||||||
import codecs
|
import codecs
|
||||||
from custom_codecs.register import register_codec
|
#from text_codecs.register import register_codec
|
||||||
from easybits import Bits
|
from easybits import Bits
|
||||||
|
|
||||||
allowed_characters = string.ascii_letters + string.digits
|
allowed_characters = string.ascii_letters + string.digits
|
||||||
|
|
||||||
|
import codecs
|
||||||
|
|
||||||
|
def register_codec(encode, decode, name):
    """Register a custom codec so strings and bytes can use it by name.

    After this call, ``text.encode(name)`` and ``data.decode(name)`` are
    routed to the given functions.

    Args:
        encode: Callable taking the text and returning the encoded data.
        decode: Callable taking the encoded data and returning the text.
        name: Name the codec answers to. Python passes a normalized
            (lower-case) name to search functions, so use a lower-case
            name here.

    The optional ``errors`` argument of ``str.encode`` / ``bytes.decode``
    is accepted and ignored. Without it in the wrapper signatures, a call
    such as ``text.encode(name, errors="ignore")`` raises ``TypeError``.
    """
    def encode_wrapper(text, errors="strict"):
        # The codec protocol expects (output, amount of input consumed).
        return encode(text), len(text)

    def decode_wrapper(data, errors="strict"):
        return decode(data), len(data)

    def search_for_codec(query):
        if query == name:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        # Not ours: let the other registered search functions try.
        return None

    codecs.register(search_for_codec)
|
||||||
|
|
||||||
def encode(text):
|
def encode(text):
|
||||||
"""A (very) lossy encoder which only saves ASCII letters, numbers, and spaces.
|
"""A (very) lossy encoder which only saves ASCII letters, numbers, and spaces.
|
||||||
Everything else is discarded. All whitespace (e.g. tabs) is converted into spaces.
|
Everything else is discarded. All whitespace (e.g. tabs) is converted into spaces.
|
||||||
|
|||||||
46
text_codecs/ascii5.py
Executable file
46
text_codecs/ascii5.py
Executable file
@@ -0,0 +1,46 @@
|
|||||||
|
from easybits import Bits
|
||||||
|
|
||||||
|
import codecs
|
||||||
|
|
||||||
|
def register_codec(encode, decode, name):
    """Register a custom codec so strings and bytes can use it by name.

    After this call, ``text.encode(name)`` and ``data.decode(name)`` are
    routed to the given functions.

    Args:
        encode: Callable taking the text and returning the encoded data.
        decode: Callable taking the encoded data and returning the text.
        name: Name the codec answers to. Python passes a normalized
            (lower-case) name to search functions, so use a lower-case
            name here.

    The optional ``errors`` argument of ``str.encode`` / ``bytes.decode``
    is accepted and ignored. Without it in the wrapper signatures, a call
    such as ``text.encode(name, errors="ignore")`` raises ``TypeError``.
    """
    def encode_wrapper(text, errors="strict"):
        # The codec protocol expects (output, amount of input consumed).
        return encode(text), len(text)

    def decode_wrapper(data, errors="strict"):
        return decode(data), len(data)

    def search_for_codec(query):
        if query == name:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        # Not ours: let the other registered search functions try.
        return None

    codecs.register(search_for_codec)
|
||||||
|
|
||||||
|
# Code table for ascii5: a character's position in this list is its 5-bit
# code (26 uppercase letters, then space and the punctuation . , ? !).
characters = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ .,?!")
|
||||||
|
|
||||||
|
|
||||||
|
def encode(text):
    """Encode text as packed 5-bit codes, one per supported character.

    The text is upper-cased first. Characters that are not in
    ``characters`` are dropped, which makes this codec lossier than
    ascii7. Each kept character costs 5 bits instead of 8, so the output
    is about 5/8 the size of the kept text.

    The last byte is filled out with 1-bits rather than 0-bits. A full
    group of five 1-bits is the value 31, which is past the end of the
    31-entry ``characters`` table, so ``decode`` can tell padding from
    real text. Zero padding would be read back as 'A'.
    NOTE(review): this relies on ``characters`` having fewer than 32
    entries; a 32nd entry would collide with the padding value.
    """
    result = Bits()
    for character in text.upper():
        if character in characters:
            code = format(characters.index(character), "05b")
            result = result.concat(Bits(code))
    # Pad explicitly to a whole number of bytes using 1-bits.
    padding = -len(result) % 8
    if padding:
        result = result.concat(Bits("1" * padding))
    return result.bytes


def decode(data):
    """Decode bytes produced by ``encode`` back into text.

    The data is read in 5-bit groups, each used as an index into
    ``characters``. Two kinds of trailing residue are ignored instead of
    being turned into text or raising ``IndexError``:

    * an incomplete final group (fewer than 5 bits left over), and
    * any group whose value is outside the ``characters`` table, which
      is how the 1-bit padding written by ``encode`` shows up.
    """
    bits = Bits(bytes(data))
    letters = []
    # Stop 4 bits early so only complete 5-bit groups are visited.
    for start in range(0, len(bits) - 4, 5):
        index = int(bits[start:start + 5].bin, 2)
        if index < len(characters):
            letters.append(characters[index])
    return "".join(letters)
|
||||||
|
|
||||||
|
|
||||||
|
register_codec(encode, decode, "ascii5")
|
||||||
|
|
||||||
20
text_codecs/ascii7.py
Normal file → Executable file
20
text_codecs/ascii7.py
Normal file → Executable file
@@ -1,6 +1,24 @@
|
|||||||
from custom_codecs.register import register_codec
|
#from text_codecs.register import register_codec
|
||||||
from easybits import Bits
|
from easybits import Bits
|
||||||
|
|
||||||
|
import codecs
|
||||||
|
|
||||||
|
def register_codec(encode, decode, name):
    """Register a custom codec so strings and bytes can use it by name.

    After this call, ``text.encode(name)`` and ``data.decode(name)`` are
    routed to the given functions.

    Args:
        encode: Callable taking the text and returning the encoded data.
        decode: Callable taking the encoded data and returning the text.
        name: Name the codec answers to. Python passes a normalized
            (lower-case) name to search functions, so use a lower-case
            name here.

    The optional ``errors`` argument of ``str.encode`` / ``bytes.decode``
    is accepted and ignored. Without it in the wrapper signatures, a call
    such as ``text.encode(name, errors="ignore")`` raises ``TypeError``.
    """
    def encode_wrapper(text, errors="strict"):
        # The codec protocol expects (output, amount of input consumed).
        return encode(text), len(text)

    def decode_wrapper(data, errors="strict"):
        return decode(data), len(data)

    def search_for_codec(query):
        if query == name:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        # Not ours: let the other registered search functions try.
        return None

    codecs.register(search_for_codec)
|
||||||
|
|
||||||
def encode(text):
|
def encode(text):
|
||||||
"""An encoder which only handles ASCII: non-ASCII characters
|
"""An encoder which only handles ASCII: non-ASCII characters
|
||||||
are replaced with '?'. Once all the characters are ASCII, this encoder
|
are replaced with '?'. Once all the characters are ASCII, this encoder
|
||||||
|
|||||||
0
text_codecs/evaluate.py
Normal file → Executable file
0
text_codecs/evaluate.py
Normal file → Executable file
1
text_codecs/hello.txt
Normal file
1
text_codecs/hello.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
hello!
|
||||||
0
text_codecs/register.py
Normal file → Executable file
0
text_codecs/register.py
Normal file → Executable file
Reference in New Issue
Block a user