text codecs ready
This commit is contained in:
39
text_codecs/alphanumeric.py
Normal file
39
text_codecs/alphanumeric.py
Normal file
@@ -0,0 +1,39 @@
|
||||
import string
|
||||
import codecs
|
||||
from custom_codecs.register import register_codec
|
||||
from easybits import Bits
|
||||
|
||||
# Characters the alphanumeric encoder keeps verbatim: ASCII letters and digits.
allowed_characters = string.ascii_letters + string.digits
|
||||
|
||||
def encode(text):
    """Lossily encode text, keeping only ASCII letters, digits, and spaces.

    Characters found in ``allowed_characters`` are kept as-is. A whitespace
    character (e.g. a tab) is written as a single space, and any further
    whitespace is skipped until the next kept letter or digit. Every other
    character is dropped and does not affect the whitespace tracking.
    The surviving text is returned as ``Bits(...).bytes``.
    """
    kept = []
    space_permitted = True
    for symbol in text:
        if symbol in allowed_characters:
            kept.append(symbol)
            space_permitted = True
            continue
        if space_permitted and symbol in string.whitespace:
            kept.append(' ')
            # Suppress further spaces until a letter or digit is kept.
            space_permitted = False
    return Bits(''.join(kept)).bytes
|
||||
|
||||
def decode(data):
    """Decode a sequence of byte values into text, one character per byte.

    Each item of ``data`` is converted with ``Bits(byte, length=8).ascii``
    and the resulting characters are joined into one string. Only the string
    is returned; this function does not report how many bytes were consumed.

    Raises:
        ValueError: if ``Bits`` raises ``OverflowError`` for an item, i.e. the
            value cannot be represented in eight bits. The original
            ``OverflowError`` is chained as the cause.
    """
    characters = []
    for position, byte in enumerate(data):
        try:
            characters.append(Bits(byte, length=8).ascii)
        except OverflowError as err:
            # Fail loudly with the offending position instead of dropping
            # into an interactive debugger from library code.
            raise ValueError(
                f"alphanumeric codec cannot decode value {byte!r} "
                f"at position {position}"
            ) from err
    return ''.join(characters)
|
||||
|
||||
# Register this module's encode/decode pair under the codec name "alphanumeric".
register_codec(encode, decode, "alphanumeric")
|
33
text_codecs/ascii7.py
Normal file
33
text_codecs/ascii7.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from custom_codecs.register import register_codec
|
||||
from easybits import Bits
|
||||
|
||||
def encode(text):
    """Pack text into seven bits per character and return the bytes.

    Each character is converted with ``Bits(char, encoding='ascii')``; a
    character that cannot be encoded as ASCII is replaced by '?'. The leading
    bit of each character is dropped and the remaining bits are appended to
    the output, so ASCII text takes roughly 7/8 of its usual size.

    NOTE(review): this relies on the dropped leading bit always being 0 for
    ASCII so that the decoder can restore it. How ``.bytes`` pads a bit count
    that is not a multiple of eight is not visible here -- confirm.
    """
    def trailing_bits(char):
        # Everything after the first bit of the character's ASCII encoding.
        try:
            char_bits = Bits(char, encoding='ascii')
        except UnicodeEncodeError:
            char_bits = Bits('?')
        return char_bits[1:]

    packed = Bits()
    for char in text:
        packed = packed.concat(trailing_bits(char))
    return packed.bytes
|
||||
|
||||
def decode(data):
    """Unpack seven-bit characters from bytes and return the text.

    The input is loaded into ``Bits`` and walked in steps of seven bits.
    Each chunk is prefixed with ``Bits('0')`` (intended as the leading zero
    bit the encoder dropped) and converted to a character via ``.ascii``.

    NOTE(review): when the total bit count is not a multiple of seven, the
    final chunk holds fewer than seven bits and is still converted. Confirm
    how ``Bits`` handles that and whether padding bits can yield a spurious
    trailing character.
    """
    stream = Bits(bytes(data))
    pieces = []
    for start in range(0, len(stream), 7):
        chunk = stream[start:start + 7]
        restored = Bits('0').concat(chunk)
        pieces.append(Bits(restored).ascii)
    return ''.join(pieces)
|
||||
|
||||
# Register this module's encode/decode pair under the codec name "ascii7".
register_codec(encode, decode, "ascii7")
|
17
text_codecs/register.py
Normal file
17
text_codecs/register.py
Normal file
@@ -0,0 +1,17 @@
|
||||
import codecs
|
||||
|
||||
def register_codec(encode, decode, name):
    """Register a stateless codec so it can later be used to encode
    or decode strings and bytes (e.g. ``text.encode(name)``).

    Args:
        encode: callable taking the text and returning the encoded bytes.
        decode: callable taking the bytes-like input and returning the text.
        name: name under which the codec is looked up. Matching ignores case
            and treats hyphens and spaces as underscores.

    The supplied functions return only their result; the wrappers below add
    the "length consumed" element that the codec machinery expects.
    """
    def normalize(label):
        # The registry lowercases lookup names (and, depending on the Python
        # version, maps spaces/hyphens to underscores) before calling search
        # functions, so compare names in that normalized form.
        return label.lower().replace('-', '_').replace(' ', '_')

    normalized_name = normalize(name)

    def encode_wrapper(text, errors='strict'):
        # `errors` is passed by the codec machinery whenever the caller gives
        # one; the wrapped encoder has no error modes, so it is ignored.
        return encode(text), len(text)

    def decode_wrapper(data, errors='strict'):
        # See encode_wrapper for why `errors` is accepted but unused.
        return decode(data), len(data)

    def search_for_codec(query):
        if normalize(query) == normalized_name:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        # Not our codec: let the registry try the next search function.
        return None

    codecs.register(search_for_codec)
|
Reference in New Issue
Block a user