generated from mwc/lab_compression
Submitted the changes I had previously made.
This commit is contained in:
47
text_codecs/23comp.py
Normal file
47
text_codecs/23comp.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
from easybits import Bits
|
||||||
|
import codecs
|
||||||
|
|
||||||
|
def register_codec(encode, decode, name):
    """Register a custom codec so it can be used to encode or decode
    strings and bytes by name.

    Args:
        encode: Callable that takes the text and returns the encoded data.
        decode: Callable that takes the encoded data and returns the text.
        name: Name under which the codec is registered. The lookup compares
            the requested codec name to this value with ``==``, and the
            codec machinery lower-cases requested names, so ``name`` should
            already be lower case.
    """
    def encode_wrapper(text, errors="strict"):
        # ``errors`` is forwarded by the codec machinery whenever the caller
        # passes one (e.g. ``text.encode(name, "replace")``). It is accepted
        # so those calls do not fail, and otherwise ignored because
        # ``encode`` takes no error-handling mode.
        return encode(text), len(text)

    def decode_wrapper(data, errors="strict"):
        # Same reasoning as ``encode_wrapper`` for the ``errors`` argument.
        return decode(data), len(data)

    def search_for_codec(query):
        if query == name:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        # Any other name is not ours; let other search functions handle it.
        return None

    codecs.register(search_for_codec)
|
||||||
|
|
||||||
|
# Common words mapped to negative integer codes: the first word listed gets
# -1, the second -2, and so on down to -17.
com_neg = {
    word: -position
    for position, word in enumerate(
        (
            "the", "and",
            "this", "or", "not", "that",
            "to", "from", "in", "at", "of",
            "because", "but", "be",
            "now", "then", "while",
        ),
        start=1,
    )
}
|
||||||
|
|
||||||
|
def common_encode(commonword):
    """Look up the code assigned to a common word.

    Args:
        commonword: The word to look up in ``com_neg``.

    Returns:
        The ``com_neg`` entry for ``commonword``, or ``None`` when the word
        has no entry.
    """
    if commonword in com_neg:
        return com_neg[commonword]
    return None
|
||||||
|
|
||||||
|
def encode(text):
    """Pack ``text`` into bytes using seven bits per character.

    Each character is converted to bits with the ASCII encoding (a character
    that cannot be encoded as ASCII is replaced with '?'), the first bit of
    that character is dropped, and the remaining bits are appended to the
    output.

    Args:
        text: The string to encode.

    Returns:
        The ``bytes`` value of the accumulated bits.
    """
    result = Bits()
    for char in text:
        # The accumulator must stay a ``Bits`` value for ``result.bytes``
        # below to work, so integer codes from ``com_neg`` are not assigned
        # to it here.
        # TODO(review): substituting common words with short codes needs a
        # bit-level format that ``decode`` also understands before it can be
        # added to this loop.
        try:
            b = Bits(char, encoding='ascii')
        except UnicodeEncodeError:
            b = Bits('?')
        # Skip the first bit of each character and keep the rest.
        result = result.concat(b[1:])
    return result.bytes
|
||||||
|
|
||||||
|
def decode(data):
    """Unpack seven-bit-per-character data back into text.

    The data is read in consecutive groups of seven bits starting at the
    first bit. Each group is prefixed with ``Bits('0')`` and converted to
    text through the ``ascii`` attribute.

    Args:
        data: The encoded data; it is converted with ``bytes(data)`` first.

    Returns:
        The decoded string.
    """
    bits = Bits(bytes(data))
    pieces = []
    # Groups must start at bit 0. A negative start would take the first
    # slices from the end of the data and misalign every later group.
    # NOTE(review): if ``data`` carries padding bits after the last
    # character, the final group may be short or consist only of padding --
    # confirm how ``Bits.ascii`` handles that case.
    for i in range(0, len(bits), 7):
        byte = Bits('0').concat(bits[i:i+7])
        pieces.append(Bits(byte).ascii)
    return "".join(pieces)
|
||||||
|
|
||||||
|
# Register the encode/decode pair defined in this module under the codec
# name "ascii7".
register_codec(encode, decode, "ascii7")
|
||||||
@@ -1,10 +1,29 @@
|
|||||||
import string
|
import string
|
||||||
import codecs
|
import codecs
|
||||||
from custom_codecs.register import register_codec
|
#from text_codecs.register import register_codec
|
||||||
from easybits import Bits
|
from easybits import Bits
|
||||||
|
|
||||||
allowed_characters = string.ascii_letters + string.digits
|
allowed_characters = string.ascii_letters + string.digits
|
||||||
|
|
||||||
|
import codecs
|
||||||
|
|
||||||
|
def register_codec(encode, decode, name):
    """Register a custom codec so it can be used to encode or decode
    strings and bytes by name.

    Args:
        encode: Callable that takes the text and returns the encoded data.
        decode: Callable that takes the encoded data and returns the text.
        name: Name under which the codec is registered. The lookup compares
            the requested codec name to this value with ``==``, and the
            codec machinery lower-cases requested names, so ``name`` should
            already be lower case.
    """
    def encode_wrapper(text, errors="strict"):
        # ``errors`` is forwarded by the codec machinery whenever the caller
        # passes one (e.g. ``text.encode(name, "replace")``). It is accepted
        # so those calls do not fail, and otherwise ignored because
        # ``encode`` takes no error-handling mode.
        return encode(text), len(text)

    def decode_wrapper(data, errors="strict"):
        # Same reasoning as ``encode_wrapper`` for the ``errors`` argument.
        return decode(data), len(data)

    def search_for_codec(query):
        if query == name:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        # Any other name is not ours; let other search functions handle it.
        return None

    codecs.register(search_for_codec)
|
||||||
|
|
||||||
|
|
||||||
def encode(text):
|
def encode(text):
|
||||||
"""A (very) lossy encoder which only saves ASCII letters, numbers, and spaces.
|
"""A (very) lossy encoder which only saves ASCII letters, numbers, and spaces.
|
||||||
Everything else is discarded. All whitespace (e.g. tabs) is converted into spaces.
|
Everything else is discarded. All whitespace (e.g. tabs) is converted into spaces.
|
||||||
|
|||||||
@@ -1,6 +1,25 @@
|
|||||||
from custom_codecs.register import register_codec
|
#from text_codecs.register import register_codec
|
||||||
from easybits import Bits
|
from easybits import Bits
|
||||||
|
|
||||||
|
import codecs
|
||||||
|
|
||||||
|
def register_codec(encode, decode, name):
    """Register a custom codec so it can be used to encode or decode
    strings and bytes by name.

    Args:
        encode: Callable that takes the text and returns the encoded data.
        decode: Callable that takes the encoded data and returns the text.
        name: Name under which the codec is registered. The lookup compares
            the requested codec name to this value with ``==``, and the
            codec machinery lower-cases requested names, so ``name`` should
            already be lower case.
    """
    def encode_wrapper(text, errors="strict"):
        # ``errors`` is forwarded by the codec machinery whenever the caller
        # passes one (e.g. ``text.encode(name, "replace")``). It is accepted
        # so those calls do not fail, and otherwise ignored because
        # ``encode`` takes no error-handling mode.
        return encode(text), len(text)

    def decode_wrapper(data, errors="strict"):
        # Same reasoning as ``encode_wrapper`` for the ``errors`` argument.
        return decode(data), len(data)

    def search_for_codec(query):
        if query == name:
            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
        # Any other name is not ours; let other search functions handle it.
        return None

    codecs.register(search_for_codec)
|
||||||
|
|
||||||
|
|
||||||
def encode(text):
|
def encode(text):
|
||||||
"""An encoder which only handles ASCII: non-ASCII characters
|
"""An encoder which only handles ASCII: non-ASCII characters
|
||||||
are replaced with '?'. Once all the characters are ASCII, this encoder
|
are replaced with '?'. Once all the characters are ASCII, this encoder
|
||||||
|
|||||||
Reference in New Issue
Block a user