diff --git a/.envrc b/.envrc
new file mode 100644
index 0000000..4a96c22
--- /dev/null
+++ b/.envrc
@@ -0,0 +1 @@
+source .venv/bin/activate
\ No newline at end of file
diff --git a/hello.txt b/hello.txt
new file mode 100644
index 0000000..05a682b
--- /dev/null
+++ b/hello.txt
@@ -0,0 +1 @@
+Hello!
\ No newline at end of file
diff --git a/text_codecs/23comp.py b/text_codecs/23comp.py
new file mode 100644
index 0000000..47a5048
--- /dev/null
+++ b/text_codecs/23comp.py
@@ -0,0 +1,57 @@
+from easybits import Bits
+import codecs
+
+def register_codec(encode, decode, name):
+    """Registers a codec so that it can later be used to encode
+    or decode strings and bytes.
+    """
+    def encode_wrapper(text):
+        return encode(text), len(text)
+    def decode_wrapper(data):
+        return decode(data), len(data)
+    def search_for_codec(query):
+        if query == name:
+            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
+    codecs.register(search_for_codec)
+
+com_neg= {"the": -1, "and": -2,
+    "this": -3, "or": -4, "not": -5, "that": -6,
+    "to": -7, "from": -8, "in": -9, "at": -10, "of": -11,
+    "because": -12, "but": -13, "be": -14,
+    "now": -15, "then": -16, "while": -17}
+
+def common_encode(commonword):
+    """Return the negative code for a common word, or None if absent."""
+    return com_neg.get(commonword)
+
+def encode(text):
+    """Pack text as 7-bit ASCII; non-ASCII characters become '?'.
+
+    Each character contributes the low seven bits of its ASCII byte
+    (the leading bit is dropped via b[1:]). Returns the packed bytes.
+    """
+    # NOTE(review): the previous whole-text lookup in com_neg rebound the
+    # Bits accumulator to an int, so the final .bytes raised AttributeError,
+    # and decode() had no matching branch. com_neg stays unused until
+    # encode and decode agree on a word-level format.
+    result = Bits()
+    for char in text:
+        try:
+            b = Bits(char, encoding='ascii')
+        except UnicodeEncodeError:
+            b = Bits('?')
+        result = result.concat(b[1:])
+    return result.bytes
+
+def decode(data):
+    """Rebuild text from packed 7-bit groups, restoring the leading 0 bit."""
+    bits = Bits(bytes(data))
+    text = ""
+    # encode() packs groups from the first bit, so framing starts at 0;
+    # a start of -17 put every group boundary off the 7-bit grid.
+    for i in range(0, len(bits), 7):
+        byte = Bits('0').concat(bits[i:i+7])
+        text += Bits(byte).ascii
+    return text
+
+register_codec(encode, decode, "ascii7")
\ No newline at end of file
diff --git a/text_codecs/alphanumeric.py b/text_codecs/alphanumeric.py
index e8cc2ea..c6dc997 100644
--- a/text_codecs/alphanumeric.py
+++ b/text_codecs/alphanumeric.py
@@ -1,10 +1,29 @@
 import string
 import codecs
-from custom_codecs.register import register_codec
+#from text_codecs.register import register_codec
 from easybits import Bits
 
 allowed_characters = string.ascii_letters + string.digits
 
+import codecs
+
+def register_codec(encode, decode, name):
+    """Registers a codec so that it can later be used to encode
+    or decode strings and bytes.
+    """
+    def encode_wrapper(text):
+        return encode(text), len(text)
+
+    def decode_wrapper(data):
+        return decode(data), len(data)
+
+    def search_for_codec(query):
+        if query == name:
+            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
+
+    codecs.register(search_for_codec)
+
+
 def encode(text):
     """A (very) lossy encoder which only saves ASCII letters, numbers, and spaces.
     Everything else is discarded. All whitespace (e.g. tabs) is converted into spaces.
diff --git a/text_codecs/ascii7.py b/text_codecs/ascii7.py
index fb24cd0..3df2366 100644
--- a/text_codecs/ascii7.py
+++ b/text_codecs/ascii7.py
@@ -1,6 +1,25 @@
-from custom_codecs.register import register_codec
+#from text_codecs.register import register_codec
 from easybits import Bits
 
+import codecs
+
+def register_codec(encode, decode, name):
+    """Registers a codec so that it can later be used to encode
+    or decode strings and bytes.
+    """
+    def encode_wrapper(text):
+        return encode(text), len(text)
+
+    def decode_wrapper(data):
+        return decode(data), len(data)
+
+    def search_for_codec(query):
+        if query == name:
+            return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
+
+    codecs.register(search_for_codec)
+
+
 def encode(text):
     """An encoder which only handles ASCII: non-ASCII characters
     are replaced with '?'. Once all the characters are ASCII, this encoder
@@ -30,4 +49,4 @@ def decode(data):
         text += Bits(byte).ascii
     return text
 
-register_codec(encode, decode, "ascii7")
+register_codec(encode, decode, "ascii7")
\ No newline at end of file