generated from mwc/lab_compression
Compare commits
2 Commits
be94b32e16
...
9ac30cfc22
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9ac30cfc22 | ||
|
|
119c7d10d5 |
@@ -1,10 +1,29 @@
|
|||||||
import string
|
import string
|
||||||
import codecs
|
import codecs
|
||||||
from custom_codecs.register import register_codec
|
#from text_codecs.register import register_codec
|
||||||
from easybits import Bits
|
from easybits import Bits
|
||||||
|
|
||||||
allowed_characters = string.ascii_letters + string.digits
|
allowed_characters = string.ascii_letters + string.digits
|
||||||
|
|
||||||
|
import codecs
|
||||||
|
|
||||||
|
def register_codec(encode, decode, name):
|
||||||
|
"""Registers a codec so that it can later be used to encode
|
||||||
|
or decode strings and bytes.
|
||||||
|
"""
|
||||||
|
def encode_wrapper(text):
|
||||||
|
return encode(text), len(text)
|
||||||
|
|
||||||
|
def decode_wrapper(data):
|
||||||
|
return decode(data), len(data)
|
||||||
|
|
||||||
|
def search_for_codec(query):
|
||||||
|
if query == name:
|
||||||
|
return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
|
||||||
|
|
||||||
|
codecs.register(search_for_codec)
|
||||||
|
|
||||||
|
|
||||||
def encode(text):
|
def encode(text):
|
||||||
"""A (very) lossy encoder which only saves ASCII letters, numbers, and spaces.
|
"""A (very) lossy encoder which only saves ASCII letters, numbers, and spaces.
|
||||||
Everything else is discarded. All whitespace (e.g. tabs) is converted into spaces.
|
Everything else is discarded. All whitespace (e.g. tabs) is converted into spaces.
|
||||||
|
|||||||
@@ -1,6 +1,25 @@
|
|||||||
from custom_codecs.register import register_codec
|
#from text_codecs.register import register_codec
|
||||||
from easybits import Bits
|
from easybits import Bits
|
||||||
|
|
||||||
|
import codecs
|
||||||
|
|
||||||
|
def register_codec(encode, decode, name):
|
||||||
|
"""Registers a codec so that it can later be used to encode
|
||||||
|
or decode strings and bytes.
|
||||||
|
"""
|
||||||
|
def encode_wrapper(text):
|
||||||
|
return encode(text), len(text)
|
||||||
|
|
||||||
|
def decode_wrapper(data):
|
||||||
|
return decode(data), len(data)
|
||||||
|
|
||||||
|
def search_for_codec(query):
|
||||||
|
if query == name:
|
||||||
|
return codecs.CodecInfo(encode_wrapper, decode_wrapper, name=name)
|
||||||
|
|
||||||
|
codecs.register(search_for_codec)
|
||||||
|
|
||||||
|
|
||||||
def encode(text):
|
def encode(text):
|
||||||
"""An encoder which only handles ASCII: non-ASCII characters
|
"""An encoder which only handles ASCII: non-ASCII characters
|
||||||
are replaced with '?'. Once all the characters are ASCII, this encoder
|
are replaced with '?'. Once all the characters are ASCII, this encoder
|
||||||
|
|||||||
1
text_codecs/hello.txt
Normal file
1
text_codecs/hello.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Hello!
|
||||||
18
text_codecs/mycodec.py
Normal file
18
text_codecs/mycodec.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
from register import register_codec
|
||||||
|
|
||||||
|
def encode(text):
|
||||||
|
text = text.lower()
|
||||||
|
result = ""
|
||||||
|
|
||||||
|
for char in text:
|
||||||
|
if char.isalpha() or char == " ":
|
||||||
|
if char not in "aeiou":
|
||||||
|
result += char
|
||||||
|
|
||||||
|
return result.encode("utf8")
|
||||||
|
|
||||||
|
def decode(data):
|
||||||
|
text = bytes(data).decode("utf8")
|
||||||
|
return text
|
||||||
|
|
||||||
|
register_codec(encode, decode, "mycodec")
|
||||||
22850
text_codecs/texts/little_women.txt
Normal file
22850
text_codecs/texts/little_women.txt
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user