generated from mwc/lab_compression
Started working on custom codec noVow7
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
import string
|
||||
import codecs
|
||||
from custom_codecs.register import register_codec
|
||||
from text_codecs.register import register_codec
|
||||
from easybits import Bits
|
||||
|
||||
allowed_characters = string.ascii_letters + string.digits
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from custom_codecs.register import register_codec
|
||||
from text_codecs.register import register_codec
|
||||
from easybits import Bits
|
||||
|
||||
def encode(text):
|
||||
|
||||
41
text_codecs/noVow7.py
Normal file
41
text_codecs/noVow7.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import string
|
||||
import codecs
|
||||
from text_codecs.register import register_codec
|
||||
from easybits import Bits
|
||||
|
||||
# Characters removed by encode(); note that 'y'/'Y' are counted as vowels here.
vowels = 'aeiouyAEIOUY'
|
||||
# Characters encode() keeps verbatim: ASCII letters and digits only.
# Whitespace is handled separately in encode(); all other characters are dropped.
allowed_chars = string.ascii_letters + string.digits
|
||||
|
||||
def encode(text):
    """Reduce *text* to its non-vowel ASCII characters and wrap them in Bits.

    Two passes are made over the input:

    1. Keep only characters found in ``allowed_chars``. A whitespace
       character is replaced by a single ``' '``, and whitespace that
       follows an already-emitted space is skipped. Every other character
       is discarded without affecting the whitespace state.
    2. Drop every character listed in ``vowels`` from what remains
       (spaces and digits are not in ``vowels``, so they survive).

    The surviving characters are handed to ``Bits`` with
    ``encoding='ascii7'`` and that object is returned.
    """
    kept = []
    prev_was_space = False
    for symbol in text:
        if symbol in allowed_chars:
            kept.append(symbol)
            prev_was_space = False
            continue
        # Anything that is not whitespace, or whitespace directly after an
        # emitted space, contributes nothing to the output.
        if prev_was_space or symbol not in string.whitespace:
            continue
        kept.append(' ')
        prev_was_space = True

    consonants = [symbol for symbol in kept if symbol not in vowels]

    # NOTE(review): a list of one-character strings (not a joined str) is
    # passed to Bits here -- confirm that Bits accepts this together with
    # encoding='ascii7', or whether ''.join(...) was intended.
    return Bits(consonants, encoding='ascii7')
|
||||
|
||||
|
||||
def decode(data):
    """Turn encoded *data* back into text, seven bits at a time.

    *data* is converted to ``bytes`` and wrapped in ``Bits``. The bit
    sequence is then walked in steps of 7; each slice is appended to
    ``Bits('0')`` and the ``.ascii`` value of the result is collected.
    The collected pieces are concatenated and returned.

    Vowels removed by ``encode`` are not restored here.
    """
    stream = Bits(bytes(data))
    pieces = []
    for start in range(0, len(stream), 7):
        # Prefix the 7-bit group with Bits('0') -- presumably a single zero
        # bit that pads the group to a full byte; confirm against easybits.
        padded = Bits('0').concat(stream[start:start + 7])
        # NOTE(review): when len(stream) is not a multiple of 7 the final
        # slice is shorter than 7 bits -- confirm how .ascii handles that.
        pieces.append(Bits(padded).ascii)
    return "".join(pieces)
|
||||
|
||||
|
||||
# Hand the encode/decode pair to the project's registration helper under the
# name "noVow7" (presumably makes it usable as a codec name -- confirm in
# text_codecs.register). Runs as a side effect of importing this module.
register_codec(encode, decode, "noVow7")
|
||||
|
||||
|
||||
22846
text_codecs/texts/little_women.txt
Normal file
22846
text_codecs/texts/little_women.txt
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user