lab_compression/text_codecs/My Codecs Draft

64 lines
2.5 KiB
Plaintext

import string

# ASCII letters plus space and a few punctuation marks. The import must come
# first: the original order read `string` before it was imported, which raises
# NameError as soon as the module is loaded.
allowed_characters = string.ascii_letters + ' .,!?"'
from custom_codecs.register import register_codec
from easybits import Bits
# 7-bit code table: every ASCII character from ' ' (32) through 'z' (122),
# in code-point order, mapped to its code point written as a zero-padded
# 7-digit binary string (for example 'a' -> '1100001').
char_to_binary = {
    chr(code_point): format(code_point, '07b')
    for code_point in range(ord(' '), ord('z') + 1)
}
# Dedicated 7-bit code that stands for the whole word 'the'. It is 123 in
# binary, one past the last entry in char_to_binary ('z' = 122), so it can
# never be mistaken for a single-character code and every code in the stream
# stays exactly 7 bits wide. (A longer shortcut that starts with a valid
# character code cannot be told apart from ordinary text when decoding.)
the_shortcut = '1111011'


def encode(text):
    """Encode text as a string of 7-bit codes and hand it to Bits for packing.

    The text is lowercased first, so the encoding is lossy with respect to
    letter case. Every occurrence of 'the' is replaced by the single 7-bit
    ``the_shortcut`` code; every other character is replaced by its entry in
    ``char_to_binary``. Characters with no entry are silently dropped.

    Args:
        text: The string to encode.

    Returns:
        ``Bits(bit_string).bytes`` for the concatenated '0'/'1' string.
        NOTE(review): how Bits turns that string into bytes is defined by
        easybits, not here -- confirm against its documentation.
    """
    # Splitting on 'the' finds the same left-to-right, non-overlapping matches
    # as str.replace, but keeps the shortcut bits out of the per-character
    # lookup so they are emitted as-is instead of being re-encoded as the
    # characters '1' and '0'.
    segments = text.lower().split('the')
    encoded_segments = [
        ''.join(char_to_binary[char] for char in segment if char in char_to_binary)
        for segment in segments
    ]
    binary_output = the_shortcut.join(encoded_segments)
    return Bits(binary_output).bytes


def decode(data):
    """Decode data produced by ``encode`` back into (lowercase) text.

    Args:
        data: The encoded value, passed straight to ``Bits``.

    Returns:
        The decoded string. Each 7-character group equal to ``the_shortcut``
        becomes 'the'; each group found in ``char_to_binary`` becomes its
        character; unrecognized or incomplete trailing groups are skipped.
    """
    # NOTE(review): assumes Bits(data).ascii yields the '0'/'1' string that
    # encode() built -- confirm against the easybits documentation.
    binary_string = Bits(data).ascii

    # Build the reverse table once instead of searching the values per group.
    binary_to_text = {code: char for char, code in char_to_binary.items()}
    binary_to_text[the_shortcut] = 'the'

    code_width = len(the_shortcut)  # all codes, shortcut included, are 7 bits
    pieces = []
    for start in range(0, len(binary_string), code_width):
        piece = binary_to_text.get(binary_string[start:start + code_width])
        if piece is not None:
            pieces.append(piece)
    return ''.join(pieces)
# Register the encode/decode pair under the name "the_binary_codec".
# NOTE(review): what registration does is defined in custom_codecs.register,
# which is not visible from this file.
register_codec(encode, decode, "the_binary_codec")