lab_compression/text_codecs/My Codecs Draft

allowed_characters = string.ascii_letters + ' .,!?"'

import string
from custom_codecs.register import register_codec
from easybits import Bits

char_to_binary = {
    ' ': '0100000', '!': '0100001', '"': '0100010', '#': '0100011',
    '$': '0100100', '%': '0100101', '&': '0100110', "'": '0100111',
    '(': '0101000', ')': '0101001', '*': '0101010', '+': '0101011',
    ',': '0101100', '-': '0101101', '.': '0101110', '/': '0101111',
    '0': '0110000', '1': '0110001', '2': '0110010', '3': '0110011',
    '4': '0110100', '5': '0110101', '6': '0110110', '7': '0110111',
    '8': '0111000', '9': '0111001', ':': '0111010', ';': '0111011',
    '<': '0111100', '=': '0111101', '>': '0111110', '?': '0111111',
    '@': '1000000', 'A': '1000001', 'B': '1000010', 'C': '1000011',
    'D': '1000100', 'E': '1000101', 'F': '1000110', 'G': '1000111',
    'H': '1001000', 'I': '1001001', 'J': '1001010', 'K': '1001011',
    'L': '1001100', 'M': '1001101', 'N': '1001110', 'O': '1001111',
    'P': '1010000', 'Q': '1010001', 'R': '1010010', 'S': '1010011',
    'T': '1010100', 'U': '1010101', 'V': '1010110', 'W': '1010111',
    'X': '1011000', 'Y': '1011001', 'Z': '1011010', '[': '1011011',
    '\\': '1011100', ']': '1011101', '^': '1011110', '_': '1011111',
    '`': '1100000', 'a': '1100001', 'b': '1100010', 'c': '1100011',
    'd': '1100100', 'e': '1100101', 'f': '1100110', 'g': '1100111',
    'h': '1101000', 'i': '1101001', 'j': '1101010', 'k': '1101011',
    'l': '1101100', 'm': '1101101', 'n': '1101110', 'o': '1101111',
    'p': '1110000', 'q': '1110001', 'r': '1110010', 's': '1110011',
    't': '1110100', 'u': '1110101', 'v': '1110110', 'w': '1110111',
    'x': '1111000', 'y': '1111001', 'z': '1111010'
}

the_shortcut = '110111011000110'

def encode(text):
    text = text.lower().replace('the', the_shortcut)

    binary_output = ''
    for char in text:
        if char in char_to_binary:
            binary_output += char_to_binary[char]

    return Bits(binary_output).bytes

def decode(data):
    binary_string = Bits(data).ascii
    text = ''
    i = 0
    while i < len(binary_string):
        char_binary = binary_string[i:i+7]
        if char_binary == the_shortcut:
            text += 'the'
            i += len(the_shortcut)
        elif char_binary in char_to_binary.values():
            char = list(char_to_binary.keys())[list(char_to_binary.values()).index(char_binary)]
            text += char
            i += 7
        else:
            i += 7

    return text

register_codec(encode, decode, "the_binary_codec")