generated from mwc/lab_compression
Creating a text codec. I went simple, since this was a really complicated process for me. I started by defining the characters I wanted in the text and listed them based on the set provided. Then, after those were input, I defined "the" as a shortcut, since it is a common word in the language. I also added a few more common words to the list for shortcuts. I think that this, in the classroom setting, would be really interesting. Students could potentially create an entire language, starting with physical words, then using encoding and compression to create languages like emojis or shapes or numbers. Students could potentially create ciphers to decode and analyze in groups. Overall, this is making sense, but it is definitely a challenging concept. Looking forward to continuing to work on it!
64 lines
2.5 KiB
Plaintext
64 lines
2.5 KiB
Plaintext
# Imported here because this assignment is the first statement of the module
# and runs before the file's main import block; without it the module fails
# to load with NameError: name 'string' is not defined.
import string

# Characters the codec is intended to accept: upper- and lowercase ASCII
# letters plus space and a handful of punctuation marks.
allowed_characters = string.ascii_letters + ' .,!?"'
|
|
|
|
import string
|
|
from custom_codecs.register import register_codec
|
|
from easybits import Bits
|
|
|
|
# Lookup table from a character to its 7-bit code, covering the ASCII range
# from space (0x20) through 'z' (0x7A). Each code is simply the character's
# code point written as a zero-padded, 7-digit binary string, so the table is
# generated instead of being spelled out entry by entry.
char_to_binary = {
    chr(code_point): format(code_point, '07b')
    for code_point in range(ord(' '), ord('z') + 1)
}
|
|
|
|
# Width, in bits, of every token in the encoded stream.
_CODE_WIDTH = 7

# 7-bit code standing in for the whole word "the". The pattern 1111111 is not
# assigned to any character in char_to_binary, so the decoder can tell it
# apart from a regular character, and because it has the same width as every
# other code the stream can still be read in fixed 7-bit steps.
# (The previous 15-bit value began with the code for 'n', which made it
# impossible to distinguish from ordinary text when decoding.)
the_shortcut = '1111111'


def encode(text):
    """Encode ``text`` as a sequence of 7-bit codes and return it as bytes.

    Every occurrence of the lowercase word "the" (also inside longer words,
    e.g. "other") is written as the single code ``the_shortcut``; every other
    character is looked up in ``char_to_binary``. Letter case is preserved so
    that ``decode(encode(text))`` gives back the original text.

    Characters that have no entry in ``char_to_binary`` are skipped, so they
    do not survive a round trip.

    Args:
        text: The string to encode.

    Returns:
        Whatever ``Bits(<string of '0'/'1' characters>).bytes`` yields.
    """
    codes = []
    position = 0
    while position < len(text):
        if text.startswith('the', position):
            # Emit the shortcut code directly. Substituting it into the text
            # would cause its '0'/'1' digits to be encoded as characters,
            # making "the" longer instead of shorter.
            codes.append(the_shortcut)
            position += len('the')
            continue
        code = char_to_binary.get(text[position])
        if code is not None:
            codes.append(code)
        position += 1

    # NOTE(review): the Bits call is kept exactly as the original wrote it;
    # confirm against the easybits docs how a str argument is interpreted.
    return Bits(''.join(codes)).bytes


def decode(data):
    """Decode bytes produced by ``encode`` back into text.

    The '0'/'1' string recovered from ``data`` is read in steps of 7
    characters. A chunk equal to ``the_shortcut`` yields "the", a chunk found
    in ``char_to_binary`` yields its character, and any other chunk
    (including a short trailing one) is skipped.

    Args:
        data: The encoded value previously returned by ``encode``.

    Returns:
        The decoded string.
    """
    # NOTE(review): kept exactly as the original wrote it; presumably .ascii
    # returns the same '0'/'1' string that encode() passed to Bits -- confirm.
    binary_string = Bits(data).ascii

    # Build the reverse table once per call instead of searching the list of
    # values for every chunk.
    binary_to_text = {code: char for char, code in char_to_binary.items()}
    binary_to_text[the_shortcut] = 'the'

    pieces = []
    for start in range(0, len(binary_string), _CODE_WIDTH):
        piece = binary_to_text.get(binary_string[start:start + _CODE_WIDTH])
        if piece is not None:
            pieces.append(piece)
    return ''.join(pieces)
|
|
|
|
# Hand the encode/decode pair to the project's codec registry under the name
# "the_binary_codec". NOTE(review): presumably this makes the codec selectable
# by that name elsewhere -- see custom_codecs.register to confirm.
register_codec(encode, decode, "the_binary_codec")
|