# Generated from mwc/lab_compression (Python, 40 lines, 1.2 KiB).
import string
 | 
						|
import codecs
 | 
						|
from custom_codecs.register import register_codec
 | 
						|
from easybits import Bits
 | 
						|
 | 
						|
# Characters the encoder keeps verbatim: ASCII letters followed by digits.
allowed_characters = "".join((string.ascii_letters, string.digits))
 | 
						|
 | 
						|
def encode(text):
    """Lossily encode text, keeping only ASCII letters, digits and spaces.

    Characters found in ``allowed_characters`` are kept as-is. A whitespace
    character (space, tab, newline, ...) is written as a single space, but
    only when the previously *emitted* character was not already a space,
    so whitespace runs collapse to one space. Every other character is
    dropped without affecting that state.

    The filtered text is wrapped in ``Bits`` and its ``bytes`` are returned.
    """
    kept = []
    previous_was_space = False
    for char in text:
        if char in allowed_characters:
            kept.append(char)
            previous_was_space = False
            continue
        # Not an allowed character: either whitespace to collapse, or noise.
        if previous_was_space or char not in string.whitespace:
            continue
        kept.append(' ')
        previous_was_space = True
    return Bits(''.join(kept)).bytes
 | 
						|
 | 
						|
def decode(data):
    """Decode a sequence of byte values into text.

    Each item of ``data`` is wrapped as an 8-bit ``Bits`` value and the
    ``ascii`` rendering of that value is appended to the result.

    Args:
        data: An iterable of integer byte values (for example ``bytes``).

    Returns:
        The decoded text as a single ``str``.
        NOTE(review): the previous docstring promised ``(string, length)``,
        but the function has only ever returned the string; confirm which
        shape ``register_codec`` expects.

    Raises:
        ValueError: If ``Bits`` rejects a byte with ``OverflowError``. The
            original exception is chained as the cause.
    """
    pieces = []
    for position, byte in enumerate(data):
        try:
            pieces.append(Bits(byte, length=8).ascii)
        except OverflowError as err:
            # Fail loudly with context instead of printing and dropping
            # into the debugger, which stalls non-interactive callers and
            # silently skips the byte once execution resumes.
            raise ValueError(
                f"cannot decode byte {byte!r} at position {position}"
            ) from err
    return "".join(pieces)
 | 
						|
    
 | 
						|
# Register the encode/decode pair above under the name "alphanumeric".
# NOTE(review): presumably this makes the codec usable through the standard
# codecs lookup; confirm against custom_codecs.register.
register_codec(encode, decode, "alphanumeric")
 |