generated from mwc/lab_compression
	
		
			
				
	
	
		
			34 lines
		
	
	
		
			1.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			34 lines
		
	
	
		
			1.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from custom_codecs.register import register_codec
 | 
						|
from easybits import Bits
 | 
						|
 | 
						|
def _without_leading_bit(char):
    """Return the bits of one character with the first bit removed.

    The character is converted to bits using the ASCII encoding. A
    character that cannot be encoded as ASCII is replaced with '?'.
    """
    try:
        octet = Bits(char, encoding='ascii')
    except UnicodeEncodeError:
        octet = Bits('?')
    return octet[1:]


def encode(text):
    """Pack ASCII text into seven bits per character.

    Each character is turned into its ASCII byte; any non-ASCII
    character is replaced with '?'. The first bit of every ASCII byte
    is 0, so it is dropped and only the remaining seven bits are kept
    (the decoder can put the 0 back). The seven-bit groups are joined
    end to end and returned via ``Bits.bytes``, so ASCII text takes
    roughly 7/8 of its usual size.
    """
    packed = Bits()
    for char in text:
        packed = packed.concat(_without_leading_bit(char))
    return packed.bytes
 | 
						|
 | 
						|
def decode(data):
    """Decode bytes produced by ``encode`` back into ASCII text.

    Reads the input seven bits at a time, puts a 0 bit on the front of
    each group to rebuild a full byte, and converts that byte into its
    ASCII character.

    Only complete seven-bit groups are decoded. The input always holds
    a whole number of bytes, so its bit length is frequently not a
    multiple of seven; the leftover bits at the end are byte-alignment
    padding rather than a character, and are ignored instead of being
    decoded into a bogus trailing character.

    NOTE(review): when the padding is exactly seven bits long (text
    whose length is 7, 15, 23, ... characters) it is indistinguishable
    from a real character and is still decoded -- presumably as a NUL,
    assuming ``Bits.bytes`` pads with zero bits; confirm. Removing that
    ambiguity would require changing the encoded format.
    """
    bits = Bits(bytes(data))
    # Stop before any trailing fragment shorter than seven bits.
    usable = len(bits) - len(bits) % 7
    chars = []
    for start in range(0, usable, 7):
        byte = Bits('0').concat(bits[start:start + 7])
        chars.append(Bits(byte).ascii)
    # Join once at the end instead of growing a string with += per group.
    return "".join(chars)
 | 
						|
 | 
						|
# Register the encode/decode pair above under the name "ascii7".
# NOTE(review): presumably this makes "ascii7" usable as a codec name
# elsewhere in the project -- confirm in custom_codecs.register.
register_codec(encode, decode, "ascii7")
 |