|
| 1 | +import sys |
| 2 | + |
class Letter:
    """A leaf of the Huffman tree: one character and its frequency.

    Attributes:
        letter: the character this leaf stands for.
        freq: the frequency value supplied by the creator.
        bitstring: the code assigned to the character; empty until some
            later step fills it in.
    """

    def __init__(self, letter, freq):
        self.letter = letter
        self.freq = freq
        # No code is known at construction time.
        self.bitstring = ""

    def __repr__(self):
        """Return the compact ``letter:freq`` form used for debugging."""
        return "{}:{}".format(self.letter, self.freq)
| 11 | + |
| 12 | + |
class TreeNode:
    """An internal node of the Huffman tree.

    Attributes:
        freq: the frequency value stored on this node.
        left: the left child.
        right: the right child.
    """

    def __init__(self, freq, left, right):
        # Plain record: store the three values exactly as given.
        self.freq, self.left, self.right = freq, left, right
| 18 | + |
| 19 | + |
def parse_file(file_path):
    """
    Count every character in a file and return the counts as Letters.

    The file is opened in text mode with the default encoding and read
    line by line, so the whole file never has to fit in memory and no
    per-character ``read(1)`` calls are made.

    Args:
        file_path: path of the file to read.

    Returns:
        A list with one Letter per distinct character, sorted by
        ascending frequency. Characters with equal frequency keep the
        order in which they first appear in the file (the sort is
        stable). An empty file yields an empty list.
    """
    from collections import Counter

    counts = Counter()
    with open(file_path) as f:
        for line in f:
            # Counter.update on a string counts its individual characters.
            counts.update(line)

    letters = [Letter(char, freq) for char, freq in counts.items()]
    letters.sort(key=lambda letter: letter.freq)
    return letters
| 38 | + |
def build_tree(letters):
    """
    Build the Huffman tree for a list of Letters and return its root.

    The two lowest-frequency nodes are repeatedly merged under a new
    TreeNode whose frequency is their sum, until one node remains. A
    real min-heap is used, so each merge costs O(log n) instead of a
    full re-sort, and the input does not have to be pre-sorted.

    Ties on frequency are broken by insertion order: the original items
    in the order given, then merged nodes in the order they were
    created. For an input already sorted by frequency this produces the
    same tree as a stable "append and re-sort" approach.

    Args:
        letters: non-empty sequence of objects with a ``freq``
            attribute. The sequence itself is not modified.

    Returns:
        The root of the tree. For a single-element input this is that
        element itself.

    Raises:
        ValueError: if ``letters`` is empty.
    """
    import heapq

    if not letters:
        raise ValueError("cannot build a Huffman tree from an empty list")

    # The sequence number makes heap entries totally ordered, so the nodes
    # themselves (which define no ordering) are never compared.
    heap = [(node.freq, order, node) for order, node in enumerate(letters)]
    heapq.heapify(heap)
    next_order = len(heap)

    while len(heap) > 1:
        left = heapq.heappop(heap)[2]
        right = heapq.heappop(heap)[2]
        parent = TreeNode(left.freq + right.freq, left, right)
        heapq.heappush(heap, (parent.freq, next_order, parent))
        next_order += 1

    return heap[0][2]
| 52 | + |
def traverse_tree(root, bitstring):
    """
    Recursively walk the Huffman tree and assign each Letter its code.

    Going to a left child appends "0" to the code and going to a right
    child appends "1".

    Args:
        root: a Letter (leaf) or a node with ``left`` and ``right``
            children.
        bitstring: the code accumulated on the path from the tree root
            down to ``root``; pass "" for the tree root.

    Returns:
        The list of Letters under ``root`` in left-to-right order, each
        with its ``bitstring`` attribute set.
    """
    if isinstance(root, Letter):
        # A leaf reached with an empty path means the whole tree is this
        # one Letter. Give it a one-bit code so the encoded output is not
        # a sequence of empty strings.
        root.bitstring = bitstring or "0"
        return [root]
    return (
        traverse_tree(root.left, bitstring + "0")
        + traverse_tree(root.right, bitstring + "1")
    )
| 65 | + |
def huffman(file_path):
    """
    Print the Huffman coding of a file to standard output.

    The file is parsed to count its characters, the tree is built and
    traversed to assign codes, and then the file is read a second time
    to print the code of every character in order.

    Output is a header line, then one line holding each character's
    bitstring followed by a single space. For an empty file only the
    header and an empty line are printed.

    Args:
        file_path: path of the text file to encode.
    """
    letters_list = parse_file(file_path)
    print(f'Huffman Coding of {file_path}: ')
    if not letters_list:
        # Nothing to encode; building a tree from no letters would fail.
        print()
        return

    root = build_tree(letters_list)
    letters = traverse_tree(root, "")

    # One dict lookup per character instead of scanning the whole list.
    codes = {entry.letter: entry.bitstring for entry in letters}
    with open(file_path) as f:
        for line in f:
            print("".join(codes[c] + " " for c in line), end="")
    print()
| 84 | + |
if __name__ == "__main__":
    # The file path is the first command-line argument. Exit with a usage
    # message instead of an IndexError traceback when it is missing; any
    # extra arguments are ignored.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <file_path>")
    huffman(sys.argv[1])
0 commit comments