Skip to content

Commit 70bb6b2

Browse files
raviolliii authored and AnupKumarPanwar committed
Added Huffman Coding Algorithm (#798)
1 parent 3f7bec6 commit 70bb6b2

File tree

1 file changed

+87
-0
lines changed

1 file changed

+87
-0
lines changed

compression/huffman.py

+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import heapq
import sys
from collections import Counter
2+
3+
class Letter:
    """A single character together with its frequency and assigned code."""

    def __init__(self, letter, freq):
        # The character and the count supplied by the caller.
        self.letter, self.freq = letter, freq
        # Starts empty; filled in later when the tree is traversed.
        self.bitstring = ""

    def __repr__(self):
        """Return the character and its frequency as ``letter:freq``."""
        return f'{self.letter}:{self.freq}'
11+
12+
13+
class TreeNode:
    """An internal tree node holding a frequency and two children."""

    def __init__(self, freq, left, right):
        # Stored exactly as given; no validation is performed here.
        self.freq, self.left, self.right = freq, left, right
18+
19+
20+
def parse_file(file_path):
    """
    Read the file and count how often each character occurs, then
    convert the counts into a list of Letters.

    The file is opened in text mode with the default encoding.

    Returns a list of Letter objects sorted by ascending frequency.
    Characters with the same frequency keep the order in which they
    first appear in the file. An empty file yields an empty list.
    """
    chars = Counter()
    with open(file_path) as f:
        # Feed whole lines to the counter instead of issuing one
        # read(1) call per character.
        for line in f:
            chars.update(line)
    # Counter preserves first-seen order and sorted() is stable, so
    # equal-frequency characters stay in first-appearance order.
    return sorted(
        (Letter(char, freq) for char, freq in chars.items()),
        key=lambda letter: letter.freq,
    )
38+
39+
def build_tree(letters):
    """
    Run through the list of Letters and build the Huffman Tree
    using a min heap keyed on frequency.

    letters is a non-empty list of objects exposing a ``freq``
    attribute. The list itself is not modified.

    The two lowest-frequency entries are repeatedly merged into a
    TreeNode whose frequency is their sum, until one entry remains.
    Entries with equal frequency are taken in the order they entered
    the heap (input order first, then merged nodes in creation order).

    Returns the root of the tree; for a single-element list that is
    the element itself.

    Raises IndexError if letters is empty.
    """
    if not letters:
        raise IndexError("cannot build a Huffman tree from an empty list")
    # Heap entries are (freq, order, node). The unique ``order`` value
    # breaks frequency ties deterministically and keeps heapq from ever
    # comparing the node objects themselves.
    heap = [(node.freq, order, node) for order, node in enumerate(letters)]
    heapq.heapify(heap)
    next_order = len(heap)
    while len(heap) > 1:
        left_freq, _, left = heapq.heappop(heap)
        right_freq, _, right = heapq.heappop(heap)
        total_freq = left_freq + right_freq
        merged = TreeNode(total_freq, left, right)
        heapq.heappush(heap, (total_freq, next_order, merged))
        next_order += 1
    return heap[0][2]
52+
53+
def traverse_tree(root, bitstring):
    """
    Recursively traverse the Huffman Tree to set each
    Letter's bitstring, and return the list of Letters.

    root is either a Letter (a leaf) or a node with ``left`` and
    ``right`` children. bitstring is the code prefix accumulated on
    the way down: going left appends "0", going right appends "1".

    Returns the Letters in left-to-right leaf order.

    If the whole tree is a single Letter (reached with an empty
    prefix), that Letter is given the code "0" so that it still
    encodes to one bit instead of an empty string.
    """
    if isinstance(root, Letter):
        # Only a lone root leaf can arrive with an empty prefix; every
        # leaf below an internal node already has at least one bit.
        root.bitstring = bitstring or "0"
        return [root]
    return (
        traverse_tree(root.left, bitstring + "0")
        + traverse_tree(root.right, bitstring + "1")
    )
65+
66+
def huffman(file_path):
    """
    Parse the file, build the tree, then run through the file
    again, printing the bitstring for each character.

    Output goes to stdout: a header line naming the file, then the
    bitstring of every character in file order, each followed by a
    single space, then a newline. An empty file prints the header
    followed by an empty line.
    """
    letters_list = parse_file(file_path)
    codes = {}
    if letters_list:  # an empty file has no letters, hence no tree
        root = build_tree(letters_list)
        # Build the character -> bitstring table once so each character
        # below costs a dict lookup rather than a scan of all Letters.
        codes = {
            letter.letter: letter.bitstring
            for letter in traverse_tree(root, "")
        }
    print(f'Huffman Coding of {file_path}: ')
    with open(file_path) as f:
        for line in f:
            # Joining with " " and ending with " " leaves exactly one
            # space after every bitstring.
            print(" ".join(codes[c] for c in line), end=" ")
    print()
84+
85+
if __name__ == "__main__":
    # The file path is expected as the first command-line argument;
    # exit with a usage message instead of an IndexError when it is missing.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <file_path>")
    # pass the file path to the huffman function
    huffman(sys.argv[1])

0 commit comments

Comments
 (0)