- tags
- Compression, Entropy coding
Python implementation
from heapq import heappush, heappop, heapify
def huffman_coding(frequency_dict):
# Create a heap of tuples (frequency, character, code)
heap = [[frequency, [character, ""]] for character, frequency in frequency_dict.items()]
heapify(heap)
while len(heap) > 1:
# Extract the two nodes with the lowest frequencies
left, right = heappop(heap), heappop(heap)
# Assign a "0" to the left child and a "1" to the right child
for pair in left[1:]:
pair[1] = "0" + pair[1]
for pair in right[1:]:
pair[1] = "1" + pair[1]
# Merge the two nodes and add the resulting node back to the heap
heappush(heap, [left[0] + right[0]] + left[1:] + right[1:])
# Extract the coding dictionary from the heap
return dict(sorted(heappop(heap)[1:], key=lambda p: (len(p[-1]), p)))
# Example usage
frequency_dict = {'a': 45, 'b': 13, 'c': 12, 'd': 16, 'e': 9, 'f': 5, 'r': 2, 'q': 1}
print(huffman_coding(frequency_dict))
The code above outputs the following Python dictionary:
{'a': '0', 'b': '101', 'c': '100', 'd': '110', 'e': '1111', 'f': '11101', 'q': '111000', 'r': '111001'}