Huffman coding

tags
Compression, Entropy coding

Python implementation

from heapq import heappush, heappop, heapify

def huffman_coding(frequency_dict):
    # Create a heap of tuples (frequency, character, code)
    heap = [[frequency, [character, ""]] for character, frequency in frequency_dict.items()]
    heapify(heap)
    while len(heap) > 1:
        # Extract the two nodes with the lowest frequencies
        left, right = heappop(heap), heappop(heap)
        # Assign a "0" to the left child and a "1" to the right child
        for pair in left[1:]:
            pair[1] = "0" + pair[1]
        for pair in right[1:]:
            pair[1] = "1" + pair[1]
        # Merge the two nodes and add the resulting node back to the heap
        heappush(heap, [left[0] + right[0]] + left[1:] + right[1:])
    # Extract the coding dictionary from the heap
    return dict(sorted(heappop(heap)[1:], key=lambda p: (len(p[-1]), p)))

# Example usage
frequency_dict = {'a': 45, 'b': 13, 'c': 12, 'd': 16, 'e': 9, 'f': 5, 'r': 2, 'q': 1}
print(huffman_coding(frequency_dict))

The code above outputs the following Python dictionary:

{'a': '0', 'b': '101', 'c': '100', 'd': '110', 'e': '1111', 'f': '11101', 'q': '111000', 'r': '111001'}
Last changed | authored by

Comments


← Back to Notes