返回
夯实哈夫曼树编程,清晰解析五大语言代码实现,极简入门核心思想
前端
2023-09-02 23:05:24
哈夫曼树:数据压缩的利器
导言
在数据存储和传输的领域中,哈夫曼树扮演着至关重要的角色,以其巧妙的压缩方式和无损的还原能力,为我们节省了宝贵的空间和时间。本篇博客将带你踏上哈夫曼树的构建之旅,逐一探索其在五大编程语言中的实现方案——Java、JavaScript、Python、C和C++。让我们共同揭开哈夫曼树背后的奥秘,掌握这门数据压缩的利器。
哈夫曼树的原理
哈夫曼树是一种用于构造最优前缀编码的二叉树:出现频率越高的字符分配越短的编码,出现频率越低的字符分配越长的编码,从而达到压缩数据的目的。该算法由大卫·哈夫曼(David A. Huffman)于1952年提出,采用的是贪心策略;其理论基础则来自信息论之父克劳德·香农。香农在研究信息熵时发现,不同符号出现的频率不同,那么将其映射到不同长度的编码上,就可以有效减少整体编码的长度。
哈夫曼树的构建过程
哈夫曼树的构建过程如下:
- 将每个字符及其出现的频率作为叶子节点,构建森林。
- 将频率最小的两个节点合并成一个新的节点,其频率等于这两个节点频率之和。
- 重复上一步(合并频率最小的两个节点),直到森林中只剩下一棵树,这棵树就是哈夫曼树。
- 从根节点开始,沿着左子树和右子树分别分配0和1,直到所有的叶子节点都分配好编码。
Java实现
import java.util.PriorityQueue;
/**
 * A single node of a Huffman tree.
 *
 * <p>Leaves carry a symbol in {@code data}; the tree builder in this file
 * creates internal nodes with {@code data == -1} and the summed frequency of
 * their two children.
 */
class HuffmanNode {
    /** Symbol stored in this node. */
    int data;
    /** Frequency (weight) of this node. */
    int freq;
    /** Child subtrees; both stay {@code null} until a builder links them. */
    HuffmanNode left, right;

    /**
     * Creates a node without children.
     *
     * @param data symbol value
     * @param freq frequency of the symbol
     */
    HuffmanNode(int data, int freq) {
        // Reference fields default to null, so only the scalars need setting.
        this.freq = freq;
        this.data = data;
    }
}
/**
 * Builds Huffman trees from symbol/frequency tables and prints the prefix
 * codes those trees define.
 */
class HuffmanTree {
    /**
     * Builds a Huffman tree by repeatedly merging the two lowest-frequency nodes.
     *
     * @param data symbols; {@code data[i]} is paired with {@code freq[i]}
     * @param freq frequency of each symbol, parallel to {@code data}
     * @return the root of the tree, or {@code null} when {@code data} is empty
     */
    HuffmanNode buildTree(int[] data, int[] freq) {
        // Integer.compare instead of subtraction: a.freq - b.freq can overflow
        // and invert the ordering for operands of opposite sign.
        PriorityQueue<HuffmanNode> pq =
                new PriorityQueue<>((a, b) -> Integer.compare(a.freq, b.freq));

        // One leaf per (symbol, frequency) pair.
        for (int i = 0; i < data.length; i++) {
            pq.add(new HuffmanNode(data[i], freq[i]));
        }

        // Merge until a single tree remains.
        while (pq.size() > 1) {
            HuffmanNode left = pq.poll();
            HuffmanNode right = pq.poll();

            // Internal nodes carry -1 as a placeholder symbol.
            HuffmanNode parent = new HuffmanNode(-1, left.freq + right.freq);
            parent.left = left;
            parent.right = right;
            pq.add(parent);
        }

        // poll() yields null on an empty queue, i.e. for empty input.
        return pq.poll();
    }

    /**
     * Prints one {@code "symbol: code"} line per leaf, appending 0 for each
     * left edge and 1 for each right edge on the path from the root.
     *
     * @param root root of the (sub)tree to print; {@code null} prints nothing
     * @param code code prefix accumulated so far; pass {@code ""} for the root
     */
    void printCodes(HuffmanNode root, String code) {
        if (root == null) {
            return;
        }
        if (root.left == null && root.right == null) {
            // A tree made of a single leaf has no edges, which would yield an
            // empty (unusable) code; give that lone symbol the code "0".
            System.out.println(root.data + ": " + ("".equals(code) ? "0" : code));
            return;
        }
        printCodes(root.left, code + "0");
        printCodes(root.right, code + "1");
    }
}
JavaScript实现
/**
 * A single node of a Huffman tree: a symbol, its frequency and two child links.
 */
class HuffmanNode {
  /**
   * @param {*} data symbol stored in the node
   * @param {number} freq frequency (weight) of the node
   */
  constructor(data, freq) {
    // Children start out unlinked; the property order matches data, freq, left, right.
    Object.assign(this, { data, freq, left: null, right: null });
  }
}
/**
 * Builds Huffman trees from symbol/frequency tables and prints the prefix
 * codes those trees define.
 */
class HuffmanTree {
  /**
   * Builds a Huffman tree by repeatedly merging the two lowest-frequency nodes.
   *
   * @param {Array} data symbols; data[i] is paired with freq[i]
   * @param {number[]} freq frequency of each symbol, parallel to data
   * @returns {HuffmanNode|null} root of the tree, or null when data is empty
   */
  buildTree(data, freq) {
    // JavaScript has no built-in PriorityQueue, so pending nodes are kept in an
    // array sorted by descending frequency: the minimum is always at the end.
    const pending = [];
    const insert = (node) => {
      // Binary search for the first slot whose frequency is <= node.freq.
      let lo = 0;
      let hi = pending.length;
      while (lo < hi) {
        const mid = (lo + hi) >>> 1;
        if (pending[mid].freq > node.freq) {
          lo = mid + 1;
        } else {
          hi = mid;
        }
      }
      pending.splice(lo, 0, node);
    };

    // One leaf per (symbol, frequency) pair.
    for (let i = 0; i < data.length; i++) {
      insert(new HuffmanNode(data[i], freq[i]));
    }

    // Merge the two smallest nodes until a single tree remains.
    while (pending.length > 1) {
      const left = pending.pop();
      const right = pending.pop();
      const parent = new HuffmanNode(null, left.freq + right.freq);
      parent.left = left;
      parent.right = right;
      insert(parent);
    }

    return pending.length > 0 ? pending.pop() : null;
  }

  /**
   * Logs one "symbol: code" line per leaf, appending 0 for each left edge and
   * 1 for each right edge on the path from the root.
   *
   * @param {HuffmanNode|null} root root of the (sub)tree to print
   * @param {string} code code prefix accumulated so far; pass "" for the root
   */
  printCodes(root, code) {
    if (root == null) {
      return;
    }
    if (root.left == null && root.right == null) {
      // A tree made of a single leaf has no edges, which would yield an empty
      // code; give that lone symbol the code "0".
      console.log(root.data + ": " + (code === "" ? "0" : code));
      return;
    }
    // Methods must be reached through `this`; a bare printCodes(...) is a
    // ReferenceError inside a class body.
    this.printCodes(root.left, code + "0");
    this.printCodes(root.right, code + "1");
  }
}
Python实现
import heapq
class HuffmanNode:
    """A single node of a Huffman tree.

    Attributes:
        data: symbol stored in the node.
        freq: frequency (weight) of the node.
        left: left child, or None.
        right: right child, or None.
    """

    def __init__(self, data, freq):
        self.data = data
        self.freq = freq
        self.left = None
        self.right = None

    def __lt__(self, other):
        """Order nodes by frequency so they can be stored directly in a heapq."""
        if not isinstance(other, HuffmanNode):
            return NotImplemented
        return self.freq < other.freq
class HuffmanTree:
    """Builds Huffman trees and prints the prefix codes they define."""

    def build_tree(self, data, freq):
        """Build a Huffman tree by repeatedly merging the two rarest nodes.

        Args:
            data: sequence of symbols; data[i] is paired with freq[i].
            freq: sequence of frequencies, parallel to data.

        Returns:
            The root HuffmanNode, or None when data is empty.
        """
        # Heap entries are (freq, order, node). Leaves are stored as real
        # HuffmanNode objects so every child has .left/.right, and the unique
        # `order` breaks frequency ties before the comparison can ever reach
        # the node itself.
        pq = [
            (freq[order], order, HuffmanNode(symbol, freq[order]))
            for order, symbol in enumerate(data)
        ]
        heapq.heapify(pq)
        next_order = len(pq)

        # Merge until a single tree remains.
        while len(pq) > 1:
            left_freq, _, left = heapq.heappop(pq)
            right_freq, _, right = heapq.heappop(pq)
            parent = HuffmanNode(None, left_freq + right_freq)
            parent.left = left
            parent.right = right
            heapq.heappush(pq, (parent.freq, next_order, parent))
            next_order += 1

        return pq[0][2] if pq else None

    def print_codes(self, root, code):
        """Print one "symbol: code" line per leaf below root.

        A "0" is appended for each left edge and a "1" for each right edge.

        Args:
            root: root of the (sub)tree to print; None prints nothing.
            code: code prefix accumulated so far; pass "" for the tree root.
        """
        if root is None:
            return
        if root.left is None and root.right is None:
            # A tree made of a single leaf has no edges, which would yield an
            # empty code; give that lone symbol the code "0". The f-string
            # also lets non-string symbols (e.g. ints) be printed.
            print(f"{root.data}: {code or '0'}")
            return
        self.print_codes(root.left, code + "0")
        self.print_codes(root.right, code + "1")
C实现
#include <stdio.h>
#include <stdlib.h>
/* One node of a Huffman tree: a symbol, its frequency and two child links. */
struct HuffmanNode {
    int data;                         /* symbol stored in the node */
    int freq;                         /* frequency (weight) of the node */
    struct HuffmanNode *left, *right; /* child subtrees, NULL when absent */
};
// Handle for a Huffman tree; its only member is the pointer to the root node.
struct HuffmanTree {
struct HuffmanNode *root;
};
// Function to create a new Huffman node.
// Allocates a node on the heap, stores data and freq in it and leaves both
// child pointers NULL. Returns NULL if the allocation fails; otherwise the
// caller owns the node and releases it with free().
struct HuffmanNode *create_node(int data, int freq) {
    struct HuffmanNode *node = (struct HuffmanNode *)malloc(sizeof(struct HuffmanNode));
    // malloc may fail; report that to the caller instead of dereferencing NULL.
    if (node == NULL) {
        return NULL;
    }
    node->data = data;
    node->freq = freq;
    node->left = NULL;
    node->right = NULL;
    return node;
}
// Function to build the Huffman tree.
struct HuffmanTree *build_tree(int data[], int freq[], int size) {
struct HuffmanTree *tree = (struct HuffmanTree *)malloc(sizeof(struct HuffmanTree));
// Create a priority queue to store the Huffman nodes.
struct HuffmanNode **pq = (struct HuffmanNode ** )malloc(sizeof(struct HuffmanNode *) * size);
int front = 0, rear = 0;
// Create a Huffman node for each data and frequency pair.