Skip to content

Instantly share code, notes, and snippets.

@bczhc
Last active December 28, 2025 09:54
Show Gist options
  • Select an option

  • Save bczhc/bc847e8992e73805d79264a451ce7007 to your computer and use it in GitHub Desktop.

Select an option

Save bczhc/bc847e8992e73805d79264a451ce7007 to your computer and use it in GitHub Desktop.
Huffman编码实现 #huffman
#include <iostream>
#include <optional>
#include <vector>
using namespace std;
using Symbol = string;
class Node {
public:
optional<Symbol> symbol{};
int freq{};
Node *left{};
Node *right{};
Node(int _freq, optional<Symbol> _s = {}) : symbol(_s), freq(_freq) {}
string format() const {
string s;
s += "freq: ";
s += std::to_string(freq);
s += " symbol: ";
if (symbol.has_value()) {
s += symbol.value();
} else {
s += "None";
}
s += " ";
if (left != nullptr) {
s += "left: (";
s += left->format();
s += ") right: (";
s += right->format();
s += ")";
}
return s;
}
};
enum Branch {
Left, Right
};
using EncodingType = vector<pair<Symbol, vector<Branch>>>;
string build_branches_code(const vector<Branch> & b) {
string s{};
for (const auto &item: b) {
if (item == Left) {
s += "0";
} else {
s += "1";
}
}
return s;
}
void sub(Node *root, vector<Branch> prefix, EncodingType &result_vec) {
if (root == nullptr) return;
if (root->symbol.has_value()) {
result_vec.push_back(pair(root->symbol.value(), prefix));
}
auto p1 = prefix; p1.push_back(Left);
auto p2 = prefix; p2.push_back(Right);
sub(root->left, p1, result_vec);
sub(root->right, p2, result_vec);
};
EncodingType collect_encodings(Node *root) {
EncodingType result;
sub(root, {}, result);
return result;
}
Node *build_huffman(Node **data, size_t size) {
vector<Node *> workset;
for (auto i = 0; i < size; ++i) workset.push_back(data[i]);
for (auto iteration_n = 1;; ++iteration_n) {
auto n1 = workset[0];
auto n2 = workset[1];
auto new_node = new Node(n1->freq + n2->freq, {});
if (n1->freq <= n2->freq) {
new_node->left = n1;
new_node->right = n2;
} else {
new_node->left = n2;
new_node->right = n1;
}
// delete the first two nodes
workset.erase(workset.begin());
workset.erase(workset.begin());
// insert to the workset
// if the insertion position not found, new_node is greater than all the nodes; insert it to the end
auto insert_pos = workset.size();
for (auto i = 0; i < workset.size(); ++i) {
if (new_node->freq < workset[i]->freq) {
insert_pos = i;
break;
}
}
auto pos = workset.begin() + (int) insert_pos;
workset.insert(pos, new_node);
if (workset.size() == 1) break;
}
auto root = workset[0];
return root;
}
size_t compute_wpl(const vector<Node *> &data, EncodingType &encoding) {// compute WPL
size_t sum = 0;
for (const auto &item: encoding) {
// find position
optional<size_t> pos{};
for (auto i = 0; i < data.size(); ++i) {
if (data[i]->symbol == optional(item.first)) {
pos = i;
break;
}
}
if (!pos.has_value()) {
throw "unreachable";
}
auto freq = data[pos.value()]->freq;
sum += freq * item.second.size();
}
return sum;
}
int main() {
vector<Node *> data = {
new Node(1, "a"),
new Node(3, "b"),
new Node(12, "c"),
new Node(13, "d"),
new Node(16, "e"),
new Node(45, "f"),
};
auto tree = build_huffman(data.data(), data.size());
auto encoding = collect_encodings(tree);
for (const auto &item: encoding) {
auto symbol = item.first;
auto binary_string = build_branches_code(item.second);
cout << "Symbol: " << symbol << "\tBinary: " << binary_string << endl;
}
size_t wpl = compute_wpl(data, encoding);
cout << "WPL: " << wpl << endl;
return 0;
}

对着教科书上的方式写的naive算法,写了将近一个小时。我太菜了。

输入的data要求freq由小到大排序。代码能够优化:使用堆当作workset、收集路径(sub函数)中避免频繁复制vector,而是使用pop_back回溯。

已经一万年没写过的C++了,完全不会写了。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment