I wrote the naive algorithm by following the textbook approach, and it still took me almost an hour. I'm really rusty.
The input data must be sorted by freq in ascending order. The code could be optimized: use a heap as the workset, and in path collection (the sub function) avoid the repeated vector copies by backtracking with pop_back instead; rough sketches of both ideas follow the listing below.
I haven't written C++ in ages and have basically forgotten how.
```cpp
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;

using Symbol = string;

class Node {
public:
    optional<Symbol> symbol{};
    int freq{};
    Node *left{};
    Node *right{};

    Node(int _freq, optional<Symbol> _s = {}) : symbol(_s), freq(_freq) {}

    string format() const {
        string s;
        s += "freq: ";
        s += std::to_string(freq);
        s += " symbol: ";
        if (symbol.has_value()) {
            s += symbol.value();
        } else {
            s += "None";
        }
        s += " ";
        if (left != nullptr) {
            s += "left: (";
            s += left->format();
            s += ") right: (";
            s += right->format();
            s += ")";
        }
        return s;
    }
};

enum Branch {
    Left, Right
};

using EncodingType = vector<pair<Symbol, vector<Branch>>>;

string build_branches_code(const vector<Branch> &b) {
    string s{};
    for (const auto &item: b) {
        if (item == Left) {
            s += "0";
        } else {
            s += "1";
        }
    }
    return s;
}

void sub(Node *root, vector<Branch> prefix, EncodingType &result_vec) {
    if (root == nullptr) return;
    if (root->symbol.has_value()) {
        result_vec.push_back(pair(root->symbol.value(), prefix));
    }
    auto p1 = prefix; p1.push_back(Left);
    auto p2 = prefix; p2.push_back(Right);
    sub(root->left, p1, result_vec);
    sub(root->right, p2, result_vec);
}

EncodingType collect_encodings(Node *root) {
    EncodingType result;
    sub(root, {}, result);
    return result;
}

Node *build_huffman(Node **data, size_t size) {
    vector<Node *> workset;
    for (size_t i = 0; i < size; ++i) workset.push_back(data[i]);
    while (workset.size() > 1) {
        // the workset is kept sorted by freq, so the two smallest nodes are at the front
        auto n1 = workset[0];
        auto n2 = workset[1];
        auto new_node = new Node(n1->freq + n2->freq, {});
        if (n1->freq <= n2->freq) {
            new_node->left = n1;
            new_node->right = n2;
        } else {
            new_node->left = n2;
            new_node->right = n1;
        }
        // delete the first two nodes
        workset.erase(workset.begin());
        workset.erase(workset.begin());
        // insert the merged node back, keeping the workset sorted;
        // if no insertion position is found, new_node is greater than all nodes: append it at the end
        auto insert_pos = workset.size();
        for (size_t i = 0; i < workset.size(); ++i) {
            if (new_node->freq < workset[i]->freq) {
                insert_pos = i;
                break;
            }
        }
        workset.insert(workset.begin() + (int) insert_pos, new_node);
    }
    return workset.empty() ? nullptr : workset[0];
}

// compute the weighted path length (WPL)
size_t compute_wpl(const vector<Node *> &data, const EncodingType &encoding) {
    size_t sum = 0;
    for (const auto &item: encoding) {
        // find the original node carrying this symbol
        optional<size_t> pos{};
        for (size_t i = 0; i < data.size(); ++i) {
            if (data[i]->symbol == optional(item.first)) {
                pos = i;
                break;
            }
        }
        if (!pos.has_value()) {
            throw logic_error("unreachable");
        }
        auto freq = data[pos.value()]->freq;
        sum += freq * item.second.size();
    }
    return sum;
}

int main() {
    vector<Node *> data = {
        new Node(1, "a"),
        new Node(3, "b"),
        new Node(12, "c"),
        new Node(13, "d"),
        new Node(16, "e"),
        new Node(45, "f"),
    };
    auto tree = build_huffman(data.data(), data.size());
    auto encoding = collect_encodings(tree);
    for (const auto &item: encoding) {
        auto symbol = item.first;
        auto binary_string = build_branches_code(item.second);
        cout << "Symbol: " << symbol << "\tBinary: " << binary_string << endl;
    }
    size_t wpl = compute_wpl(data, encoding);
    cout << "WPL: " << wpl << endl;
    return 0;
}
```
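
For arbitrary input, one way to satisfy the ascending-freq requirement is to sort the pointer vector first. A minimal sketch, assuming the Node class from the listing above; sort_by_freq is just an illustrative name:

```cpp
#include <algorithm>

// Pre-sort arbitrary input by ascending freq so the naive build_huffman above
// can rely on its "two smallest nodes at the front" invariant.
void sort_by_freq(vector<Node *> &data) {
    sort(data.begin(), data.end(),
         [](const Node *a, const Node *b) { return a->freq < b->freq; });
}
```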
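
The heap idea: keep the workset in a std::priority_queue so the two lowest-frequency nodes can be popped in O(log n), instead of erasing from and re-inserting into a sorted vector. A rough sketch assuming the same Node class as above; build_huffman_heap and NodeCmp are names I made up for illustration:

```cpp
#include <queue>

// Comparator for a min-heap on freq: the queue then pops the smallest freq first.
struct NodeCmp {
    bool operator()(const Node *a, const Node *b) const {
        return a->freq > b->freq;
    }
};

Node *build_huffman_heap(Node **data, size_t size) {
    priority_queue<Node *, vector<Node *>, NodeCmp> workset;
    for (size_t i = 0; i < size; ++i) workset.push(data[i]);
    if (workset.empty()) return nullptr;
    while (workset.size() > 1) {
        auto n1 = workset.top(); workset.pop(); // smallest freq
        auto n2 = workset.top(); workset.pop(); // second smallest freq
        auto parent = new Node(n1->freq + n2->freq);
        parent->left = n1;  // n1->freq <= n2->freq, matching the ordering in the naive version
        parent->right = n2;
        workset.push(parent);
    }
    return workset.top();
}
```

A nice side effect is that the input no longer needs to be pre-sorted: the heap finds the two minimums itself.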
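
The pop_back idea: share a single prefix vector across the whole recursion and undo each push on the way back, so a copy is made only when a leaf's code is recorded. A sketch assuming Node, Branch, and EncodingType from the listing; sub_backtrack and collect_encodings_backtrack are illustrative names:

```cpp
void sub_backtrack(Node *root, vector<Branch> &prefix, EncodingType &result_vec) {
    if (root == nullptr) return;
    if (root->symbol.has_value()) {
        // only leaves carry a symbol, so this is the only place prefix gets copied
        result_vec.push_back(pair(root->symbol.value(), prefix));
    }
    prefix.push_back(Left);
    sub_backtrack(root->left, prefix, result_vec);
    prefix.back() = Right;   // reuse the same slot for the right branch
    sub_backtrack(root->right, prefix, result_vec);
    prefix.pop_back();       // backtrack before returning to the caller
}

EncodingType collect_encodings_backtrack(Node *root) {
    EncodingType result;
    vector<Branch> prefix;
    sub_backtrack(root, prefix, result);
    return result;
}
```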