Created
November 30, 2018 04:53
-
-
Save ultrasilicon/efa3445156b4503b34a0f6bd595d2393 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /******************************************************************************* | |
| * Name : stairclimber.cpp | |
| * Author : Han Zheng | |
| * Date : 28 Nov. 2018 | |
| * Description : Lists the number of ways to climb n stairs. | |
| * Pledge : I pledge my honor that I have abided by the Stevens Honor System. | |
| ******************************************************************************/ | |
| #include <iostream> | |
| #include <vector> | |
| #include <algorithm> | |
| #include <sstream> | |
| #include <iomanip> | |
| #include <functional> | |
| #include <fstream> | |
| #include <queue> | |
| using namespace std; | |
| struct TrieNode; | |
| static int g_limit = 0; | |
| static string g_data; | |
| static vector<string> g_words; | |
| struct TrieNode | |
| { | |
| TrieNode() | |
| {} | |
| TrieNode(const char& key, TrieNode* parent) | |
| : key_(key) | |
| , parent_(parent) | |
| {} | |
| int num_ = 0; | |
| char key_ = '0'; | |
| char padding[3]; | |
| array<TrieNode*, 27> children_{}; | |
| TrieNode* parent_; | |
| static TrieNode root; | |
| }; | |
| TrieNode TrieNode::root; | |
| void trie_insert(const string &s, TrieNode *n) | |
| { | |
| TrieNode *cur = n; | |
| for(const char& c : s) | |
| { | |
| size_t index = static_cast<size_t>(c) - '`'; | |
| if(!cur->children_[index]) | |
| cur->children_[index] = new TrieNode(c, cur); | |
| cur = cur->children_[index]; | |
| } | |
| ++ cur->num_; | |
| } | |
| string trie_recover(TrieNode *n) | |
| { | |
| string r; | |
| TrieNode* cur = n; | |
| while(cur->key_ != '0') | |
| { | |
| if(cur->key_ == '`') | |
| r.push_back('\''); | |
| else | |
| r.push_back(cur->key_); | |
| cur = cur->parent_; | |
| } | |
| std::reverse(r.begin(), r.end()); | |
| return r; | |
| } | |
| static auto cmp = [](TrieNode* l, TrieNode* r) { | |
| return l->num_ < r->num_; | |
| }; | |
| static priority_queue<TrieNode*, std::vector<TrieNode*>, decltype(cmp)> pq(cmp); | |
| void print_queue(priority_queue<TrieNode*, std::vector<TrieNode*>, decltype(cmp)>& q) { | |
| int size = q.top()->num_; | |
| int digit; | |
| if(size < 10) | |
| digit = 1; | |
| else if(size < 100) | |
| digit = 2; | |
| else if(size < 1000) | |
| digit = 3; | |
| else if(size < 10000) | |
| digit = 4; | |
| else if(size < 100000) | |
| digit = 5; | |
| else if(size < 1000000) | |
| digit = 6; | |
| else if(size < 10000000) | |
| digit = 7; | |
| else if(size < 100000000) | |
| digit = 8; | |
| else | |
| digit = 9; | |
| cout << std::right; | |
| while(!q.empty()) { | |
| cout << setw(digit) << q.top()->num_ << ". " << trie_recover(q.top()) << " \n"; | |
| q.pop(); | |
| } | |
| } | |
| void collect(TrieNode *n) | |
| { | |
| if(n->num_ != 0) | |
| pq.push(n); | |
| for(TrieNode* it : n->children_) | |
| if(it) | |
| collect(it); | |
| } | |
| void wash_data() | |
| { | |
| bool jump = false; | |
| int wordBegin = 0; | |
| auto head = g_data.begin(); | |
| for(auto it = g_data.begin(); it != g_data.end(); ++ it) | |
| { | |
| if(jump) | |
| { | |
| jump = false; | |
| continue; | |
| } | |
| if((*it >= 'a' && *it <= 'z')) | |
| { | |
| *head = *it; | |
| wordBegin = 0; | |
| } | |
| else if(*it == ' ') | |
| { | |
| *head = *it; | |
| wordBegin = 1; | |
| } | |
| else if(*it >= 'A' && *it <= 'Z') | |
| { | |
| *head = 32 + *it; | |
| wordBegin = 0; | |
| } | |
| else if(*it == '\'') | |
| { | |
| if(wordBegin == 1) | |
| { | |
| *head = ' '; | |
| wordBegin = 1; | |
| } | |
| else if(wordBegin == 2) | |
| { | |
| *head = ' '; | |
| wordBegin = 0; | |
| } | |
| else | |
| *head = '`'; | |
| } | |
| else if(*it == '\\') | |
| jump = true; | |
| else | |
| *head = ' '; | |
| head ++; | |
| } | |
| g_data.erase(head, g_data.cend()); | |
| string tmp; | |
| stringstream ss(g_data); | |
| while (ss >> tmp) | |
| g_words.push_back(tmp); | |
| } | |
| int main(int argc, char * const argv[]) { | |
| if(argc < 2) | |
| { | |
| cout << "Usage: ./commonwordfinder <filename> [limit]" << endl; | |
| return 0; | |
| } | |
| ifstream file; | |
| file.open(argv[1]); | |
| if(!file) | |
| { | |
| cout << "Usage: ./commonwordfinder <filename> [limit]" << endl; | |
| return 0; | |
| } | |
| if(argc == 3) | |
| { | |
| istringstream iss(argv[2]); | |
| if(!iss >> g_limit) | |
| { | |
| cout << "Error: Invalid limit '" << argv[2] << "' received." << endl; | |
| return 0; | |
| } | |
| } | |
| cout << "1" << endl; | |
| g_data = string((std::istreambuf_iterator<char>(file)), (std::istreambuf_iterator<char>())); | |
| cout << "2" << endl; | |
| wash_data(); | |
| cout << "3" << endl; | |
| for(const string &s : g_words) | |
| { | |
| trie_insert(s, &TrieNode::root); | |
| } | |
| cout << "4" << endl; | |
| TrieNode *n = &TrieNode::root; | |
| collect(n); | |
| print_queue(pq); | |
| return 0; | |
| } |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
/*******************************************************************************
******************************************************************************/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
using namespace std;
struct TrieNode;
static int g_limit = 10;
static string g_data;
static size_t g_max_word_width = 0;
struct TrieNode
{
TrieNode()
{}
TrieNode(const char& key, TrieNode* parent)
: key_(key)
, parent_(parent)
{}
int value_ = 0;
char key_ = '0';
char padding[3];
array<TrieNode*, 27> children_{};
TrieNode* parent_;
static TrieNode root;
};
TrieNode TrieNode::root;
void trie_insert(const string &s, TrieNode *n)
{
TrieNode *cur = n;
for(const char& c : s)
{
size_t index = static_cast<size_t>(c) - '`';
if(!cur->children_[index])
cur->children_[index] = new TrieNode(c, cur);
cur = cur->children_[index];
}
++ cur->value_;
}
string trie_recover(TrieNode n)
{
string r;
TrieNode cur = n;
while(cur->key_ != '0')
{
if(cur->key_ == '`')
r.push_back(''');
else
r.push_back(cur->key_);
cur = cur->parent_;
}
std::reverse(r.begin(), r.end());
return r;
}
static auto ranking_cmp = [](TrieNode* l, TrieNode* r) {
return l->value_ < r->value_;
};
static priority_queue<TrieNode*, std::vector<TrieNode*>, decltype(ranking_cmp)> ranking_pq(ranking_cmp);
int num_digit(const int& num){
int r;
if(num < 10)
r = 1;
else if(num < 100)
r = 2;
else if(num < 1000)
r = 3;
else if(num < 10000)
r = 4;
else if(num < 100000)
r = 5;
else if(num < 1000000)
r = 6;
else if(num < 10000000)
r = 7;
else if(num < 100000000)
r = 8;
else
r = 9;
return r;
}
void print_queue(priority_queue<TrieNode*, std::vector<TrieNode*>, decltype(ranking_cmp)>& q) {
cout << "Total unique words: " << (int)q.size() <<'\n'<< std::right;
list<tuple<int, set>> r;
int currentCount1 = q.top()->value_;
set tmpSet;
while(!q.empty())
{
tmpSet.insert(trie_recover(q.top()));
q.pop();
if(!q.top())
break;
int tmpV = q.top()->value_;
r.push_back(make_tuple(currentCount1, tmpSet));
int indexWidth = 0;
int wordWidth = 0;
int limit = g_limit;
for(auto it : r)
{
if(limit == 0)
break;
for(auto jt : get<1>(it))
{
if(limit == 0)
break;
indexWidth ++;
if((int)jt.size() > wordWidth)
wordWidth = jt.size();
-- limit;
}
}
wordWidth ++;
int index = 0;
for(auto it : r)
{
for(auto jt : get<1>(it))
{
if(g_limit == 0)
return;
cout << setw(num_digit(indexWidth)) << ++ index << ". "
<< std::left << setw((int)wordWidth) << jt
<< get<0>(it) << '\n' << std::right;
-- g_limit;
}
}
}
void collect(TrieNode n)
{
if(n->value_ != 0)
ranking_pq.push(n);
for(TrieNode it : n->children_)
if(it)
collect(it);
}
void wash_data()
{
bool jump = false;
int wordBegin = 0;
auto head = g_data.begin();
for(auto it = g_data.begin(); it != g_data.end(); ++ it)
{
if(jump)
{
jump = false;
continue;
}
if((*it >= 'a' && *it <= 'z'))
{
*head = *it;
wordBegin = 0;
}
else if(*it == ' ')
{
*head = *it;
wordBegin = 1;
}
else if(*it >= 'A' && *it <= 'Z')
{
*head = 32 + *it;
wordBegin = 0;
}
else if(*it == ''')
{
if(wordBegin == 1)
{
*head = ' ';
wordBegin = 1;
}
else if(wordBegin == 2)
{
*head = ' ';
wordBegin = 0;
}
else
*head = '`';
}
else if(*it == '\')
jump = true;
else
*head = ' ';
head ++;
}
g_data.erase(head, g_data.cend());
string tmp;
stringstream ss(g_data);
while (ss >> tmp)
{
if(tmp.size() > g_max_word_width)
g_max_word_width = tmp.size();
trie_insert(tmp, &TrieNode::root);
}
}
int main(int argc, char * const argv[]) {
ios_base::sync_with_stdio(false);
cin.tie(nullptr);
if(argc < 2)
{
cout << "Usage: ./commonwordfinder [limit]" << endl;
return 0;
}
ifstream file;
file.open(argv[1]);
if(!file)
{
cout << "Error: Cannot open file '"<< argv[1] <<"' for input.\n" << endl;
return 0;
}
if(argc == 3)
{
istringstream iss(argv[2]);
if(!(iss >> g_limit))
{
cout << "Error: Invalid limit '" << argv[2] << "' received." << endl;
return 0;
}
if(g_limit < 0)
{
cout << "Error: Invalid limit '" << argv[2] << "' received." << endl;
return 0;
}
}
g_data = string((std::istreambuf_iterator(file)), (std::istreambuf_iterator()));
wash_data();
collect(&TrieNode::root);
print_queue(ranking_pq);
return 0;
}