Last active
June 2, 2018 12:30
-
-
Save bobosheep/5a462a8bfafb794769246f10f2c0f463 to your computer and use it in GitHub Desktop.
count word and save it to BST
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include<stdio.h> | |
| #include<stdlib.h> | |
| #include<string.h> | |
| #define MAX_LENGTH 1024 | |
| #define MAX_WORD 32 | |
| #define MAX_CHECK 256 | |
| struct BST{ | |
| int cnt; //辭頻統計 | |
| char* word; //單字 | |
| int len; //單字長度 | |
| struct BST* left; //左子樹 | |
| struct BST* right; //右子樹 | |
| }; | |
| void SetCheckPoint(char ptr[], char* tokens); | |
| char *GetWord(char *sentence, char point[], char *word); | |
| void Insert(struct BST* root, char* word); | |
| void PrintBST(struct BST* root); | |
| struct BST* CopyBST(struct BST* copy, struct BST* dest); | |
| void InsertSortBST(struct BST* root, struct BST* node); | |
| void PrintSortBST(struct BST* root, int control); | |
| void FreeBST(struct BST* root); | |
| int main() | |
| { | |
| char checkPoint[MAX_CHECK]; | |
| char getSentence[MAX_LENGTH]; | |
| char word[MAX_WORD], *ptr = NULL; | |
| int start = 1; | |
| struct BST* myBST = NULL; | |
| struct BST* sortBST = NULL; | |
| SetCheckPoint(checkPoint, " \t\n\\\"\',?!(){}[]=+*&^%$<>@%#:;./"); | |
| while(fgets(getSentence, MAX_LENGTH, stdin) != NULL) | |
| { | |
| ptr = getSentence; | |
| while(ptr = GetWord(ptr, checkPoint, word)) | |
| { | |
| if(start) | |
| { | |
| myBST = (struct BST*) malloc(sizeof(struct BST)); | |
| myBST->len = strlen(word); | |
| myBST->word = (char *) malloc(sizeof(char) * (myBST->len + 1)); | |
| strcpy(myBST->word, word); | |
| //printf("%s\n", myBST->word); | |
| myBST->cnt = 1; | |
| start = 0; | |
| } | |
| else | |
| Insert(myBST, word); | |
| } | |
| } | |
| free(ptr); | |
| printf("Tcount Result :\n"); | |
| PrintBST(myBST); | |
| sortBST = CopyBST(myBST, sortBST); //要回傳sortBST的root位置,不然在裡面malloc的位置會不見 | |
| printf("Sorted by ascending order :\n"); | |
| PrintSortBST(sortBST, 0); //0代表從大到小輸出 | |
| printf("Sorted by descending order :\n"); | |
| PrintSortBST(sortBST, 1); //1代表從小到大輸出 | |
| FreeBST(myBST); | |
| FreeBST(sortBST); | |
| return 0; | |
| } | |
| void SetCheckPoint(char ptr[], char* tokens) | |
| { | |
| //這邊有個重要的概念,用char來表示標點符號是用ascii碼來對應的 | |
| //所以每個標點符號代表一個整數,且ascii碼總共256個(0~255) | |
| int i; | |
| for(i = 0 ; i < MAX_CHECK ; i++) | |
| ptr[i] = 0; //先都表示不是斷點 | |
| while(*tokens != '\0') | |
| { | |
| ptr[*tokens] = 1; //對應的標點符號標示為斷點。 *tokens是一個十進位的值喔 | |
| tokens++; | |
| } | |
| } | |
| char *GetWord(char *sentence, char point[], char *word) | |
| { | |
| char *ptr = sentence; | |
| int len = 0; | |
| while(point[*ptr]) ptr++; //從上個一結束的位置開始到下一個非斷點的地方(單字的開始) | |
| if(*ptr == '\0' || *ptr == '\n') | |
| return NULL; //代表一個句子結束 | |
| while(point[*ptr] == 0 && *ptr != '\0') | |
| { | |
| //取單字 | |
| len++; | |
| if(len >= MAX_WORD) | |
| break; | |
| *word++ = *ptr++; //複製字元 | |
| //概念等同於 | |
| //*word = *ptr; | |
| //word++; | |
| //ptr++; | |
| } | |
| *word = '\0'; | |
| return ptr; | |
| } | |
| void Insert(struct BST* root, char* word) | |
| { | |
| /* | |
| * 放進去用strcmp去比較bst裡的word和準備要insert的word | |
| * 如果要insert的word比較小就往左子樹去找 | |
| * 反之就往右子樹 | |
| * 如果strcmp == 0代表是同一個單字,則該node的單字cnt數加一 | |
| */ | |
| if(root == NULL) | |
| return; | |
| //printf("root->word is %s\n", root->word); | |
| if(strcmp(root->word, word) < 0) | |
| { | |
| if(root->left != NULL) | |
| Insert(root->left, word); | |
| else | |
| { | |
| //創建左子樹 | |
| struct BST* node = (struct BST*) malloc(sizeof(struct BST)); | |
| node->cnt = 1; | |
| node->len = strlen(word); | |
| node->word = (char *) malloc(sizeof(char) * (node->len + 1)); | |
| strcpy(node->word, word); | |
| root->left = node; | |
| } | |
| } | |
| else if(strcmp(root->word, word) > 0) | |
| { | |
| if(root->right != NULL) | |
| Insert(root->right, word); | |
| else | |
| { | |
| //創建右子樹 | |
| struct BST* node = (struct BST*) malloc(sizeof(struct BST)); | |
| node->cnt = 1; | |
| node->len = strlen(word); | |
| node->word = (char *) malloc(sizeof(char) * (node->len + 1)); | |
| strcpy(node->word, word); | |
| root->right = node; | |
| } | |
| } | |
| else | |
| { | |
| //printf("word is %s\n", word); | |
| root->cnt++; | |
| } | |
| } | |
| void PrintBST(struct BST* root) | |
| { | |
| //這是按照單字出現的順序輸出 | |
| if(root == NULL) | |
| return; | |
| printf("%d\t%s\n", root->cnt, root->word); | |
| PrintBST(root->left); | |
| PrintBST(root->right); | |
| } | |
| struct BST* CopyBST(struct BST* copy, struct BST* dest) | |
| { | |
| if(copy == NULL) | |
| return dest; | |
| if(dest == NULL) | |
| { | |
| //造樹運動 | |
| dest = (struct BST*) malloc(sizeof(struct BST)); | |
| dest->cnt = copy->cnt; | |
| dest->len = copy->len; | |
| dest->word = (char *) malloc(sizeof(char) * (dest->len + 1)); | |
| strcpy(dest->word, copy->word); | |
| CopyBST(copy->left, dest); | |
| CopyBST(copy->right, dest); | |
| } | |
| else | |
| { | |
| InsertSortBST(dest, copy); | |
| CopyBST(copy->left, dest); | |
| CopyBST(copy->right, dest); | |
| } | |
| return dest; | |
| } | |
| void InsertSortBST(struct BST* root, struct BST* node) | |
| { | |
| //printf("root word is %s, node word is %s\n", root->word, node->word); | |
| if(root == NULL) | |
| return; | |
| if(root->cnt >= node->cnt) | |
| { | |
| //printf("go left\n"); | |
| if(root->left != NULL) | |
| InsertSortBST(root->left, node); | |
| else | |
| { | |
| struct BST* tmpnode = (struct BST*) malloc(sizeof(struct BST)); | |
| tmpnode->cnt = node->cnt; | |
| tmpnode->len = node->len; | |
| tmpnode->word = (char *) malloc(sizeof(char) * (tmpnode->len + 1)); | |
| strcpy(tmpnode->word, node->word); | |
| root->left = tmpnode; | |
| } | |
| } | |
| else | |
| { | |
| //printf("go right\n"); | |
| if(root->right != NULL) | |
| InsertSortBST(root->right, node); | |
| else | |
| { | |
| struct BST* tmpnode = (struct BST*) malloc(sizeof(struct BST)); | |
| tmpnode->cnt = node->cnt; | |
| tmpnode->len = node->len; | |
| tmpnode->word = (char *) malloc(sizeof(char) * (tmpnode->len + 1)); | |
| strcpy(tmpnode->word, node->word); | |
| root->right = tmpnode; | |
| } | |
| } | |
| } | |
| void PrintSortBST(struct BST* root, int control) | |
| { | |
| if(root == NULL) | |
| return; | |
| if(control) | |
| { | |
| //由小到大排序 | |
| PrintSortBST(root->left, control); | |
| printf("%d\t%s\n", root->cnt, root->word); | |
| PrintSortBST(root->right, control); | |
| } | |
| else | |
| { | |
| //由大到小排序 | |
| PrintSortBST(root->right, control); | |
| printf("%d\t%s\n", root->cnt, root->word); | |
| PrintSortBST(root->left, control); | |
| } | |
| } | |
| void FreeBST(struct BST* root) | |
| { | |
| //清理pointer | |
| if(root == NULL) | |
| return; | |
| FreeBST(root->left); | |
| FreeBST(root->right); | |
| free(root); //這一定要放最後 | |
| } | |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
add sorted BST and free the memory.