Skip to content

Instantly share code, notes, and snippets.

@bobosheep
Last active June 2, 2018 12:30
Show Gist options
  • Select an option

  • Save bobosheep/5a462a8bfafb794769246f10f2c0f463 to your computer and use it in GitHub Desktop.

Select an option

Save bobosheep/5a462a8bfafb794769246f10f2c0f463 to your computer and use it in GitHub Desktop.
count word and save it to BST
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define MAX_LENGTH 1024
#define MAX_WORD 32
#define MAX_CHECK 256
struct BST{
int cnt; //辭頻統計
char* word; //單字
int len; //單字長度
struct BST* left; //左子樹
struct BST* right; //右子樹
};
void SetCheckPoint(char ptr[], char* tokens);
char *GetWord(char *sentence, char point[], char *word);
void Insert(struct BST* root, char* word);
void PrintBST(struct BST* root);
struct BST* CopyBST(struct BST* copy, struct BST* dest);
void InsertSortBST(struct BST* root, struct BST* node);
void PrintSortBST(struct BST* root, int control);
void FreeBST(struct BST* root);
int main()
{
char checkPoint[MAX_CHECK];
char getSentence[MAX_LENGTH];
char word[MAX_WORD], *ptr = NULL;
int start = 1;
struct BST* myBST = NULL;
struct BST* sortBST = NULL;
SetCheckPoint(checkPoint, " \t\n\\\"\',?!(){}[]=+*&^%$<>@%#:;./");
while(fgets(getSentence, MAX_LENGTH, stdin) != NULL)
{
ptr = getSentence;
while(ptr = GetWord(ptr, checkPoint, word))
{
if(start)
{
myBST = (struct BST*) malloc(sizeof(struct BST));
myBST->len = strlen(word);
myBST->word = (char *) malloc(sizeof(char) * (myBST->len + 1));
strcpy(myBST->word, word);
//printf("%s\n", myBST->word);
myBST->cnt = 1;
start = 0;
}
else
Insert(myBST, word);
}
}
free(ptr);
printf("Tcount Result :\n");
PrintBST(myBST);
sortBST = CopyBST(myBST, sortBST); //要回傳sortBST的root位置,不然在裡面malloc的位置會不見
printf("Sorted by ascending order :\n");
PrintSortBST(sortBST, 0); //0代表從大到小輸出
printf("Sorted by descending order :\n");
PrintSortBST(sortBST, 1); //1代表從小到大輸出
FreeBST(myBST);
FreeBST(sortBST);
return 0;
}
void SetCheckPoint(char ptr[], char* tokens)
{
//這邊有個重要的概念,用char來表示標點符號是用ascii碼來對應的
//所以每個標點符號代表一個整數,且ascii碼總共256個(0~255)
int i;
for(i = 0 ; i < MAX_CHECK ; i++)
ptr[i] = 0; //先都表示不是斷點
while(*tokens != '\0')
{
ptr[*tokens] = 1; //對應的標點符號標示為斷點。 *tokens是一個十進位的值喔
tokens++;
}
}
char *GetWord(char *sentence, char point[], char *word)
{
char *ptr = sentence;
int len = 0;
while(point[*ptr]) ptr++; //從上個一結束的位置開始到下一個非斷點的地方(單字的開始)
if(*ptr == '\0' || *ptr == '\n')
return NULL; //代表一個句子結束
while(point[*ptr] == 0 && *ptr != '\0')
{
//取單字
len++;
if(len >= MAX_WORD)
break;
*word++ = *ptr++; //複製字元
//概念等同於
//*word = *ptr;
//word++;
//ptr++;
}
*word = '\0';
return ptr;
}
void Insert(struct BST* root, char* word)
{
/*
* 放進去用strcmp去比較bst裡的word和準備要insert的word
* 如果要insert的word比較小就往左子樹去找
* 反之就往右子樹
* 如果strcmp == 0代表是同一個單字,則該node的單字cnt數加一
*/
if(root == NULL)
return;
//printf("root->word is %s\n", root->word);
if(strcmp(root->word, word) < 0)
{
if(root->left != NULL)
Insert(root->left, word);
else
{
//創建左子樹
struct BST* node = (struct BST*) malloc(sizeof(struct BST));
node->cnt = 1;
node->len = strlen(word);
node->word = (char *) malloc(sizeof(char) * (node->len + 1));
strcpy(node->word, word);
root->left = node;
}
}
else if(strcmp(root->word, word) > 0)
{
if(root->right != NULL)
Insert(root->right, word);
else
{
//創建右子樹
struct BST* node = (struct BST*) malloc(sizeof(struct BST));
node->cnt = 1;
node->len = strlen(word);
node->word = (char *) malloc(sizeof(char) * (node->len + 1));
strcpy(node->word, word);
root->right = node;
}
}
else
{
//printf("word is %s\n", word);
root->cnt++;
}
}
void PrintBST(struct BST* root)
{
//這是按照單字出現的順序輸出
if(root == NULL)
return;
printf("%d\t%s\n", root->cnt, root->word);
PrintBST(root->left);
PrintBST(root->right);
}
struct BST* CopyBST(struct BST* copy, struct BST* dest)
{
if(copy == NULL)
return dest;
if(dest == NULL)
{
//造樹運動
dest = (struct BST*) malloc(sizeof(struct BST));
dest->cnt = copy->cnt;
dest->len = copy->len;
dest->word = (char *) malloc(sizeof(char) * (dest->len + 1));
strcpy(dest->word, copy->word);
CopyBST(copy->left, dest);
CopyBST(copy->right, dest);
}
else
{
InsertSortBST(dest, copy);
CopyBST(copy->left, dest);
CopyBST(copy->right, dest);
}
return dest;
}
void InsertSortBST(struct BST* root, struct BST* node)
{
//printf("root word is %s, node word is %s\n", root->word, node->word);
if(root == NULL)
return;
if(root->cnt >= node->cnt)
{
//printf("go left\n");
if(root->left != NULL)
InsertSortBST(root->left, node);
else
{
struct BST* tmpnode = (struct BST*) malloc(sizeof(struct BST));
tmpnode->cnt = node->cnt;
tmpnode->len = node->len;
tmpnode->word = (char *) malloc(sizeof(char) * (tmpnode->len + 1));
strcpy(tmpnode->word, node->word);
root->left = tmpnode;
}
}
else
{
//printf("go right\n");
if(root->right != NULL)
InsertSortBST(root->right, node);
else
{
struct BST* tmpnode = (struct BST*) malloc(sizeof(struct BST));
tmpnode->cnt = node->cnt;
tmpnode->len = node->len;
tmpnode->word = (char *) malloc(sizeof(char) * (tmpnode->len + 1));
strcpy(tmpnode->word, node->word);
root->right = tmpnode;
}
}
}
void PrintSortBST(struct BST* root, int control)
{
if(root == NULL)
return;
if(control)
{
//由小到大排序
PrintSortBST(root->left, control);
printf("%d\t%s\n", root->cnt, root->word);
PrintSortBST(root->right, control);
}
else
{
//由大到小排序
PrintSortBST(root->right, control);
printf("%d\t%s\n", root->cnt, root->word);
PrintSortBST(root->left, control);
}
}
void FreeBST(struct BST* root)
{
//清理pointer
if(root == NULL)
return;
FreeBST(root->left);
FreeBST(root->right);
free(root); //這一定要放最後
}
@bobosheep
Copy link
Author

add sorted BST and free the memory.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment