短文 词频

来源:互联网 发布:二尺袖配袴 淘宝 编辑:程序博客网 时间:2024/06/02 16:33

本程序从文本文件 text.txt 读入一篇英文短文,统计该短文中不同单词及其出现次数,并按字典序(按字符编码比较,区分大小写)将单词及其出现次数输出到文本文件 word.txt 中。程序用一棵有序二叉树(二叉查找树)存储这些单词及其出现的次数,一边读入一边建树;读入完毕后中序遍历该二叉树,将遍历经过的各节点内容依次输出。


#include <cstddef>
#include <fstream>
#include <iostream>
#include <string>

// Word-frequency counter.
//
// Reads the English text in "text.txt", counts how often each distinct word
// occurs, and writes "<word> <count>" lines, in ascending word order, both to
// standard output and to "word.txt". A word is a maximal run of ASCII letters;
// comparison is case-sensitive, so "The" and "the" are counted separately.

// Input text and output report. Both are opened when the program starts;
// "word.txt" is truncated on open.
std::ifstream inFile("text.txt", std::ios::in);
std::ofstream outFile("word.txt", std::ios::trunc);

// One node of the ordered binary tree: a word, its occurrence count, and the
// two subtrees (smaller words on the left, larger words on the right).
struct treeNode
{
    std::string data;
    int count;
    treeNode *leftChild, *rightChild;

    // Creates a leaf for a word that has been seen exactly once.
    treeNode(const std::string &data)
        : data(data), count(1), leftChild(NULL), rightChild(NULL)
    {
    }
};

// Records one occurrence of `word` in the ordered tree rooted at `node`.
//
// If the word is already present its count is incremented; otherwise a new
// leaf is allocated and linked in. `node` is a reference to the parent's link
// so that an empty (NULL) subtree can be replaced by the new leaf.
void biTree(treeNode *&node, const std::string &word)
{
    if (node == NULL)
    {
        node = new treeNode(word);
        return;
    }

    // Positive when the stored word sorts after `word`.
    const int cmp = node->data.compare(word);
    if (cmp == 0)
        ++node->count;
    else if (cmp > 0)
        biTree(node->leftChild, word);
    else
        biTree(node->rightChild, word);
}

// In-order traversal: prints every "<word> <count>" pair in ascending word
// order to standard output and to the global `outFile`.
void midOrder(treeNode *node)
{
    if (node == NULL)
    {
        return;
    }

    midOrder(node->leftChild);
    // A plain space separates the fields (std::ends would emit a NUL byte).
    std::cout << node->data << ' ' << node->count << '\n';
    outFile << node->data << ' ' << node->count << '\n';
    midOrder(node->rightChild);
}

// Frees every node of the tree rooted at `node` (post-order).
static void destroyTree(treeNode *node)
{
    if (node == NULL)
    {
        return;
    }
    destroyTree(node->leftChild);
    destroyTree(node->rightChild);
    delete node;
}

// Returns true when `c` is an ASCII letter (a-z or A-Z).
static bool isAsciiLetter(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

// Adds `word` to the tree if it is non-empty, then resets it for the next word.
static void flushWord(treeNode *&root, std::string &word)
{
    if (!word.empty())
    {
        biTree(root, word);
        word.clear();
    }
}

// Builds the tree from "text.txt" line by line, then reports the counts.
// Returns 0 on success and 1 when either file could not be opened.
int main()
{
    if (!inFile.is_open())
    {
        std::cerr << "Error opening file" << std::endl;
        return 1;
    }
    if (!outFile)
    {
        std::cerr << "Error opening outfile" << std::endl;
        return 1;
    }

    treeNode *root = NULL;
    std::string line;
    std::string word;

    while (std::getline(inFile, line))
    {
        for (std::size_t i = 0; i < line.size(); ++i)
        {
            if (isAsciiLetter(line[i]))
                word += line[i];
            else
                flushWord(root, word); // any non-letter ends the current word
        }
        // The end of a line also ends a word, so words never span lines.
        flushWord(root, word);
    }

    midOrder(root);
    destroyTree(root);

    inFile.close();
    outFile.close();
    return 0;
}