CI20.8--多模式串匹配问题

来源:互联网 发布:网络模块接法图解 a b 编辑:程序博客网 时间:2024/06/08 13:06

给定一个目标串T和若干个模式串P,设计一个算法,在T中匹配每一个模式串。

思路:

多模式串匹配问题(设m为目标串的长度,n为模式串的平均长度,k为模式串的个数)。可以用后缀trie树,时间复杂度为O(m^2 + kn):建树需要O(m^2),匹配k个模式串需要O(kn)。利用AC自动机的时间复杂度为O(m + kn + z)(其中z为模式串在T中出现的总次数)。还可以用后缀树,后缀树的方法比较复杂,这里不做介绍。

下面是后缀trie树的代码

数组形式:

#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;

/**
 * Suffix trie of a target string, stored as a flat transition table.
 *
 * Every suffix of the target is inserted at construction time, so a pattern
 * is a substring of the target exactly when it can be spelled out by walking
 * from the root. Only the characters 'a'..'z' are supported.
 */
class Trie
{
public:
    /** Alphabet size: one outgoing slot per letter 'a'..'z'. */
    static const int CLD = 26;

    /** Number of nodes created so far, not counting the root (node 0). */
    int size;

    /**
     * Transition table. Node k owns the slots [k * CLD, (k + 1) * CLD);
     * a slot holds the index of the child node, or -1 when there is no edge.
     */
    vector<int> trie;

    /**
     * Builds the suffix trie of s.
     *
     * @param s target string; may be empty (then nothing is ever found).
     * @throws std::invalid_argument if s contains a character outside 'a'..'z'.
     */
    Trie(const string& s)
        : size(0), trie()
    {
        // Start with a single blank row for the root; further rows are
        // appended on demand by Insert, so no upper bound on the node count
        // has to be guessed up front.
        trie.resize(CLD, -1);
        for (string::size_type i = 0; i < s.size(); ++i)
            Insert(s.substr(i));  // suffix starting at i, through the end
    }

    /**
     * Inserts the word s, creating nodes as needed. An empty word is a no-op.
     *
     * @throws std::invalid_argument if s contains a character outside
     *         'a'..'z'; the trie is left unmodified in that case.
     */
    void Insert(const string& s)
    {
        // Validate before mutating so a rejected word leaves no partial path.
        for (string::size_type i = 0; i < s.size(); ++i)
        {
            if (!InAlphabet(s[i]))
                throw invalid_argument("Trie::Insert: only characters 'a'..'z' are supported");
        }

        int index = 0;
        for (string::size_type i = 0; i < s.size(); ++i)
        {
            const int slot = index * CLD + (s[i] - 'a');
            if (trie[slot] == -1)
            {
                trie.resize(trie.size() + CLD, -1);  // blank row for the new node
                trie[slot] = ++size;
            }
            index = trie[slot];
        }
    }

    /**
     * Reports whether s can be spelled out from the root, i.e. whether s is a
     * substring of the target the trie was built from.
     *
     * @return false for an empty pattern, for a pattern containing a character
     *         outside 'a'..'z', or when the walk runs into a missing edge;
     *         true otherwise.
     */
    bool Search(const string& s) const
    {
        if (s.empty())
            return false;

        int index = 0;
        for (string::size_type i = 0; i < s.size(); ++i)
        {
            // The trie only ever stores 'a'..'z', so anything else cannot match.
            if (!InAlphabet(s[i]))
                return false;
            index = trie[index * CLD + (s[i] - 'a')];
            if (index == -1)
                return false;
        }
        return true;
    }

private:
    /** True when c is one of the supported letters 'a'..'z'. */
    static bool InAlphabet(char c)
    {
        return c >= 'a' && c <= 'z';
    }
};

// Define SUFFIX_TRIE_ARRAY_NO_DEMO_MAIN to compile the class without the demo
// driver, e.g. when embedding it in another program or a test harness.
#ifndef SUFFIX_TRIE_ARRAY_NO_DEMO_MAIN
/** Demo: prints the target length, then 1/0 for each sample pattern. */
int main()
{
    string s("mississipi");
    cout << s.size() << endl;
    Trie trie(s);

    vector<string> svec;
    svec.push_back("is");
    svec.push_back("sip");
    svec.push_back("hi");
    svec.push_back("sis");
    svec.push_back("mississippa");

    for (vector<string>::size_type i = 0; i < svec.size(); ++i)
        cout << trie.Search(svec[i]) << endl;
    return 0;
}
#endif


树的形式:

#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
#include <assert.h>
using namespace std;

/** Alphabet size: one child slot per letter 'a'..'z'. */
const int CLD = 26;

/**
 * Trie node. pcld[j] is the child reached by the letter 'a' + j, or NULL.
 * A node owns its children: destroying a node destroys its whole subtree.
 */
struct TNode
{
    vector<TNode*> pcld;

    TNode() : pcld(CLD, static_cast<TNode*>(NULL)) {}

    ~TNode()
    {
        for (vector<TNode*>::size_type i = 0; i < pcld.size(); ++i)
            delete pcld[i];  // deleting NULL is a no-op
    }

private:
    // Copying would make two nodes delete the same children; declared but
    // intentionally not defined.
    TNode(const TNode&);
    TNode& operator=(const TNode&);
};

/** True when c is one of the supported letters 'a'..'z'. */
static bool IsLowercaseLetter(char c)
{
    return c >= 'a' && c <= 'z';
}

/**
 * Inserts the word s below root, creating nodes as needed.
 *
 * @param root non-NULL root of the trie (checked with assert).
 * @param s    word to insert; an empty word is a no-op.
 * @throws std::invalid_argument if s contains a character outside 'a'..'z';
 *         the trie is left unmodified in that case.
 */
void Insert(TNode*& root, const string& s)
{
    assert(root != NULL);

    // Validate before mutating so a rejected word leaves no partial path.
    for (string::size_type i = 0; i < s.size(); ++i)
    {
        if (!IsLowercaseLetter(s[i]))
            throw invalid_argument("Insert: only characters 'a'..'z' are supported");
    }

    TNode* cur = root;
    for (string::size_type i = 0; i < s.size(); ++i)
    {
        const int j = s[i] - 'a';
        if (cur->pcld[j] == NULL)
            cur->pcld[j] = new TNode();
        cur = cur->pcld[j];
    }
}

/**
 * Reports whether s can be spelled out by walking down from root.
 *
 * @param root non-NULL root of the trie (checked with assert).
 * @return false for an empty pattern, for a pattern containing a character
 *         outside 'a'..'z', or when the walk runs into a missing child;
 *         true otherwise.
 */
bool Search(TNode* root, const string& s)
{
    assert(root != NULL);
    if (s.empty())
        return false;

    const TNode* cur = root;
    for (string::size_type i = 0; i < s.size(); ++i)
    {
        // The trie only ever stores 'a'..'z', so anything else cannot match.
        if (!IsLowercaseLetter(s[i]))
            return false;
        cur = cur->pcld[s[i] - 'a'];
        if (cur == NULL)
            return false;
    }
    return true;
}

// Define SUFFIX_TRIE_TREE_NO_DEMO_MAIN to compile the trie without the demo
// driver, e.g. when embedding it in another program or a test harness.
#ifndef SUFFIX_TRIE_TREE_NO_DEMO_MAIN
/**
 * Demo: inserts every suffix of the target, which turns the trie into a
 * suffix trie, then prints 1/0 for each sample pattern.
 */
int main()
{
    string s("mississipi");
    TNode* root = new TNode();
    for (string::size_type i = 0; i < s.size(); ++i)
        Insert(root, s.substr(i));

    vector<string> svec;
    svec.push_back("is");
    svec.push_back("sip");
    svec.push_back("hi");
    svec.push_back("sis");
    svec.push_back("mississippa");

    for (vector<string>::size_type i = 0; i < svec.size(); ++i)
        cout << Search(root, svec[i]) << endl;

    delete root;  // releases the whole trie
    return 0;
}
#endif

以下是AC自动机代码:

#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
#include <queue>
#include <assert.h>
using namespace std;

/** Alphabet size: one child slot per letter 'a'..'z'. */
const int CLD = 26;

/**
 * Node of the Aho-Corasick automaton.
 *
 * pcld[j] is the child reached by the letter 'a' + j, or NULL.
 * fail is the failure link assigned by Build (NULL until then, and NULL for
 * the root); it is a non-owning pointer into the same trie.
 * tag is true when a pattern ends at this node.
 * A node owns its children: destroying a node destroys its whole subtree.
 */
struct TNode
{
    vector<TNode*> pcld;
    TNode* fail;
    bool tag;

    TNode() : pcld(CLD, static_cast<TNode*>(NULL)), fail(NULL), tag(false) {}

    ~TNode()
    {
        for (vector<TNode*>::size_type i = 0; i < pcld.size(); ++i)
            delete pcld[i];  // deleting NULL is a no-op; fail is not owned
    }

private:
    // Copying would make two nodes delete the same children; declared but
    // intentionally not defined.
    TNode(const TNode&);
    TNode& operator=(const TNode&);
};

/** True when c is one of the supported letters 'a'..'z'. */
static bool IsLowercaseLetter(char c)
{
    return c >= 'a' && c <= 'z';
}

/**
 * Adds the pattern s to the trie and marks its last node as a pattern end.
 * Call Build again after the last Insert, before searching.
 *
 * @param root non-NULL root of the trie (checked with assert).
 * @param s    pattern to add; an empty pattern is ignored.
 * @throws std::invalid_argument if s contains a character outside 'a'..'z';
 *         the trie is left unmodified in that case.
 */
void Insert(TNode*& root, const string& s)
{
    assert(root != NULL);
    if (s.empty())
        return;

    // Validate before mutating so a rejected pattern leaves no partial path.
    for (string::size_type i = 0; i < s.size(); ++i)
    {
        if (!IsLowercaseLetter(s[i]))
            throw invalid_argument("Insert: only characters 'a'..'z' are supported");
    }

    TNode* cur = root;
    for (string::size_type i = 0; i < s.size(); ++i)
    {
        const int j = s[i] - 'a';
        if (cur->pcld[j] == NULL)
            cur->pcld[j] = new TNode();
        cur = cur->pcld[j];
    }
    cur->tag = true;
}

/**
 * Assigns the failure link of every node by a breadth-first walk.
 *
 * For a child reached from cur by letter i, the parent's failure chain is
 * followed until a node with an outgoing edge on i is found; the child's
 * failure link is that edge's target, or the root when the chain runs out.
 * Breadth-first order guarantees the links of all shallower nodes are
 * already final when they are consulted.
 *
 * @param root non-NULL root of the trie (checked with assert).
 */
void Build(TNode*& root)
{
    assert(root != NULL);

    queue<TNode*> que;
    root->fail = NULL;
    que.push(root);

    while (!que.empty())
    {
        TNode* cur = que.front();
        que.pop();

        for (int i = 0; i < CLD; ++i)
        {
            TNode* child = cur->pcld[i];
            if (child == NULL)
                continue;

            TNode* temp = cur->fail;
            while (temp != NULL && temp->pcld[i] == NULL)
                temp = temp->fail;

            if (temp == NULL)
                child->fail = root;
            else
                child->fail = temp->pcld[i];

            que.push(child);
        }
    }
}

/**
 * Counts how many times the inserted patterns occur in the text s;
 * overlapping occurrences and occurrences of different patterns ending at
 * the same position are all counted.
 *
 * Build(root) must have been called after the last Insert, because the scan
 * follows the failure links.
 *
 * @param root non-NULL root of the automaton (checked with assert).
 * @param s    text to scan; may be empty. A character outside 'a'..'z'
 *             cannot be part of any pattern, so it resets the scan to the root.
 * @return total number of pattern occurrences (0 for an empty text).
 */
int Search(TNode* root, const string& s)
{
    assert(root != NULL);

    TNode* state = root;
    int res = 0;

    for (string::size_type i = 0; i < s.size(); ++i)
    {
        if (!IsLowercaseLetter(s[i]))
        {
            state = root;
            continue;
        }

        const int j = s[i] - 'a';
        while (state != root && state->pcld[j] == NULL)
            state = state->fail;
        if (state->pcld[j] != NULL)
            state = state->pcld[j];

        // Every node on the failure chain is a suffix of the text read so
        // far; each tagged one is a pattern ending at position i.
        for (TNode* p = state; p != root; p = p->fail)
        {
            if (p->tag)
                ++res;
        }
    }
    return res;
}

// Define AC_AUTOMATON_NO_DEMO_MAIN to compile the automaton without the demo
// driver, e.g. when embedding it in another program or a test harness.
#ifndef AC_AUTOMATON_NO_DEMO_MAIN
/** Demo: prints the number of pattern occurrences in the sample text. */
int main()
{
    string s("missisip");
    TNode* root = new TNode();

    vector<string> svec;
    svec.push_back("is");
    svec.push_back("sip");
    svec.push_back("ssis");
    svec.push_back("sis");
    svec.push_back("missisip");
    svec.push_back("ip");

    for (vector<string>::size_type i = 0; i < svec.size(); ++i)
        Insert(root, svec[i]);
    Build(root);

    cout << Search(root, s) << endl;

    delete root;  // releases the whole automaton
    return 0;
}
#endif


原创粉丝点击