-
Notifications
You must be signed in to change notification settings - Fork 9
/
aho_corasick.cpp
127 lines (101 loc) · 2.46 KB
/
aho_corasick.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// aho_corasick.cpp
// Eric K. Zhang; July 15, 2016
// Problem: Given a text string and a dictionary
// of search strings, find all occurences of a search
// string in the text (overlap included).
// Bad solution: Run KMP a lot, giving O(N * |S|)
// Good solution: Aho-Corasick gives O(N + sum|S|) by
// essentially extending KMP, creating a trie with
// "failure" functions. Basically a FSM.
// Note: The automation generated by Aho-Corasick can
// also be used as a first step in many problems. This
// is often how it'll be used (e.g. sparse table) in
// competitions, since Rabin-Karp is easier to code
// for the simple version of the problem.
#include <bits/stdc++.h>
using namespace std;
typedef long long LL;
#define MAXN 100013
int N; // size of dictionary
string dict[MAXN];
string text;
#define MAXM 100013
int M; // number of states in the automation
int g[MAXM][26]; // the normal edges in the trie
int f[MAXM]; // failure function
LL out[MAXM]; // output function
int aho_corasick() {
memset(g, -1, sizeof g);
memset(out, 0, sizeof out);
int nodes = 1;
// build the trie
for (int i = 0; i < N; i++) {
string& s = dict[i];
int cur = 0;
for (int j = 0; j < s.size(); j++) {
if (g[cur][s[j] - 'a'] == -1) {
g[cur][s[j] - 'a'] = nodes++;
}
cur = g[cur][s[j] - 'a'];
}
out[cur]++;
}
for (int ch = 0; ch < 26; ch++) {
if (g[0][ch] == -1) {
g[0][ch] = 0;
}
}
// BFS to calculate out and failure functions
memset(f, -1, sizeof f);
queue<int> q;
for (int ch = 0; ch < 26; ch++) {
if (g[0][ch] != 0) {
f[g[0][ch]] = 0;
q.push(g[0][ch]);
}
}
while (!q.empty()) {
int state = q.front();
q.pop();
for (int ch = 0; ch < 26; ch++) {
if (g[state][ch] == -1) continue;
int fail = f[state];
while (g[fail][ch] == -1) {
fail = f[fail];
}
f[g[state][ch]] = g[fail][ch];
out[g[state][ch]] += out[g[fail][ch]];
q.push(g[state][ch]);
}
}
return nodes;
}
LL search() {
// Using the Aho-Corasick automation, search the text.
int state = 0;
LL ret = 0;
for (char c : text) {
while (g[state][c - 'a'] == -1) {
state = f[state];
}
state = g[state][c - 'a'];
ret += out[state];
}
// It's that simple!
return ret;
}
int main() {
// make I/O faster
ios_base::sync_with_stdio(false);
cin.tie(0);
freopen("aho_corasick.in", "r", stdin);
cin >> N;
for (int i = 0; i < N; i++) {
cin >> dict[i];
}
cin >> text;
M = aho_corasick();
LL result = search();
cout << result << endl;
return 0;
}