Skip to content

Commit

Permalink
输出时要照顾所有的fallback路径
Browse files Browse the repository at this point in the history
  • Loading branch information
pyloque committed Oct 28, 2018
1 parent cf2a7c1 commit aae2115
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 39 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,11 @@ options = {quick: false, longest: false}

字数|耗时
---|----
| 20000 words | 12ms |
| 40000 words | 28ms |
| 60000 words | 35ms |
| 80000 words | 49ms |
| 100000 words | 51ms |
| 20000 words | 14ms |
| 40000 words | 32ms |
| 60000 words | 67ms |
| 80000 words | 71ms |
| 100000 words | 84ms |

fastscan 可以做到以迅雷不及掩耳的速度扫遍一篇 10 万字的长文,10 万字大概就是一部中篇小说的长度了。如果你要扫百万字的长篇小说,那还是建议你分章分节来扫吧。

Expand Down
48 changes: 21 additions & 27 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
val: null, // 当前节点的字符,null表示根节点
back: null, // 跳跃指针,也称失败指针
parent: null, // 父节点指针,
depth: 0, // 节点深度
accept: false // 是否形成了一个完整的词汇,中间节点也可能为true
}
// make trie tree
Expand Down Expand Up @@ -57,8 +56,7 @@
val: c,
accept: false,
back: root,
parent: current,
depth: current.depth + 1
parent: current
}
}
current = current.next[c];
Expand All @@ -77,13 +75,13 @@
}
var parent = node.parent
var back = parent.back
while(back != null) {
while (back != null) {
// 匹配父节点的跳跃节点的子节点
var child = back.next[node.val]
if (child) {
node.back = child
break
}
}
back = back.back
}
}
Expand Down Expand Up @@ -176,39 +174,35 @@
var offWords = [];
var current = this.root;
options = options || {}
for (var i = 0; i < content.length;i++) {
for (var i = 0; i < content.length; i++) {
var c = content[i];
var next = current.next[c];
if(!next) {
// 递归匹配跳跃节点的子节点
if (!next) {
// 当前分支上找不到,跳到其它分支上找
var back = current.back
while(back != null) {
if(back.accept) {
while (back != null) {
next = back.next[c]
if (next) {
break
}
back = back.back
}
}
if (next) {
var back = next;
do {
// 收集匹配的词汇
if (back.accept) {
var word = collect(back)
offWords.push([i - word.length, word]);
offWords.push([i - word.length + 1, word]);
// 只选第一个词
if (options.quick) {
return offWords
}
}
next = back.next[c]
if(next) {
break
}
back = back.back
}
}
if(next) {
} while (back != this.root);
current = next;
// 收集匹配的词汇
if (current.accept) {
var word = collect(current)
offWords.push([i - word.length + 1, word]);
// 只选第一个词
if (options.quick) {
return offWords
}
}
continue
}
// 重置
Expand Down
2 changes: 1 addition & 1 deletion index.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 12 additions & 5 deletions index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,25 @@ describe('测试叠加词汇', function () {
var scanner = new FastScanner(["近平", "习近平棒", "习近平好"])
var content = "习近平拽"
var offWords = scanner.search(content)
console.log(offWords)
console.log(offWords)
assert.deepEqual([[1, '近平']], offWords)
});
it('扫的狠一点', function () {
var scanner = new FastScanner(["近平", "习近平", "习近平好"])
var content = "我不说习近平好,也不是习近平坏"
var offWords = scanner.search(content)
assert.deepEqual([[3, '习近平'], [3, '习近平好'], [11, '习近平'], [12, '近平']], offWords)
assert.deepEqual([[3, '习近平'], [4, '近平'], [3, '习近平好'], [11, '习近平'], [12, '近平']], offWords)
var offWords = scanner.search(content, { quick: true })
assert.deepEqual([[3, '习近平']], offWords)
var offWords = scanner.search(content, { longest: true })
assert.deepEqual([[3, '习近平好'], [11, '习近平'], [12, '近平']], offWords)
assert.deepEqual([[3, '习近平好'], [4, '近平'], [11, '习近平'], [12, '近平']], offWords)
});
});
// Suite named 'wikipedia demo': scans a short text against a dictionary of
// overlapping words (presumably the Aho-Corasick example from Wikipedia —
// confirm) and checks that every occurrence is reported.
describe('wikipedia demo', function () {
    // The case title reads "not a single one may be missing".
    it('一个都不能少', function () {
        var dictionary = ["a", "ab", "bab", "bc", "bca", "c", "caa"]
        var text = "abccab"
        // Each hit is a [startOffset, word] pair; overlapping words such as
        // "a"/"ab" at offset 0 and "bc"/"c" around offset 1-2 must all appear.
        var expectedHits = [
            [0, "a"],
            [0, "ab"],
            [1, "bc"],
            [2, "c"],
            [3, "c"],
            [4, "a"],
            [4, "ab"]
        ]
        var actualHits = new FastScanner(dictionary).search(text)
        assert.deepEqual(expectedHits, actualHits)
    });
});
describe('动态增加词汇', function () {
Expand Down Expand Up @@ -101,11 +108,11 @@ describe('排列组合词汇', function () {
return permute(inputArr);
}
var words = permutator(seed)
for(var i=0;i<words.length;i++) {
for (var i = 0; i < words.length; i++) {
words[i] = words[i].join('')
}
var scanner = new FastScanner(words)
for(var i=0;i<words.length;i++) {
for (var i = 0; i < words.length; i++) {
scanner.search(words[i])
}
})
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "fastscan",
"version": "1.0.5",
"version": "1.0.6",
"description": "quickly search by ahocorasick algorithm ",
"main": "index.min.js",
"scripts": {
Expand Down

0 comments on commit aae2115

Please sign in to comment.