-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTermFrequence.java
75 lines (64 loc) · 2.47 KB
/
TermFrequence.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import java.io.*;
import java.lang.*;
import java.util.*;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
 * Counts how often a single term occurs in a piece of text.
 */
public class TermFrequence
{
    /**
     * Tokenizes {@code context} with a Lucene {@link SimpleAnalyzer} and counts the
     * tokens that are exactly equal to {@code term}.
     *
     * NOTE(review): per the Lucene documentation SimpleAnalyzer lower-cases its tokens,
     * so a {@code term} containing upper-case letters will presumably never match --
     * confirm whether the term should be normalized before comparison.
     *
     * TODO: if term is not a single word but a compound statement, tokenize it as well
     * and return an object (Integer or List<String>).
     *
     * @param context text to tokenize and search
     * @param term    token to count; compared with {@link String#equals(Object)}
     * @return the number of tokens equal to {@code term}, or {@code null} when no token
     *         matches (existing callers rely on {@code null} meaning "not found")
     * @throws IOException declared by the signature; tokenization failures currently
     *         surface as a RuntimeException raised by TermTokenizer
     */
    public Integer compute(String context, String term) throws IOException
    {
        SimpleAnalyzer analyzer = new SimpleAnalyzer(Version.LUCENE_46);
        List<String> tokens;
        try
        {
            /* tokenizeString is static, so no TermTokenizer instance is needed */
            tokens = TermTokenizer.tokenizeString(analyzer, context);
        }
        finally
        {
            /* The analyzer holds reusable token-stream state; release it on every path */
            analyzer.close();
        }

        /* Only one term is counted, so a plain counter replaces the former Map */
        int occurrences = 0;
        for (String token : tokens)
        {
            if (token.equals(term))
            {
                occurrences++;
            }
        }

        if (occurrences == 0)
        {
            /*TODO: Write error log (After class and deployment to wenshang.plsm)*/
            return null;
        }
        return Integer.valueOf(occurrences);
    }//end of function compute
}//end of class TermFrequence
/**
 * Helper that turns a string into the list of tokens produced by a Lucene analyzer.
 */
final class TermTokenizer
{
    public TermTokenizer() {}

    /**
     * Runs {@code string} through {@code analyzer} and collects the text of every
     * token it emits, in emission order.
     *
     * The stream is consumed following the TokenStream workflow: reset(), repeated
     * incrementToken(), end(), and finally close(). Closing matters because the
     * analyzer reuses its token stream between calls.
     *
     * @param analyzer analyzer used to split the text into tokens
     * @param string   text to tokenize
     * @return a new mutable list holding one entry per emitted token
     * @throws RuntimeException wrapping any IOException reported by the token stream
     */
    public static List<String> tokenizeString(SimpleAnalyzer analyzer, String string)
    {
        List<String> tokens = new ArrayList<String>();
        try
        {
            TokenStream stream = analyzer.tokenStream(null, new StringReader(string));
            try
            {
                /* The attribute instance is reused for every token; fetch it once */
                CharTermAttribute termText = stream.getAttribute(CharTermAttribute.class);
                stream.reset();
                while (stream.incrementToken())
                {
                    tokens.add(termText.toString());
                }
                /* Signals end-of-stream so the stream can finish its bookkeeping */
                stream.end();
            }
            finally
            {
                /* Always release the stream, even when tokenization fails part-way */
                stream.close();
            }
        }
        catch (IOException e)
        {
            // not expected b/c we're using a string reader...
            throw new RuntimeException(e);
        }
        return tokens;
    }
}