2
2
3
3
import java .io .File ;
4
4
import java .io .IOException ;
5
+ import java .io .StringReader ;
5
6
import java .net .MalformedURLException ;
6
7
import java .net .URL ;
7
8
import java .nio .file .FileSystems ;
22
23
import javax .annotation .PreDestroy ;
23
24
24
25
import org .apache .lucene .analysis .Analyzer ;
26
+ import org .apache .lucene .analysis .TokenStream ;
27
+ import org .apache .lucene .analysis .Tokenizer ;
25
28
import org .apache .lucene .analysis .core .KeywordAnalyzer ;
29
+ import org .apache .lucene .analysis .core .LowerCaseFilter ;
26
30
import org .apache .lucene .analysis .core .WhitespaceAnalyzer ;
31
+ import org .apache .lucene .analysis .custom .CustomAnalyzer ;
32
+ import org .apache .lucene .analysis .miscellaneous .ASCIIFoldingFilter ;
27
33
import org .apache .lucene .analysis .miscellaneous .PerFieldAnalyzerWrapper ;
28
34
import org .apache .lucene .analysis .standard .StandardAnalyzer ;
35
+ import org .apache .lucene .analysis .standard .StandardFilter ;
36
+ import org .apache .lucene .analysis .standard .StandardTokenizer ;
37
+ import org .apache .lucene .analysis .standard .StandardTokenizerFactory ;
38
+ import org .apache .lucene .analysis .tokenattributes .CharTermAttribute ;
29
39
import org .apache .lucene .document .Document ;
30
40
import org .apache .lucene .document .Field ;
31
41
import org .apache .lucene .document .SortedDocValuesField ;
36
46
import org .apache .lucene .index .IndexWriterConfig ;
37
47
import org .apache .lucene .index .IndexableField ;
38
48
import org .apache .lucene .index .Term ;
49
+ import org .apache .lucene .queryparser .classic .QueryParser ;
39
50
import org .apache .lucene .search .BooleanClause ;
40
51
import org .apache .lucene .search .BooleanClause .Occur ;
41
52
import org .apache .lucene .search .BooleanQuery ;
53
64
import org .apache .lucene .store .Directory ;
54
65
import org .apache .lucene .store .FSDirectory ;
55
66
import org .apache .lucene .util .BytesRef ;
67
+ import org .apache .lucene .util .PagedBytes .Reader ;
56
68
import org .apache .lucene .util .QueryBuilder ;
69
+ import org .apache .lucene .util .Version ;
57
70
import org .jsoup .Jsoup ;
58
71
import org .slf4j .Logger ;
59
72
import org .slf4j .LoggerFactory ;
62
75
import org .springframework .context .annotation .PropertySource ;
63
76
import org .springframework .core .env .Environment ;
64
77
import org .springframework .stereotype .Component ;
65
-
66
78
import edu .asu .conceptpower .app .constants .LuceneFieldNames ;
67
79
import edu .asu .conceptpower .app .constants .SearchFieldNames ;
68
80
import edu .asu .conceptpower .app .db4o .IConceptDBManager ;
@@ -124,6 +136,7 @@ public class LuceneUtility implements ILuceneUtility {
124
136
125
137
private String lucenePath ;
126
138
139
+
127
140
private int numberOfResults ;
128
141
129
142
private IndexWriter writer = null ;
@@ -132,19 +145,23 @@ public class LuceneUtility implements ILuceneUtility {
132
145
private Directory index ;
133
146
private Path relativePath = null ;
134
147
private IndexSearcher searcher = null ;
135
-
148
+ private Analyzer customAnalyzer = null ;
149
+
136
150
/**
137
151
*
138
152
* @throws LuceneException
139
153
*/
140
154
@ PostConstruct
141
- public void init () throws LuceneException {
155
+ public void init () throws LuceneException , IOException {
156
+ customAnalyzer = CustomAnalyzer .builder ().withTokenizer ("keyword" ).addTokenFilter ("asciifolding" ).addTokenFilter ("worddelimiter" ).
157
+ addTokenFilter ("lowercase" ).build ();
142
158
lucenePath = env .getProperty ("lucenePath" );
143
159
numberOfResults = Integer .parseInt (env .getProperty ("numberOfLuceneResults" ));
144
160
try {
145
161
relativePath = FileSystems .getDefault ().getPath (lucenePath , "index" );
162
+
146
163
index = FSDirectory .open (relativePath );
147
- configWhiteSpace = new IndexWriterConfig (standardAnalyzer );
164
+ configWhiteSpace = new IndexWriterConfig (customAnalyzer );
148
165
writer = new IndexWriter (index , configWhiteSpace );
149
166
reader = DirectoryReader .open (writer , true );
150
167
searcher = new IndexSearcher (reader );
@@ -256,7 +273,7 @@ private ConceptEntry getConceptFromDocument(Document d) throws IllegalAccessExce
256
273
LuceneField luceneFieldAnnotation = field .getAnnotation (LuceneField .class );
257
274
field .setAccessible (true );
258
275
if (luceneFieldAnnotation != null && d .get (luceneFieldAnnotation .lucenefieldName ()) != null )
259
- if (! luceneFieldAnnotation .isMultiple ()) {
276
+ if (luceneFieldAnnotation .isMultiple ()) {
260
277
IndexableField [] indexableFields = d .getFields (luceneFieldAnnotation .lucenefieldName () + LuceneFieldNames .NOT_LOWERCASED );
261
278
if (indexableFields == null || indexableFields .length == 0 ) {
262
279
indexableFields = d .getFields (luceneFieldAnnotation .lucenefieldName ());
@@ -497,7 +514,7 @@ public ConceptEntry[] queryIndex(Map<String, String> fieldMap, String operator,
497
514
if (operator == null || operator .equalsIgnoreCase (SearchParamters .OP_AND )) {
498
515
occur = BooleanClause .Occur .MUST ;
499
516
}
500
-
517
+
501
518
java .lang .reflect .Field [] fields = ConceptEntry .class .getDeclaredFields ();
502
519
503
520
for (java .lang .reflect .Field field : fields ) {
@@ -517,9 +534,8 @@ public ConceptEntry[] queryIndex(Map<String, String> fieldMap, String operator,
517
534
518
535
}
519
536
520
- PerFieldAnalyzerWrapper perFieldAnalyzerWrapper = new PerFieldAnalyzerWrapper (standardAnalyzer ,
521
- analyzerPerField );
522
-
537
+
538
+ PerFieldAnalyzerWrapper perFieldAnalyzerWrapper = new PerFieldAnalyzerWrapper (customAnalyzer , analyzerPerField );
523
539
QueryBuilder qBuild = new QueryBuilder (perFieldAnalyzerWrapper );
524
540
BooleanQuery .Builder builder = new BooleanQuery .Builder ();
525
541
@@ -579,32 +595,25 @@ public ConceptEntry[] queryIndex(Map<String, String> fieldMap, String operator,
579
595
ConceptEntry entry = getConceptFromDocument (d );
580
596
concepts .add (entry );
581
597
}
598
+ return concepts .toArray (new ConceptEntry [concepts .size ()]);
582
599
}
583
600
584
601
catch (IOException ex ) {
585
602
throw new LuceneException ("Issues in querying lucene index. Please retry" , ex );
586
603
}
587
- logger .debug ("Number of concepts retrieved from lucene = " + concepts .size ());
588
- return concepts .toArray (new ConceptEntry [concepts .size ()]);
589
-
590
604
}
591
605
592
- private void buildQuery (BooleanClause .Occur occur , PerFieldAnalyzerWrapper perFieldAnalyzerWrapper ,
593
- QueryBuilder qBuild , BooleanQuery .Builder builder , LuceneField luceneFieldAnnotation , String searchString ) {
606
+ private void buildQuery (BooleanClause .Occur occur , PerFieldAnalyzerWrapper perFieldAnalyzerWrapper ,QueryBuilder qBuild , BooleanQuery .Builder builder , LuceneField luceneFieldAnnotation , String searchString ) {
594
607
if (luceneFieldAnnotation .isTokenized ()) {
595
608
BooleanQuery .Builder tokenizedQueryBuilder = new BooleanQuery .Builder ();
596
- buildTokenizedOrWildCardQuery (luceneFieldAnnotation , searchString , tokenizedQueryBuilder );
609
+ buildTokenizedOrWildCardQuery (luceneFieldAnnotation , searchString , qBuild , tokenizedQueryBuilder );
597
610
598
611
if (luceneFieldAnnotation .isShortPhraseSearchable ()) {
599
612
BooleanQuery .Builder rootQueryBuilder = new BooleanQuery .Builder ();
600
613
rootQueryBuilder .add (tokenizedQueryBuilder .build (), Occur .SHOULD );
601
614
// Short word searching
602
615
BooleanQuery .Builder shortWordSearchQueryBuilder = new BooleanQuery .Builder ();
603
- shortWordSearchQueryBuilder .add (
604
- new PhraseQuery (luceneFieldAnnotation .lucenefieldName () + LuceneFieldNames .UNTOKENIZED_SUFFIX ,
605
- searchString ),
606
- Occur .SHOULD );
607
-
616
+ shortWordSearchQueryBuilder .add (new PhraseQuery (luceneFieldAnnotation .lucenefieldName () + LuceneFieldNames .UNTOKENIZED_SUFFIX , searchString ), Occur .SHOULD );
608
617
rootQueryBuilder .add (shortWordSearchQueryBuilder .build (), Occur .SHOULD );
609
618
tokenizedQueryBuilder = rootQueryBuilder ;
610
619
}
@@ -613,24 +622,25 @@ private void buildQuery(BooleanClause.Occur occur, PerFieldAnalyzerWrapper perFi
613
622
} else {
614
623
if (luceneFieldAnnotation .isWildCardSearchEnabled ()) {
615
624
createWildCardSearchQuery (luceneFieldAnnotation , searchString , builder , occur );
616
- } else {
617
- builder .add (new BooleanClause (
618
- new TermQuery (new Term (luceneFieldAnnotation .lucenefieldName (), searchString )), occur ));
619
625
}
620
- }
626
+ builder .add (new BooleanClause ((qBuild .createPhraseQuery (luceneFieldAnnotation .lucenefieldName (), searchString )), occur ));
627
+ }
621
628
}
622
629
623
- private void buildTokenizedOrWildCardQuery (LuceneField luceneFieldAnnotation , String searchString ,
624
- BooleanQuery .Builder tokenizedQueryBuilder ) {
625
- for (String searchValue : searchString .split (" " )) {
630
+ private void buildTokenizedOrWildCardQuery (LuceneField luceneFieldAnnotation , String searchString , QueryBuilder qBuild ,
631
+ BooleanQuery .Builder tokenizedQueryBuilder ) {
626
632
if (luceneFieldAnnotation .isWildCardSearchEnabled ()) {
627
- createWildCardSearchQuery (luceneFieldAnnotation , searchValue , tokenizedQueryBuilder , Occur .MUST );
633
+ BooleanQuery .Builder analyzedBuilder = new BooleanQuery .Builder ();
634
+ createWildCardSearchQuery (luceneFieldAnnotation , searchString , analyzedBuilder , Occur .SHOULD );
635
+ analyzedBuilder .add (new BooleanClause (
636
+ (qBuild .createPhraseQuery (luceneFieldAnnotation .lucenefieldName (), searchString )), Occur .SHOULD ));
637
+ tokenizedQueryBuilder .add (analyzedBuilder .build (), Occur .MUST );
628
638
} else {
629
- tokenizedQueryBuilder .add (new PhraseQuery (luceneFieldAnnotation .lucenefieldName (), searchValue ),
639
+ tokenizedQueryBuilder .add (qBuild . createPhraseQuery (luceneFieldAnnotation .lucenefieldName (), searchString ),
630
640
Occur .MUST );
631
641
}
632
642
}
633
- }
643
+
634
644
635
645
/**
636
646
* This method adds the wild card query to the query builder when the
0 commit comments