Skip to content

Commit 07f36c6

Browse files
committed
use matched chunks in search results
1 parent 611858d commit 07f36c6

File tree

3 files changed

+113
-10
lines changed

3 files changed

+113
-10
lines changed

src/main/java/org/codelibs/fess/webapp/semantic_search/SemanticSearchConstants.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ public class SemanticSearchConstants {
4040

4141
public static final String CONTENT_CHUNK_FIELD = PREFIX + "content.chunk_field";
4242

43+
public static final String CONTENT_CHUNK_SIZE = PREFIX + "content.chunk_size";
44+
4345
public static final String MIN_SCORE = PREFIX + "min_score";
4446

4547
public static final String MIN_CONTENT_LENGTH = PREFIX + "min_content_length";

src/main/java/org/codelibs/fess/webapp/semantic_search/helper/SemanticSearchHelper.java

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,10 @@
5050
import org.codelibs.opensearch.runner.net.OpenSearchCurl;
5151
import org.dbflute.optional.OptionalThing;
5252
import org.lastaflute.web.util.LaRequestUtil;
53+
import org.opensearch.index.query.InnerHitBuilder;
5354
import org.opensearch.index.query.QueryBuilder;
5455
import org.opensearch.index.query.QueryBuilders;
56+
import org.opensearch.search.fetch.subphase.FetchSourceContext;
5557

5658
import com.google.common.base.CharMatcher;
5759

@@ -64,6 +66,8 @@ public class SemanticSearchHelper {
6466

6567
protected Long minContentLength;
6668

69+
protected int chunkSize;
70+
6771
@PostConstruct
6872
public void init() {
6973
final SearchEngineClient client = ComponentUtil.getSearchEngineClient();
@@ -199,6 +203,20 @@ protected String load() {
199203
}
200204
}
201205

206+
buf.append("chunk_size=");
207+
final String chunkSizeValue = System.getProperty(SemanticSearchConstants.CONTENT_CHUNK_SIZE, "1");
208+
if (StringUtil.isNotBlank(chunkSizeValue)) {
209+
try {
210+
chunkSize = Integer.parseInt(chunkSizeValue);
211+
buf.append(chunkSize);
212+
} catch (final NumberFormatException e) {
213+
logger.debug("Failed to parse {}.", chunkSizeValue, e);
214+
chunkSize = 1;
215+
}
216+
} else {
217+
chunkSize = 1;
218+
}
219+
202220
return buf.toString();
203221
}
204222

@@ -286,24 +304,25 @@ public OptionalThing<QueryBuilder> newNeuralQueryBuilder(final String text) {
286304
final String nestedField = System.getProperty(CONTENT_NESTED_FIELD); // ex. content_vector
287305
if (StringUtil.isNotBlank(nestedField)) {
288306
final String vectorField = nestedField + "." + field;
307+
final InnerHitBuilder innerHit =
308+
new InnerHitBuilder(nestedField).setSize(chunkSize).setFetchSourceContext(new FetchSourceContext(false));
289309
return OptionalThing.of(QueryBuilders.nestedQuery(nestedField, new NeuralQueryBuilder.Builder().modelId(modelId)
290310
.field(vectorField).query(text).k(LaRequestUtil.getOptionalRequest().map(req -> {
291311
final Object pageSize = req.getAttribute(Constants.REQUEST_PAGE_SIZE);
292312
if (pageSize != null) {
293313
return Integer.parseInt(pageSize.toString());
294314
}
295315
return Constants.DEFAULT_PAGE_SIZE;
296-
}).orElse(Constants.DEFAULT_PAGE_SIZE)).build(), ScoreMode.Max));
297-
} else {
298-
return OptionalThing.of(new NeuralQueryBuilder.Builder().modelId(modelId).field(field).query(text)
299-
.k(LaRequestUtil.getOptionalRequest().map(req -> {
300-
final Object pageSize = req.getAttribute(Constants.REQUEST_PAGE_SIZE);
301-
if (pageSize != null) {
302-
return Integer.parseInt(pageSize.toString());
303-
}
304-
return Constants.DEFAULT_PAGE_SIZE;
305-
}).orElse(Constants.DEFAULT_PAGE_SIZE)).build());
316+
}).orElse(Constants.DEFAULT_PAGE_SIZE)).build(), ScoreMode.Max).innerHit(innerHit));
306317
}
318+
return OptionalThing.of(new NeuralQueryBuilder.Builder().modelId(modelId).field(field).query(text)
319+
.k(LaRequestUtil.getOptionalRequest().map(req -> {
320+
final Object pageSize = req.getAttribute(Constants.REQUEST_PAGE_SIZE);
321+
if (pageSize != null) {
322+
return Integer.parseInt(pageSize.toString());
323+
}
324+
return Constants.DEFAULT_PAGE_SIZE;
325+
}).orElse(Constants.DEFAULT_PAGE_SIZE)).build());
307326
}
308327
return OptionalThing.empty();
309328
}

src/main/java/org/codelibs/fess/webapp/semantic_search/rank/fusion/SemanticSearcher.java

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,24 +15,40 @@
1515
*/
1616
package org.codelibs.fess.webapp.semantic_search.rank.fusion;
1717

18+
import static org.codelibs.fess.webapp.semantic_search.SemanticSearchConstants.CONTENT_CHUNK_FIELD;
19+
import static org.codelibs.fess.webapp.semantic_search.SemanticSearchConstants.CONTENT_NESTED_FIELD;
20+
21+
import java.util.ArrayList;
22+
import java.util.Arrays;
23+
import java.util.List;
1824
import java.util.Locale;
1925
import java.util.Map;
26+
import java.util.stream.Stream;
2027

2128
import javax.annotation.PostConstruct;
2229

2330
import org.apache.logging.log4j.LogManager;
2431
import org.apache.logging.log4j.Logger;
32+
import org.codelibs.core.lang.StringUtil;
2533
import org.codelibs.fess.entity.FacetInfo;
2634
import org.codelibs.fess.entity.GeoInfo;
2735
import org.codelibs.fess.entity.HighlightInfo;
2836
import org.codelibs.fess.entity.SearchRequestParams;
37+
import org.codelibs.fess.es.client.SearchEngineClient.SearchCondition;
38+
import org.codelibs.fess.es.client.SearchEngineClient.SearchConditionBuilder;
2939
import org.codelibs.fess.mylasta.action.FessUserBean;
40+
import org.codelibs.fess.mylasta.direction.FessConfig;
3041
import org.codelibs.fess.rank.fusion.DefaultSearcher;
3142
import org.codelibs.fess.rank.fusion.SearchResult;
3243
import org.codelibs.fess.util.ComponentUtil;
44+
import org.codelibs.fess.util.DocumentUtil;
3345
import org.codelibs.fess.webapp.semantic_search.SemanticSearchConstants;
3446
import org.codelibs.fess.webapp.semantic_search.helper.SemanticSearchHelper;
3547
import org.dbflute.optional.OptionalThing;
48+
import org.opensearch.action.search.SearchRequestBuilder;
49+
import org.opensearch.search.SearchHit;
50+
import org.opensearch.search.SearchHit.NestedIdentity;
51+
import org.opensearch.search.SearchHits;
3652

3753
public class SemanticSearcher extends DefaultSearcher {
3854
private static final Logger logger = LogManager.getLogger(SemanticSearcher.class);
@@ -74,6 +90,72 @@ protected SearchResult search(final String query, final SearchRequestParams para
7490
}
7591
}
7692

93+
@Override
94+
protected SearchCondition<SearchRequestBuilder> createSearchCondition(final String query, final SearchRequestParams params,
95+
final OptionalThing<FessUserBean> userBean) {
96+
final String chunkField = System.getProperty(CONTENT_CHUNK_FIELD); // ex. content_chunk
97+
if (StringUtil.isBlank(chunkField)) {
98+
return super.createSearchCondition(query, params, userBean);
99+
}
100+
final String[] responseFields =
101+
Stream.concat(Arrays.stream(params.getResponseFields()), Stream.of(chunkField)).toArray(String[]::new);
102+
if (logger.isDebugEnabled()) {
103+
logger.debug("responseFields={}", Arrays.toString(responseFields));
104+
}
105+
return searchRequestBuilder -> {
106+
ComponentUtil.getQueryHelper().processSearchPreference(searchRequestBuilder, userBean, query);
107+
return SearchConditionBuilder.builder(searchRequestBuilder).query(query).offset(params.getStartPosition())
108+
.size(params.getPageSize()).facetInfo(params.getFacetInfo()).geoInfo(params.getGeoInfo())
109+
.highlightInfo(params.getHighlightInfo()).similarDocHash(params.getSimilarDocHash()).responseFields(responseFields)
110+
.searchRequestType(params.getType()).trackTotalHits(params.getTrackTotalHits()).minScore(params.getMinScore()).build();
111+
};
112+
}
113+
114+
@Override
115+
protected Map<String, Object> parseSearchHit(final FessConfig fessConfig, final String hlPrefix, final SearchHit searchHit) {
116+
final Map<String, Object> docMap = super.parseSearchHit(fessConfig, hlPrefix, searchHit);
117+
final Map<String, SearchHits> innerHits = searchHit.getInnerHits();
118+
if (innerHits != null) {
119+
final String chunkField = System.getProperty(CONTENT_CHUNK_FIELD); // ex. content_chunk
120+
if (StringUtil.isNotBlank(chunkField)) {
121+
final String nestedField = System.getProperty(CONTENT_NESTED_FIELD); // ex. content_vector
122+
final SearchHits innerSearchHits = innerHits.get(nestedField);
123+
if (logger.isDebugEnabled()) {
124+
logger.debug("nestedField={}, innerSearchHits={}", nestedField, innerSearchHits);
125+
}
126+
final String[] chunks = DocumentUtil.getValue(docMap, chunkField, String[].class);
127+
docMap.remove(chunkField);
128+
if (innerSearchHits != null) {
129+
final List<String> chunkList = new ArrayList<>();
130+
String contentDesc = null;
131+
for (final SearchHit hit : innerSearchHits.getHits()) {
132+
final NestedIdentity nestedIdentity = hit.getNestedIdentity();
133+
if (nestedIdentity != null) {
134+
final int offset = nestedIdentity.getOffset();
135+
if (logger.isDebugEnabled()) {
136+
logger.debug("offset={}, chunks={}", offset, chunks);
137+
}
138+
if (chunks != null && chunks.length > offset) {
139+
if (contentDesc == null) {
140+
contentDesc = chunks[offset];
141+
}
142+
chunkList.add(chunks[offset]);
143+
}
144+
}
145+
}
146+
if (StringUtil.isNotBlank(contentDesc)) {
147+
if (logger.isDebugEnabled()) {
148+
logger.debug("matched chunk: {}={}", fessConfig.getResponseFieldContentDescription(), contentDesc);
149+
}
150+
docMap.put(fessConfig.getResponseFieldContentDescription(), contentDesc);
151+
}
152+
docMap.put(chunkField, chunkList.toArray(n -> new String[n]));
153+
}
154+
}
155+
}
156+
return docMap;
157+
}
158+
77159
protected boolean isSearchableField(final String field) {
78160
for (final String f : ComponentUtil.getQueryFieldConfig().getSearchFields()) {
79161
if (field.equals(f)) {

0 commit comments

Comments
 (0)