|
| 1 | +/* |
| 2 | + * Copyright 2012-2024 CodeLibs Project and the Others. |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, |
| 13 | + * either express or implied. See the License for the specific language |
| 14 | + * governing permissions and limitations under the License. |
| 15 | + */ |
| 16 | +package org.codelibs.fess.crawler.service; |
| 17 | + |
| 18 | +import java.util.List; |
| 19 | +import java.util.Map; |
| 20 | + |
| 21 | +import org.apache.logging.log4j.LogManager; |
| 22 | +import org.apache.logging.log4j.Logger; |
| 23 | +import org.codelibs.fess.crawler.entity.EsUrlQueue; |
| 24 | +import org.codelibs.fess.crawler.service.impl.EsUrlQueueService; |
| 25 | +import org.codelibs.fess.crawler.util.EsCrawlerConfig; |
| 26 | +import org.codelibs.fess.es.config.exentity.CrawlingConfig; |
| 27 | +import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName; |
| 28 | +import org.codelibs.fess.helper.CrawlingConfigHelper; |
| 29 | +import org.codelibs.fess.util.ComponentUtil; |
| 30 | +import org.opensearch.index.query.QueryBuilders; |
| 31 | +import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; |
| 32 | +import org.opensearch.index.query.functionscore.RandomScoreFunctionBuilder; |
| 33 | +import org.opensearch.search.sort.SortBuilders; |
| 34 | +import org.opensearch.search.sort.SortOrder; |
| 35 | + |
| 36 | +public class FessUrlQueueService extends EsUrlQueueService { |
| 37 | + private static final Logger logger = LogManager.getLogger(FessUrlQueueService.class); |
| 38 | + |
| 39 | + public FessUrlQueueService(final EsCrawlerConfig crawlerConfig) { |
| 40 | + super(crawlerConfig); |
| 41 | + } |
| 42 | + |
| 43 | + @Override |
| 44 | + protected List<EsUrlQueue> fetchUrlQueueList(final String sessionId) { |
| 45 | + final CrawlingConfigHelper crawlingConfigHelper = ComponentUtil.getCrawlingConfigHelper(); |
| 46 | + final CrawlingConfig crawlingConfig = crawlingConfigHelper.get(sessionId); |
| 47 | + final Map<String, String> configParams = crawlingConfig.getConfigParameterMap(ConfigName.CONFIG); |
| 48 | + final String crawlOrder = configParams.getOrDefault(CrawlingConfig.Param.Config.CRAWL_ORDER, "sequential"); |
| 49 | + if ("random".equals(crawlOrder)) { |
| 50 | + return getList(EsUrlQueue.class, sessionId, |
| 51 | + QueryBuilders.functionScoreQuery(QueryBuilders.matchAllQuery(), |
| 52 | + new FunctionScoreQueryBuilder.FilterFunctionBuilder[] { new FunctionScoreQueryBuilder.FilterFunctionBuilder( |
| 53 | + new RandomScoreFunctionBuilder().seed(sessionId.hashCode())) }), |
| 54 | + 0, pollingFetchSize, SortBuilders.scoreSort().order(SortOrder.ASC)); |
| 55 | + } else if (!"sequential".equals(crawlOrder)) { |
| 56 | + logger.warn("Invalid crawl order specified: {}. Falling back to sequential.", crawlOrder); |
| 57 | + } |
| 58 | + return getList(EsUrlQueue.class, sessionId, null, 0, pollingFetchSize, SortBuilders.fieldSort(CREATE_TIME).order(SortOrder.ASC)); |
| 59 | + } |
| 60 | +} |
0 commit comments