From dd9a4c07573efb36a06d30eda837109c1890adcd Mon Sep 17 00:00:00 2001 From: Anshu Avinash Date: Mon, 29 Jul 2024 16:42:33 +0530 Subject: [PATCH] Use efficient filtering for opensearch (#167) The current filtering query does post-filtering, which is not ideal and results in low precision. Results with post-filtering: ``` { "params": { "dataset": "arxiv-titles-384-angular-filters", "experiment": "opensearch-default", "engine": "opensearch", "parallel": 1, "config": { "knn.algo_param.ef_search": 128 } }, "results": { "total_time": 708.6168032020068, "mean_time": 0.07045893384491791, "mean_precisions": 0.11399200000000001, "std_time": 0.06840096039381999, "min_time": 0.008397486002650112, "max_time": 3.3753458530118223, "rps": 14.111999538838594, "p95_time": 0.18164870390755816, "p99_time": 0.20864198897208555 } } ``` Results with the new efficient filtering: ``` { "params": { "dataset": "arxiv-titles-384-angular-filters", "experiment": "opensearch-default", "engine": "opensearch", "parallel": 1, "config": { "knn.algo_param.ef_search": 128 } }, "results": { "total_time": 394.4290532110026, "mean_time": 0.03913764159695711, "mean_precisions": 0.610144, "std_time": 0.05352479065894972, "min_time": 0.0009066620114026591, "max_time": 2.1307434440095676, "rps": 25.35310195481576, "p95_time": 0.1274049270534305, "p99_time": 0.2078282342318563 } } ``` --- engine/clients/opensearch/search.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/engine/clients/opensearch/search.py b/engine/clients/opensearch/search.py index a3e36058..fc7b5cbf 100644 --- a/engine/clients/opensearch/search.py +++ b/engine/clients/opensearch/search.py @@ -59,12 +59,7 @@ def search_one(cls, query: Query, top: int) -> List[Tuple[int, float]]: meta_conditions = cls.parser.parse(query.meta_conditions) if meta_conditions: - opensearch_query = { - "bool": { - "must": [opensearch_query], - "filter": meta_conditions, - } - } + opensearch_query["knn"]["vector"]["filter"] = meta_conditions res = 
cls.client.search( index=OPENSEARCH_INDEX,