From 9db1d83b747bd4563ff75aec63abe1c58f819df0 Mon Sep 17 00:00:00 2001 From: Anshu Avinash Date: Mon, 5 Aug 2024 13:54:51 +0530 Subject: [PATCH] Fix opensearch query parser (#172) * Fix opensearch query parser Also specify the schema for id in the arxiv-titles-384-angular-filters dataset. This is needed because the default mapping created by opensearch infers the type as float, but the dataset also contains string ids. With these changes, opensearch reaches 99% precision on the test dataset. ``` { "params": { "dataset": "arxiv-titles-384-angular-filters", "experiment": "opensearch-default", "engine": "opensearch", "parallel": 10, "config": { "knn.algo_param.ef_search": 128 } }, "results": { "total_time": 391.3466711850051, "mean_time": 0.37053632343088827, "mean_precisions": 0.98962, "std_time": 0.3485163122349799, "min_time": 0.04950378900684882, "max_time": 3.548553360000369, "rps": 25.552791773390613, "p95_time": 1.0473700279486362, "p99_time": 1.4396829416653787 } } ``` Fixes #171 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- datasets/datasets.json | 3 ++- engine/clients/opensearch/parser.py | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/datasets/datasets.json b/datasets/datasets.json index c9111a04..b3622777 100644 --- a/datasets/datasets.json +++ b/datasets/datasets.json @@ -138,7 +138,8 @@ "schema": { "update_date_ts": "int", "labels": "keyword", - "submitter": "keyword" + "submitter": "keyword", + "id": "keyword" } }, { diff --git a/engine/clients/opensearch/parser.py b/engine/clients/opensearch/parser.py index 31eccd8f..527dc5ab 100644 --- a/engine/clients/opensearch/parser.py +++ b/engine/clients/opensearch/parser.py @@ -25,7 +25,12 @@ def build_range_filter( lte: Optional[FieldValue], gte: Optional[FieldValue], ) -> Any: - return {"range": {field_name: {"lt": lt, "gt": gt, "lte": lte, "gte": gte}}} 
+ field_filters = { + k: v + for k, v in {"lt": lt, "gt": gt, "lte": lte, "gte": gte}.items() + if v is not None + } + return {"range": {field_name: field_filters}} def build_geo_filter( self, field_name: str, lat: float, lon: float, radius: float