From e9dd72d9a8d05ca8c99622ec9ee281cc5674f44b Mon Sep 17 00:00:00 2001 From: Kai Schlamp Date: Sun, 31 Mar 2024 14:47:43 +0000 Subject: [PATCH] Fix RAG preparation --- radis/vespa/providers.py | 4 ++-- radis/vespa/utils/document_utils.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/radis/vespa/providers.py b/radis/vespa/providers.py index 695a102b..ac48aea2 100644 --- a/radis/vespa/providers.py +++ b/radis/vespa/providers.py @@ -7,7 +7,7 @@ from radis.rag.site import RetrievalResult from radis.search.site import Search, SearchResult -from .utils.document_utils import document_from_vespa_response +from .utils.document_utils import document_from_vespa_response, extract_document_id from .utils.query_utils import build_yql_filter from .vespa_app import ( BM25_RANK_PROFILE, @@ -133,5 +133,5 @@ def retrieve_bm25(search: Search) -> RetrievalResult: return RetrievalResult( total_count=response.json["root"]["fields"]["totalCount"], coverage=response.json["root"]["coverage"]["coverage"], - document_ids=[hit["fields"]["document_id"] for hit in response.hits], + document_ids=[extract_document_id(hit["fields"]["documentid"]) for hit in response.hits], ) diff --git a/radis/vespa/utils/document_utils.py b/radis/vespa/utils/document_utils.py index 0be2cb50..3764435d 100644 --- a/radis/vespa/utils/document_utils.py +++ b/radis/vespa/utils/document_utils.py @@ -104,13 +104,13 @@ def callback(response: VespaResponse, id: str): ) -def _extract_document_id(documentid: str) -> str: +def extract_document_id(documentid: str) -> str: # https://docs.vespa.ai/en/documents.html#document-ids return documentid.split(":")[-1] def document_from_vespa_response(record: dict[str, Any]) -> ReportDocument: - document_id = _extract_document_id(record["fields"]["documentid"]) + document_id = extract_document_id(record["fields"]["documentid"]) patient_birth_date = date.fromtimestamp(record["fields"]["patient_birth_date"]) study_datetime = datetime.fromtimestamp(record["fields"]["study_datetime"])