Skip to content

Commit

Permalink
[Releases 2.13] Make custom vector query result JSON serializable (#1109)
Browse files Browse the repository at this point in the history
  • Loading branch information
vicilliar authored Feb 3, 2025
1 parent c05a794 commit bfd241d
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 3 deletions.
6 changes: 5 additions & 1 deletion src/marqo/tensor_search/tensor_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -1635,7 +1635,11 @@ def search(config: Config, index_name: str, text: Optional[Union[str, dict, Cust
except Exception as e:
raise api_exceptions.BadRequestError(f"reranking failure due to {str(e)}")

search_result["query"] = text
if isinstance(text, CustomVectorQuery):
search_result["query"] = text.dict() # Make object JSON serializable
else:
search_result["query"] = text

search_result["limit"] = result_count
search_result["offset"] = offset

Expand Down
37 changes: 37 additions & 0 deletions tests/tensor_search/integ_tests/test_hybrid_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from marqo.tensor_search.models.api_models import ScoreModifierLists
from marqo.tensor_search.models.search import SearchContext
from tests.marqo_test import MarqoTestCase, TestImageUrls
from fastapi.responses import JSONResponse, ORJSONResponse


class TestHybridSearch(MarqoTestCase):
Expand Down Expand Up @@ -361,6 +362,12 @@ def run():
sample_vector)
self.assertIn("hits", res)

# Result should be JSON serializable
try:
ORJSONResponse(res)
except TypeError as e:
self.fail(f"Result is not JSON serializable: {e}")

with self.subTest("Custom vector query, with context, with content"):
@unittest.mock.patch("marqo.vespa.vespa_client.VespaClient.query", mock_vespa_client_query)
def run():
Expand Down Expand Up @@ -398,6 +405,12 @@ def run():
[i*1.5 for i in sample_vector]) # Should average the query & context vectors
self.assertIn("hits", res)

# Result should be JSON serializable
try:
ORJSONResponse(res)
except TypeError as e:
self.fail(f"Result is not JSON serializable: {e}")

with self.subTest("Custom vector query, no content, no context, tensor/tensor"):
@unittest.mock.patch("marqo.vespa.vespa_client.VespaClient.query", mock_vespa_client_query)
def run():
Expand Down Expand Up @@ -432,6 +445,12 @@ def run():
sample_vector)
self.assertIn("hits", res)

# Result should be JSON serializable
try:
ORJSONResponse(res)
except TypeError as e:
self.fail(f"Result is not JSON serializable: {e}")

def test_hybrid_search_semi_structured_with_custom_vector_query(self):
"""
Tests that using a custom vector query sends the correct arguments to vespa
Expand Down Expand Up @@ -495,6 +514,12 @@ def run():
sample_vector)
self.assertIn("hits", res)

# Result should be JSON serializable
try:
ORJSONResponse(res)
except TypeError as e:
self.fail(f"Result is not JSON serializable: {e}")

with self.subTest("Custom vector query, with context, with content"):
@unittest.mock.patch("marqo.vespa.vespa_client.VespaClient.query", mock_vespa_client_query)
def run():
Expand Down Expand Up @@ -532,6 +557,12 @@ def run():
[i*1.5 for i in sample_vector]) # Should average the query & context vectors
self.assertIn("hits", res)

# Result should be JSON serializable
try:
ORJSONResponse(res)
except TypeError as e:
self.fail(f"Result is not JSON serializable: {e}")

with self.subTest("Custom vector query, no content, no context, tensor/tensor"):
@unittest.mock.patch("marqo.vespa.vespa_client.VespaClient.query", mock_vespa_client_query)
def run():
Expand Down Expand Up @@ -565,6 +596,12 @@ def run():
sample_vector)
self.assertIn("hits", res)

# Result should be JSON serializable
try:
ORJSONResponse(res)
except TypeError as e:
self.fail(f"Result is not JSON serializable: {e}")

def test_hybrid_search_disjunction_rrf_zero_alpha_same_as_lexical(self):
"""
Tests that hybrid search with:
Expand Down
36 changes: 34 additions & 2 deletions tests/tensor_search/integ_tests/test_search_combined.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
from marqo.core.models.score_modifier import ScoreModifierType, ScoreModifier
from marqo.core.structured_vespa_index.structured_vespa_index import StructuredVespaIndex
from marqo.core.unstructured_vespa_index.unstructured_vespa_index import UnstructuredVespaIndex
from marqo.tensor_search.models.api_models import SearchQuery
from marqo.tensor_search.models.api_models import SearchQuery, CustomVectorQuery
from pydantic import ValidationError
import marqo.api.exceptions as api_exceptions
from fastapi.responses import JSONResponse, ORJSONResponse


class TestSearch(MarqoTestCase):
Expand Down Expand Up @@ -1046,4 +1047,35 @@ def test_search_invalid_image_url_image_return_proper_error(self):
tensor_search.search(
text=query, config=self.config, index_name=index_name.name,
)
self.assertIn("Error vectorising content", str(e.exception))
self.assertIn("Error vectorising content", str(e.exception))

def test_search_results_always_json_serializable(self):
    """
    Check that search() results are JSON serializable for every supported query type.

    The search() text parameter may be a str, a dict, or a CustomVectorQuery, and the
    query is returned as part of the result. Each query type is run against both the
    structured and the unstructured default text index.
    """
    zero_vector = [0] * 384
    custom_vector_query = CustomVectorQuery(
        customVector=CustomVectorQuery.CustomVector(
            content="hello",
            vector=zero_vector,
        )
    )
    queries = [
        "hello",
        {"hello": 1, "another one": 2},
        {"hello": 1.5, "another one": 2.34},
        custom_vector_query,
    ]
    indexes = [self.structured_default_text_index, self.unstructured_default_text_index]

    for index in indexes:
        for query in queries:
            with self.subTest(index=index.type, query=query):
                res = tensor_search.search(
                    text=query, config=self.config, index_name=index.name,
                )

                # A TypeError raised while constructing the response is reported
                # as a serialization failure for this subtest.
                try:
                    ORJSONResponse(res)
                except TypeError as e:
                    self.fail(f"Result is not JSON serializable: {e}")

0 comments on commit bfd241d

Please sign in to comment.