Skip to content

Commit

Permalink
Validating for broken index (#544)
Browse files (browse the repository at this point in the history)
  • Loading branch information
jamesbraza authored Oct 9, 2024
1 parent 65660d4 commit 62d4275
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 6 deletions.
4 changes: 4 additions & 0 deletions paperqa/agents/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,10 @@ async def get_directory_index( # noqa: PLR0912
# NOTE: if the index was not previously built, its index_files will be empty.
# Otherwise, the index_files will not be empty
if not build:
if not await search_index.index_files:
raise RuntimeError(
f"Index {search_index.index_name} was empty, please rebuild it."
)
return search_index

if not sync_index_w_directory:
Expand Down
5 changes: 5 additions & 0 deletions tests/test_agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ async def test_get_directory_index(agent_test_settings: Settings) -> None:
assert len(await index.index_files) == len(path_to_id) - 1
mock_aadd.assert_not_awaited(), "Expected we didn't re-add files"

# NOTE: let's delete files.zip and confirm we can't load the index
await (await index.file_index_filename).unlink()
with pytest.raises(RuntimeError, match="please rebuild"):
await get_directory_index(settings=agent_test_settings, build=False)


EXPECTED_STUB_DATA_FILES = {
"bates.txt",
Expand Down
12 changes: 6 additions & 6 deletions tests/test_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from ldp.alg.runners import Evaluator, EvaluatorConfig

from paperqa import Docs, QueryRequest, Settings
from paperqa.agents import SearchIndex
from paperqa.agents import get_directory_index
from paperqa.agents.task import (
GradablePaperQAEnvironment,
LitQATaskDataset,
Expand Down Expand Up @@ -77,6 +77,7 @@ def test___len__(

@pytest.mark.asyncio
async def test_evaluation(self, base_query_request: QueryRequest) -> None:
await get_directory_index(settings=base_query_request.settings) # Build
docs = Docs()
# Why are we constructing a TaskConfig here using a serialized QueryRequest and
# Docs? It's to confirm everything works as if hydrating from a YAML config file
Expand Down Expand Up @@ -136,15 +137,14 @@ async def test_tool_failure(self, base_query_request: QueryRequest) -> None:
dataset=dataset,
callbacks=[metrics_callback],
)
with patch.object(
SearchIndex,
"query",
with patch(
"paperqa.agents.search.SearchIndex",
side_effect=Exception("Totally unexpected but retryable error."),
) as mock_query:
) as mock_SearchIndex:
await evaluator.evaluate() # Confirm this does not crash
assert (
metrics_callback.eval_means["truncation_rate"] == 1.0
), "Expected 100% truncations due to max_rollout_steps"
mock_query.assert_awaited(), "Expected failures to come from unit test"
mock_SearchIndex.assert_called(), "Expected failures to come from unit test"
assert metrics_callback.eval_means["correct"] == 0.0
assert metrics_callback.eval_means["correct_unsure"] == 0.0

0 comments on commit 62d4275

Please sign in to comment.