diff --git a/paperqa/agents/search.py b/paperqa/agents/search.py
index f010a2a2b..b14f90702 100644
--- a/paperqa/agents/search.py
+++ b/paperqa/agents/search.py
@@ -535,6 +535,10 @@ async def get_directory_index(  # noqa: PLR0912
     # NOTE: if the index was not previously built, its index_files will be empty.
     # Otherwise, the index_files will not be empty
     if not build:
+        if not await search_index.index_files:
+            raise RuntimeError(
+                f"Index {search_index.index_name} was empty, please rebuild it."
+            )
         return search_index
 
     if not sync_index_w_directory:
diff --git a/tests/test_agents.py b/tests/test_agents.py
index d561afb20..0bb60bebe 100644
--- a/tests/test_agents.py
+++ b/tests/test_agents.py
@@ -94,6 +94,11 @@ async def test_get_directory_index(agent_test_settings: Settings) -> None:
         assert len(await index.index_files) == len(path_to_id) - 1
         mock_aadd.assert_not_awaited(), "Expected we didn't re-add files"
 
+        # NOTE: delete files.zip, then confirm the index can no longer be loaded
+        await (await index.file_index_filename).unlink()
+        with pytest.raises(RuntimeError, match="please rebuild"):
+            await get_directory_index(settings=agent_test_settings, build=False)
+
 
 EXPECTED_STUB_DATA_FILES = {
     "bates.txt",
diff --git a/tests/test_task.py b/tests/test_task.py
index d9aec2413..4e8a56794 100644
--- a/tests/test_task.py
+++ b/tests/test_task.py
@@ -7,7 +7,7 @@
 from ldp.alg.runners import Evaluator, EvaluatorConfig
 
 from paperqa import Docs, QueryRequest, Settings
-from paperqa.agents import SearchIndex
+from paperqa.agents import get_directory_index
 from paperqa.agents.task import (
     GradablePaperQAEnvironment,
     LitQATaskDataset,
@@ -77,6 +77,7 @@ def test___len__(
 
     @pytest.mark.asyncio
     async def test_evaluation(self, base_query_request: QueryRequest) -> None:
+        await get_directory_index(settings=base_query_request.settings)  # Build
         docs = Docs()
         # Why are we constructing a TaskConfig here using a serialized QueryRequest and
         # Docs? It's to confirm everything works as if hydrating from a YAML config file
@@ -136,15 +137,14 @@ async def test_tool_failure(self, base_query_request: QueryRequest) -> None:
             dataset=dataset,
             callbacks=[metrics_callback],
         )
-        with patch.object(
-            SearchIndex,
-            "query",
+        with patch(
+            "paperqa.agents.search.SearchIndex",
             side_effect=Exception("Totally unexpected but retryable error."),
-        ) as mock_query:
+        ) as mock_SearchIndex:
             await evaluator.evaluate()  # Confirm this does not crash
         assert (
             metrics_callback.eval_means["truncation_rate"] == 1.0
         ), "Expected 100% truncations due to max_rollout_steps"
-        mock_query.assert_awaited(), "Expected failures to come from unit test"
+        mock_SearchIndex.assert_called(), "Expected failures to come from unit test"
         assert metrics_callback.eval_means["correct"] == 0.0
         assert metrics_callback.eval_means["correct_unsure"] == 0.0