diff --git a/paperqa/agents/env.py b/paperqa/agents/env.py index da265bd6..171ee0ba 100644 --- a/paperqa/agents/env.py +++ b/paperqa/agents/env.py @@ -129,6 +129,9 @@ def make_initial_state_and_tools(self) -> tuple[EnvironmentState, list[Tool]]: return self.state, self.tools async def reset(self) -> tuple[list[Message], list[Tool]]: + # NOTE: don't build the index here, as sometimes we asyncio.gather over this + # method, and our current design (as of v5.0.10) could hit race conditions + # because index building does not use file locks self._docs.clear_docs() self.state, self.tools = self.make_initial_state_and_tools() return ( diff --git a/paperqa/agents/main.py b/paperqa/agents/main.py index 49d8c54a..c5f62f6c 100644 --- a/paperqa/agents/main.py +++ b/paperqa/agents/main.py @@ -27,7 +27,7 @@ from .env import PaperQAEnvironment from .helpers import litellm_get_search_query, table_formatter from .models import AgentStatus, AnswerResponse, QueryRequest, SimpleProfiler -from .search import SearchDocumentStorage, SearchIndex +from .search import SearchDocumentStorage, SearchIndex, get_directory_index from .tools import EnvironmentState, GatherEvidence, GenerateAnswer, PaperSearch if TYPE_CHECKING: @@ -106,6 +106,8 @@ async def run_agent( f" query {query.model_dump()}." ) + # Build the index once here, and then all tools won't need to rebuild it + await get_directory_index(settings=query.settings) if isinstance(agent_type, str) and agent_type.lower() == FAKE_AGENT_TYPE: answer, agent_status = await run_fake_agent(query, docs, **runner_kwargs) elif tool_selector_or_none := query.settings.make_aviary_tool_selector(agent_type): diff --git a/paperqa/agents/tools.py b/paperqa/agents/tools.py index 69e330ae..f18b1c2b 100644 --- a/paperqa/agents/tools.py +++ b/paperqa/agents/tools.py @@ -124,7 +124,7 @@ async def paper_search( offset = self.previous_searches[search_key] = 0 logger.info(f"Starting paper search for {query!r}.") - index = await get_directory_index(settings=self.settings) + index = await get_directory_index(settings=self.settings, build=False) results: list[Docs] = await index.query( query, top_n=self.settings.agent.search_count,