Skip to content

Commit 6d98d72

Browse files
committed
Embedded query generation into Data provider
1 parent 26421e8 commit 6d98d72

File tree

8 files changed

+318
-348
lines changed

8 files changed

+318
-348
lines changed

.env.example

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,6 @@ MILVUS_URL=""
1919

2020
# miscellaneous
2121
MCP_PROXY_LOCAL_PORT=""
22+
23+
# Config path
24+
EVALUATOR_CONFIG_PATH=""

evaluator/algorithms/tool_rag_algorithm.py

Lines changed: 7 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@ class ToolRagAlgorithm(Algorithm):
8484
- max_document_size: the maximal size, in characters, of a single indexed document, or None to disable the size limit.
8585
- indexed_tool_def_parts: the parts of the MCP tool definition to be used for index construction, such as 'name',
8686
'description', 'args', etc.
87-
You can also include 'additional_queries' (or 'examples') to append example queries for each tool if provided
88-
via the 'additional_queries' setting (see defaults below).
87+
You can also include 'examples' to append example queries for each tool; they are read from the tool's
88+
metadata under the 'examples' key (a mapping whose values are the example query strings).
8989
- hybrid_mode: True to enable hybrid (sparse + dense) search and False to only enable dense search.
9090
- analyzer_params: parameters for the Milvus BM25 analyzer.
9191
- fusion_type: the algorithm for combining the dense and the sparse scores if hybrid mode is activated. Milvus only
@@ -130,7 +130,7 @@ def get_default_settings(self) -> Dict[str, Any]:
130130
"embedding_model_id": "all-MiniLM-L6-v2",
131131
"similarity_metric": "COSINE",
132132
"index_type": "FLAT",
133-
"indexed_tool_def_parts": ["name", "description", "additional_queries"],
133+
"indexed_tool_def_parts": ["name", "description"],
134134

135135

136136
# preprocessing
@@ -208,7 +208,7 @@ def _render_args_schema(schema: Dict[str, Any]) -> str:
208208
return " ".join(parts)
209209

210210
@staticmethod
211-
def _render_examples(examples: List[str], max_examples: int = 3) -> str:
211+
def _render_examples(examples: List[str], max_examples: int = 5) -> str:
212212
exs = (examples or [])[:max_examples]
213213
return " || ".join(exs)
214214

@@ -234,9 +234,8 @@ def _compose_tool_text(self, tool: BaseTool) -> str:
234234
tags = tool.tags or []
235235
if tags:
236236
segments.append(f"tags: {' '.join(tags)}")
237-
elif p.lower() == "additional_queries":
238-
examples_map = self._settings.get("additional_queries") or {}
239-
examples_list = examples_map.get(tool.name) or []
237+
elif p.lower() == "examples":
238+
examples_list =list(tool.metadata['examples'].values())
240239
if examples_list:
241240
rendered = self._render_examples(examples_list)
242241
if rendered:
@@ -256,30 +255,6 @@ def _create_docs_from_tools(self, tools: List[BaseTool]) -> List[Document]:
256255
documents.append(Document(page_content=page_content, metadata={"name": tool.name}))
257256
return documents
258257

259-
def _collect_examples_from_tool_specs(self, tool_specs: Dict[str, Dict[str, Any]]) -> Dict[str, List[str]]:
260-
"""
261-
Build {tool_name: [example1, example2, ...]} from a tools dict where each
262-
value may contain an 'additional_queries' dict mapping query keys to strings.
263-
"""
264-
examples: Dict[str, List[str]] = {}
265-
for tool_name, spec in (tool_specs or {}).items():
266-
if not isinstance(spec, dict):
267-
continue
268-
aq = spec.get("additional_queries")
269-
if isinstance(aq, dict):
270-
for _, qtext in aq.items():
271-
if isinstance(qtext, str) and qtext.strip():
272-
examples.setdefault(tool_name, []).append(qtext.strip())
273-
# de-duplicate while preserving order
274-
for k, v in list(examples.items()):
275-
seen, out = set(), []
276-
for s in v:
277-
if s not in seen:
278-
seen.add(s)
279-
out.append(s)
280-
examples[k] = out
281-
return examples
282-
283258
def _index_tools(self, tools: List[BaseTool]) -> None:
284259
self.tool_name_to_base_tool = {tool.name: tool for tool in tools}
285260

@@ -339,7 +314,7 @@ def _index_tools(self, tools: List[BaseTool]) -> None:
339314
search_params=search_params,
340315
)
341316

342-
def set_up(self, model: BaseChatModel, tools: List[BaseTool], tool_specs: Any) -> None:
317+
def set_up(self, model: BaseChatModel, tools: List[BaseTool]) -> None:
343318
super().set_up(model, tools)
344319

345320
if self._settings["cross_encoder_model_name"]:
@@ -351,14 +326,6 @@ def set_up(self, model: BaseChatModel, tools: List[BaseTool], tool_specs: Any) -
351326
if self._settings["enable_query_decomposition"] or self._settings["enable_query_rewriting"]:
352327
self.query_rewriting_model = self._get_llm(self._settings["query_rewriting_model_id"])
353328

354-
# Build additional_queries mapping from provided specs (accept dict of tool specs or list of QuerySpecifications)
355-
try:
356-
examples_map: Dict[str, List[str]] = {}
357-
if isinstance(tool_specs, dict):
358-
examples_map = self._collect_examples_from_tool_specs(tool_specs)
359-
self._settings["additional_queries"] = examples_map
360-
except Exception:
361-
pass
362329
self._index_tools(tools)
363330

364331
def _threshold_results(self, docs_and_scores: List[Tuple[Document, float]]) -> List[Document]:

0 commit comments

Comments
 (0)