feat: 改进步骤校验机制，通过建议备选工具提高步骤可行性

Rasic2 · Rasic2 · commit 42a132dd3422 · 2026-02-07T14:19:07.000+08:00
diff --git a/agents/matmaster_agent/flow_agents/agent.py b/agents/matmaster_agent/flow_agents/agent.py
@@ -98,7 +98,7 @@
     is_job_submitted_step,
 )
 from agents.matmaster_agent.flow_agents.step_validation_agent.prompt import (
-    STEP_VALIDATION_INSTRUCTION,
+    create_step_validation_instruction,
 )
 from agents.matmaster_agent.flow_agents.step_validation_agent.schema import (
     StepValidationSchema,
@@ -109,6 +109,7 @@
 )
 from agents.matmaster_agent.flow_agents.thinking_agent.constant import THINKING_AGENT
 from agents.matmaster_agent.flow_agents.utils import (
+    find_alternative_tool,
     get_tools_list,
     scenes_contain_query_job_status,
     should_bypass_confirmation,
@@ -140,6 +141,7 @@
 from agents.matmaster_agent.services.session_files import get_session_files
 from agents.matmaster_agent.state import (
     CURRENT_STEP,
+    CURRENT_STEP_TOOL_NAME,
     EXPAND,
     FINISHED_STATE,
     HISTORY_STEPS,
@@ -336,11 +338,19 @@ def all_finished_agent(self) -> DisallowTransferAndContentLimitSchemaAgent:
     def _build_execution_agent_for_plan(
         self, ctx: InvocationContext
     ) -> MatMasterSupervisorAgent:
+        current_step = get_current_step(ctx)
+        current_step_tool_name = current_step.get(CURRENT_STEP_TOOL_NAME)
+        belonging_agent = ALL_TOOLS.get(current_step_tool_name, {}).get(
+            'belonging_agent'
+        )
+
         step_validation_agent = DisallowTransferAndContentLimitSchemaAgent(
             name='step_validation_agent',
             model=MatMasterLlmConfig.tool_schema_model,
             description='校验步骤执行结果是否合理',
-            instruction=STEP_VALIDATION_INSTRUCTION,
+            instruction=create_step_validation_instruction(
+                find_alternative_tool(current_step_tool_name)
+            ),
             output_schema=StepValidationSchema,
             state_key='step_validation',
             after_model_callback=MatMasterLlmConfig.opik_tracer.after_model_callback,
@@ -356,10 +366,6 @@ def _build_execution_agent_for_plan(
             before_model_callback=filter_llm_contents,
             after_model_callback=MatMasterLlmConfig.opik_tracer.after_model_callback,
         )
-        current_step = get_current_step(ctx)
-        tool_name = current_step.get('tool_name')
-        belonging_agent = ALL_TOOLS.get(tool_name, {}).get('belonging_agent')
-
         execution_agent = MatMasterSupervisorAgent(
             name='execution_agent',
             model=MatMasterLlmConfig.default_litellm_model,
diff --git a/agents/matmaster_agent/flow_agents/all_finished_agent/prompt.py b/agents/matmaster_agent/flow_agents/all_finished_agent/prompt.py
@@ -13,7 +13,6 @@ def create_all_finished_instruction(user_request, history_steps, session_files):
     return f"""
 You are a "Goal Completion Judge" agent. Decide whether the user's overall final objective
 has been completed *as of now*, based ONLY on history_steps and session_files.
-
 Key principle: "finished" indicates whether the session should STOP now.
 - If the goal is completed: finished=true.
 - If the goal is NOT completed but still achievable with further actions: finished=false.
@@ -26,33 +25,58 @@ def create_all_finished_instruction(user_request, history_steps, session_files):
 
 IMPORTANT: Treat explicit numeric/parameter constraints (layers, vacuum thickness, slab orientation/cut, supercell expansion like 5×5×1, etc.) as mandatory. finished=true ONLY if history_steps explicitly confirms EACH constraint was applied.
 
+IMPORTANT (NEW, HIGH PRIORITY): history_steps[*].suggestion is PRIMARY evidence for whether the task is still achievable.
+- If ANY actionable suggestion exists (even if earlier), and it has NOT been explicitly attempted and exhausted in later history_steps, you MUST set finished=false (unless the goal is already completed).
+- Actionable suggestions include: retrying with modified parameters, switching tools/providers, requesting missing inputs, rerunning with fixes, alternative workflows, etc.
+- You MUST NOT output finished=true (Termination/Unachievable) when there exists any untried actionable suggestion.
+- Only consider Termination/Unachievable when (a) NOT completed, AND (b) all actionable suggestions have been tried (and are evidenced as tried) with continued failure, AND (c) no remaining viable next action is suggested anywhere in history_steps.
+
+CRITICAL: Do NOT treat "suggestion was not acted upon" as evidence of unachievability.
+If there exists any actionable history_steps[*].suggestion that has not been tried, the task is still achievable => finished=false.
+
 # Input
 user_request:
 {user_request}
-
 history_steps (JSON):
 {history_text}
-
 session_files (JSON):
 {session_files_text}
 
 # Decision Rules (must follow)
 1) Judge ONLY the user's final goal completion / stop condition, not whether all intermediate steps ran.
+
 2) Deliverable type:
    - If a file artifact is required (PDF/DOCX/ZIP/code project/structure file, etc.), you MUST verify an appropriate OSS link exists in session_files; otherwise finished=false (unless Termination/Unachievable applies).
    - If in-chat content is required, verify the complete requested content already exists in history_steps outputs; otherwise finished=false (unless Termination/Unachievable applies).
+
 3) If any critical step is failed/missing/running OR outputs are insufficient to prove completion, set finished=false (unless Termination/Unachievable applies).
+
 4) Insufficient evidence => finished=false and state exactly what is missing (unless Termination/Unachievable applies).
+
 5) Contradictions: prefer later entries; if still unclear => finished=false and explain contradiction (unless Termination/Unachievable applies).
+
 6) Do NOT assume results not explicitly supported by history_steps/session_files.
+
 6.1) For explicit parameter constraints, if ANY constraint is not explicitly evidenced, finished=false (unless Termination/Unachievable applies).
-7) Termination/Unachievable (STOP even though not done):
-   If the goal is NOT completed AND is blocked/unachievable such that no viable next action exists (e.g., repeated critical failures; missing required inputs that cannot be obtained; hard constraints prevent completion),
-   you MUST output finished=true and the reason MUST include:
+
+7) Suggestion-first achievability check (MUST APPLY BEFORE declaring finished=true for Termination/Unachievable):
+   - Scan ALL history_steps for actionable suggestions.
+   - If any actionable suggestion is not explicitly shown as attempted and exhausted, output finished=false.
+
+8) Termination/Unachievable (STOP even though not done):
+   You may output finished=true for Termination/Unachievable ONLY if:
+   - The goal is NOT completed, AND
+   - history_steps provide concrete evidence that no viable next action exists, AND
+   - EVERY actionable history_steps[*].suggestion has been explicitly tried in later history_steps and still failed, leaving no remaining options.
+   If ANY unresolved suggestion proposes a viable next action (e.g., change parameters, switch provider/tool, request missing info),
+   you MUST output finished=false (the session should continue), unless the goal is already completed.
+
+   If you output finished=true (Termination/Unachievable), the reason MUST include:
    - "NOT completed" and
    - "cannot be completed / unachievable" and
    - the blocking evidence (specific failed steps / missing inputs).
-   You MUST NOT output finished=false if you claim the task is blocked/unachievable.
+
+   You MUST NOT output finished=true (Termination/Unachievable) when the only blocking evidence is that a tool failed once and the agent has not yet tried actionable suggestions (e.g., switching provider/tool, changing parameters). In that case, output finished=false.
 
 # Output Format
 Output ONLY ONE JSON object exactly:
diff --git a/agents/matmaster_agent/flow_agents/execution_agent/agent.py b/agents/matmaster_agent/flow_agents/execution_agent/agent.py
@@ -23,11 +23,12 @@
     is_job_submitted_step,
 )
 from agents.matmaster_agent.flow_agents.step_validation_agent.prompt import (
-    STEP_VALIDATION_INSTRUCTION,
+    create_step_validation_instruction,
 )
 from agents.matmaster_agent.flow_agents.style import separate_card
 from agents.matmaster_agent.flow_agents.utils import (
     check_plan,
+    find_alternative_tool,
     get_agent_for_tool,
 )
 from agents.matmaster_agent.llm_config import MatMasterLlmConfig
@@ -36,6 +37,7 @@
 from agents.matmaster_agent.state import (
     CURRENT_STEP,
     CURRENT_STEP_DESCRIPTION,
+    CURRENT_STEP_STATUS,
     CURRENT_STEP_TOOL_NAME,
     HISTORY_STEPS,
     PLAN,
@@ -208,7 +210,10 @@ async def _tool_result_validation(
         )
         validation_instruction = '\n'.join(lines)
         self.validation_agent.instruction = (
-            STEP_VALIDATION_INSTRUCTION + validation_instruction
+            create_step_validation_instruction(
+                find_alternative_tool(current_step_tool_name)
+            )
+            + validation_instruction
         )
 
         async for validation_event in self.validation_agent.run_async(ctx):
@@ -320,15 +325,14 @@ async def _run_events(self, ctx: InvocationContext) -> AsyncGenerator[Event, Non
             yield _core_execution_event
 
         post_execution_step = get_current_step(ctx)
-        # 工具调用结果返回【成功】
-        if post_execution_step['status'] == PlanStepStatusEnum.SUCCESS:
+        if post_execution_step[CURRENT_STEP_STATUS] != PlanStepStatusEnum.SUBMITTED:
             # 校验工具结果
             async for _tool_result_validation_event in self._tool_result_validation(
                 ctx
             ):
                 yield _tool_result_validation_event
         # 异步任务，直接退出当前函数
-        elif post_execution_step['status'] == PlanStepStatusEnum.SUBMITTED:
+        else:
             return
 
         update_history_steps = copy.deepcopy(ctx.session.state[HISTORY_STEPS])
diff --git a/agents/matmaster_agent/flow_agents/step_validation_agent/prompt.py b/agents/matmaster_agent/flow_agents/step_validation_agent/prompt.py
@@ -1,4 +1,8 @@
-STEP_VALIDATION_INSTRUCTION = """
+from typing import List
+
+
+def create_step_validation_instruction(alternative_tools: List[str]):
+    return f"""
 You are a validation agent responsible for checking if the execution result of a step matches the user's requirements and basic chemical/materials science knowledge.
 
 Your task is to analyze:
@@ -9,6 +13,9 @@
 
 Based on this analysis, determine if the result is reasonable and matches expectations.
 
+Backup tools you may suggest using if the result is invalid or uncertain:
+{alternative_tools}
+
 # Validation Criteria:
 1. **Relevance**: Does the result address the step's intended purpose?
 2. **Accuracy**: Is the result consistent with basic chemical/materials science knowledge?
@@ -17,11 +24,12 @@
 
 # Output Format:
 You must respond with a JSON object containing:
-{
+{{
     "is_valid": boolean,  // true if result matches requirements and knowledge, false otherwise
     "reason": "string",   // brief explanation of validation result
-    "confidence": "high|medium|low"  // confidence level in the validation
-}
+    "confidence": "high|medium|low",  // confidence level in the validation
+    "suggestion": "string"  // actionable suggestion; if invalid/uncertain, suggest fixes or using one of the backup tools above
+}}
 
 # Important Rules:
 - If the result contains obvious errors (wrong chemical formulas, impossible physical properties, etc.), mark as invalid
diff --git a/agents/matmaster_agent/flow_agents/step_validation_agent/schema.py b/agents/matmaster_agent/flow_agents/step_validation_agent/schema.py
@@ -5,3 +5,4 @@ class StepValidationSchema(BaseModel):
     is_valid: bool
     reason: str
     confidence: str  # "high", "medium", "low"
+    suggestion: str