AnguseZhang
diff --git a/agents/matmaster_agent/core_agents/base_agents/mcp_agent.py b/agents/matmaster_agent/core_agents/base_agents/mcp_agent.py
Lines changed: 5 additions & 3 deletions
diff --git a/agents/matmaster_agent/core_agents/comp_agents/recommend_summary_agent/agent.py b/agents/matmaster_agent/core_agents/comp_agents/recommend_summary_agent/agent.py
Lines changed: 2 additions & 2 deletions
diff --git a/agents/matmaster_agent/core_agents/public_agents/job_agents/submit_core_agent/agent.py b/agents/matmaster_agent/core_agents/public_agents/job_agents/submit_core_agent/agent.py
Lines changed: 16 additions & 11 deletions
diff --git a/agents/matmaster_agent/flow_agents/agent.py b/agents/matmaster_agent/flow_agents/agent.py
Lines changed: 15 additions & 7 deletions
diff --git a/agents/matmaster_agent/flow_agents/all_finished_agent/prompt.py b/agents/matmaster_agent/flow_agents/all_finished_agent/prompt.py
Lines changed: 66 additions & 35 deletions
@@ -50,7 +50,7 @@
     store_tool_result_in_memory,
 )
 from agents.matmaster_agent.model import CostFuncType
-from agents.matmaster_agent.state import CURRENT_STEP
+from agents.matmaster_agent.state import CURRENT_STEP, CURRENT_STEP_RESULT
 from agents.matmaster_agent.style import tool_response_failed_card
 from agents.matmaster_agent.utils.event_utils import (
     all_text_event,
@@ -240,8 +240,10 @@ async def _run_events(self, ctx: InvocationContext) -> AsyncGenerator[Event, Non
                         raise
 
                     parsed_tool_result = await parse_result(ctx, dict_result)
-                    logger.info(
-                        f'{ctx.session.id} parsed_tool_result = {parsed_tool_result}'
+                    post_execution_step = copy.deepcopy(ctx.session.state[CURRENT_STEP])
+                    post_execution_step[CURRENT_STEP_RESULT] = parsed_tool_result
+                    yield update_state_event(
+                        ctx, state_delta={CURRENT_STEP: post_execution_step}
                     )
                     for _frontend_render_event in frontend_render_event(
                         ctx,
 
@@ -64,8 +64,8 @@
 )
 from agents.matmaster_agent.state import (
     CURRENT_STEP,
+    CURRENT_STEP_DESCRIPTION,
     RECOMMEND_PARAMS,
-    STEP_DESCRIPTION,
 )
 from agents.matmaster_agent.sub_agents.tools import ALL_TOOLS
 from agents.matmaster_agent.utils.event_utils import (
@@ -215,7 +215,7 @@ async def _run_events(self, ctx: InvocationContext) -> AsyncGenerator[Event, Non
         )
 
         self.tool_call_info_agent.instruction = gen_tool_call_info_instruction(
-            user_prompt=current_step[STEP_DESCRIPTION],
+            user_prompt=current_step[CURRENT_STEP_DESCRIPTION],
             agent_prompt=self.instruction,
             tool_doc=tool_doc,
             tool_schema=tool_schema,
 
@@ -15,10 +15,11 @@
     DisallowTransferAndContentLimitMCPAgent,
 )
 from agents.matmaster_agent.flow_agents.model import PlanStepStatusEnum
+from agents.matmaster_agent.flow_agents.step_utils import get_current_step
 from agents.matmaster_agent.locales import i18n
 from agents.matmaster_agent.logger import PrefixFilter
 from agents.matmaster_agent.model import BohrJobInfo, DFlowJobInfo
-from agents.matmaster_agent.state import CURRENT_STEP
+from agents.matmaster_agent.state import CURRENT_STEP, CURRENT_STEP_STATUS
 from agents.matmaster_agent.style import tool_response_failed_card
 from agents.matmaster_agent.utils.event_utils import (
     all_text_event,
@@ -99,11 +100,13 @@ async def _run_events(self, ctx: InvocationContext) -> AsyncGenerator[Event, Non
                             yield tool_response_failed_event
 
                         # 更新 plan 为失败
-                        update_plan = copy.deepcopy(ctx.session.state['plan'])
-                        update_plan['steps'][ctx.session.state['plan_index']][
-                            'status'
-                        ] = 'failed'
-                        yield update_state_event(ctx, state_delta={'plan': update_plan})
+                        post_execution_step = copy.deepcopy(get_current_step(ctx))
+                        post_execution_step[CURRENT_STEP_STATUS] = (
+                            PlanStepStatusEnum.FAILED
+                        )
+                        yield update_state_event(
+                            ctx, state_delta={CURRENT_STEP: post_execution_step}
+                        )
 
                         raise RuntimeError('Tool Execution Failed')
                     dict_result = load_tool_response(first_part)
@@ -189,12 +192,14 @@ async def _run_events(self, ctx: InvocationContext) -> AsyncGenerator[Event, Non
                                     yield tool_response_failed_event
 
                                 # 更新 plan 为失败
-                                update_plan = copy.deepcopy(ctx.session.state['plan'])
-                                update_plan['steps'][ctx.session.state['plan_index']][
-                                    'status'
-                                ] = 'failed'
+                                post_execution_step = copy.deepcopy(
+                                    get_current_step(ctx)
+                                )
+                                post_execution_step[CURRENT_STEP_STATUS] = (
+                                    PlanStepStatusEnum.FAILED
+                                )
                                 yield update_state_event(
-                                    ctx, state_delta={'plan': update_plan}
+                                    ctx, state_delta={CURRENT_STEP: post_execution_step}
                                 )
 
                                 raise RuntimeError('Tool Execution Failed')
 
@@ -98,7 +98,7 @@
     is_job_submitted_step,
 )
 from agents.matmaster_agent.flow_agents.step_validation_agent.prompt import (
-    STEP_VALIDATION_INSTRUCTION,
+    create_step_validation_instruction,
 )
 from agents.matmaster_agent.flow_agents.step_validation_agent.schema import (
     StepValidationSchema,
@@ -109,6 +109,7 @@
 )
 from agents.matmaster_agent.flow_agents.thinking_agent.constant import THINKING_AGENT
 from agents.matmaster_agent.flow_agents.utils import (
+    find_alternative_tool,
     get_tools_list,
     scenes_contain_query_job_status,
     should_bypass_confirmation,
@@ -140,6 +141,7 @@
 from agents.matmaster_agent.services.session_files import get_session_files
 from agents.matmaster_agent.state import (
     CURRENT_STEP,
+    CURRENT_STEP_TOOL_NAME,
     EXPAND,
     FINISHED_STATE,
     HISTORY_STEPS,
@@ -336,11 +338,19 @@ def all_finished_agent(self) -> DisallowTransferAndContentLimitSchemaAgent:
     def _build_execution_agent_for_plan(
         self, ctx: InvocationContext
     ) -> MatMasterSupervisorAgent:
+        current_step = get_current_step(ctx)
+        current_step_tool_name = current_step.get(CURRENT_STEP_TOOL_NAME)
+        belonging_agent = ALL_TOOLS.get(current_step_tool_name, {}).get(
+            'belonging_agent'
+        )
+
         step_validation_agent = DisallowTransferAndContentLimitSchemaAgent(
             name='step_validation_agent',
             model=MatMasterLlmConfig.tool_schema_model,
             description='校验步骤执行结果是否合理',
-            instruction=STEP_VALIDATION_INSTRUCTION,
+            instruction=create_step_validation_instruction(
+                find_alternative_tool(current_step_tool_name)
+            ),
             output_schema=StepValidationSchema,
             state_key='step_validation',
             after_model_callback=MatMasterLlmConfig.opik_tracer.after_model_callback,
@@ -356,10 +366,6 @@ def _build_execution_agent_for_plan(
             before_model_callback=filter_llm_contents,
             after_model_callback=MatMasterLlmConfig.opik_tracer.after_model_callback,
         )
-        current_step = get_current_step(ctx)
-        tool_name = current_step.get('tool_name')
-        belonging_agent = ALL_TOOLS.get(tool_name, {}).get('belonging_agent')
-
         execution_agent = MatMasterSupervisorAgent(
             name='execution_agent',
             model=MatMasterLlmConfig.default_litellm_model,
@@ -872,9 +878,10 @@ async def _run_research_flow(
         ):
             yield _scene_event
 
+        execution_count = 0
         while True:
             if not is_job_submitted_step(ctx):
-                skip_thinking = scenes_contain_query_job_status(ctx)
+                skip_thinking = scenes_contain_query_job_status(ctx) or execution_count
                 async for _step_make_event in self._run_step_make_agent(
                     ctx,
                     UPDATE_USER_CONTENT,
@@ -885,6 +892,7 @@ async def _run_research_flow(
 
             async for _plan_execute_event in self._run_plan_execute_agent(ctx):
                 yield _plan_execute_event
+            execution_count += 1
 
             # 检查是否为等待异步任务执行完成的阶段
             if not is_job_submitted_step(ctx):
 
@@ -10,50 +10,81 @@ def create_all_finished_instruction(user_request, history_steps, session_files):
     """
     history_text = json.dumps(history_steps, ensure_ascii=False, indent=2)
     session_files_text = json.dumps(session_files, ensure_ascii=False, indent=2)
-
     return f"""
-You are a "Goal Completion Judge" agent. Your task is to determine whether the user's
-overall final objective/task has been completed *as of now*, based solely on the provided
-tool-call history: history_steps and the provided session_files list.
+You are a "Goal Completion Judge" agent. Decide whether the user's overall final objective
+has been completed *as of now*, based ONLY on history_steps and session_files.
+Key principle: "finished" indicates whether the session should STOP now.
+- If the goal is completed: finished=true.
+- If the goal is NOT completed but still achievable with further actions: finished=false.
+- If the goal is NOT completed AND is blocked/unachievable given the evidence: finished=true (Termination/Unachievable), and the reason MUST explicitly say it is not completed but cannot be completed.
+
+IMPORTANT: The user's goal may be "content in chat" (e.g., a tutorial/summary), not necessarily a file.
+Only require session_files evidence when the user explicitly asked for a file or a file is clearly the expected deliverable.
+
+IMPORTANT: If user_request asks for multiple items (A and B / compare X vs Y / generate N variants), finished=true ONLY when ALL are done.
+
+IMPORTANT: Treat explicit numeric/parameter constraints (layers, vacuum thickness, slab orientation/cut, supercell expansion like 5×5×1, etc.) as mandatory. finished=true ONLY if history_steps explicitly confirms EACH constraint was applied.
+
+IMPORTANT (NEW, HIGH PRIORITY): history_steps[*].suggestion is PRIMARY evidence for whether the task is still achievable.
+- If ANY actionable suggestion exists (even if earlier), and it has NOT been explicitly attempted and exhausted in later history_steps, you MUST set finished=false (unless the goal is already completed).
+- Actionable suggestions include: retrying with modified parameters, switching tools/providers, requesting missing inputs, rerunning with fixes, alternative workflows, etc.
+- You MUST NOT output finished=true (Termination/Unachievable) when there exists any untried actionable suggestion.
+- Only consider Termination/Unachievable when (a) NOT completed, AND (b) all actionable suggestions have been tried (and are evidenced as tried) with continued failure, AND (c) no remaining viable next action is suggested anywhere in history_steps.
+
+CRITICAL: Do NOT treat "suggestion was not acted upon" as evidence of unachievability.
+If there exists any actionable history_steps[*].suggestion that has not been tried, the task is still achievable => finished=false.
+
 # Input
-history_steps is a list. Each element is a past tool invocation record, typically including
-(but not limited to):
-- tool_name: the tool name
-- step_description: what this step attempted to do
-- status: the step status (e.g., success/failed/running/cancelled/unknown, etc.)
-- other fields: such as result/output/error/args/time, etc.
-
-session_files is a list of file links (OSS URLs). Only files that were actually generated
-and persisted for this session will appear here. Use session_files as verifiable evidence
-that a file deliverable truly exists.
-
-Below in the raw user_request:
+user_request:
 {user_request}
-
-Below is the raw history_steps data (JSON):
+history_steps (JSON):
 {history_text}
-
-Below is the raw session_files data (JSON):
+session_files (JSON):
 {session_files_text}
 
 # Decision Rules (must follow)
-1) Use "whether the user's final goal is achieved" as the ONLY criterion, not whether all steps were executed.
-2) If there is clear evidence that the final deliverable/final outcome has been produced and is usable, set finished=true.
-   - For file deliverables, you MUST verify the file exists by checking that an appropriate OSS link is present in session_files.
-3) If any critical step failed, is missing, is still running, or the outputs are insufficient to prove goal completion, set finished=false.
-4) If the information in history_steps and session_files is insufficient to confirm completion (e.g., no final output, only partial logs,
-   or expected output file link is not present in session_files),
-   you MUST return finished=false and explain what information is missing in reason.
-5) If there are contradictions in history_steps, prefer the later entries. If you still cannot decide, return finished=false
-   and explain the contradiction in reason.
-6) Do NOT assume results that are not explicitly supported by history_steps or session_files. Judge only from verifiable evidence.
-# Output Format (very important)
-You must output ONLY ONE JSON object that strictly matches this schema:
+1) Judge ONLY the user's final goal completion / stop condition, not whether all intermediate steps ran.
+
+2) Deliverable type:
+   - If a file artifact is required (PDF/DOCX/ZIP/code project/structure file, etc.), you MUST verify an appropriate OSS link exists in session_files; otherwise finished=false (unless Termination/Unachievable applies).
+   - If in-chat content is required, verify the complete requested content already exists in history_steps outputs; otherwise finished=false (unless Termination/Unachievable applies).
+
+3) If any critical step is failed/missing/running OR outputs are insufficient to prove completion, set finished=false (unless Termination/Unachievable applies).
+
+4) Insufficient evidence => finished=false and state exactly what is missing (unless Termination/Unachievable applies).
+
+5) Contradictions: prefer later entries; if still unclear => finished=false and explain contradiction (unless Termination/Unachievable applies).
+
+6) Do NOT assume results not explicitly supported by history_steps/session_files.
+
+6.1) For explicit parameter constraints, if ANY constraint is not explicitly evidenced, finished=false (unless Termination/Unachievable applies).
+
+7) Suggestion-first achievability check (MUST APPLY BEFORE declaring finished=true for Termination/Unachievable):
+   - Scan ALL history_steps for actionable suggestions.
+   - If any actionable suggestion is not explicitly shown as attempted and exhausted, output finished=false.
+
+8) Termination/Unachievable (STOP even though not done):
+   You may output finished=true for Termination/Unachievable ONLY if:
+   - The goal is NOT completed, AND
+   - history_steps provide concrete evidence that no viable next action exists, AND
+   - EVERY actionable history_steps[*].suggestion has been explicitly tried in later history_steps and still failed, leaving no remaining options.
+   If ANY unresolved suggestion proposes a viable next action (e.g., change parameters, switch provider/tool, request missing info),
+   you MUST output finished=false (the session should continue), unless the goal is already completed.
+
+   If you output finished=true (Termination/Unachievable), the reason MUST include:
+   - "NOT completed" and
+   - "cannot be completed / unachievable" and
+   - the blocking evidence (specific failed steps / missing inputs).
+
+   You MUST NOT output finished=true (Termination/Unachievable) when the only blocking evidence is that a tool failed once and the agent has not yet tried actionable suggestions (e.g., switching provider/tool, changing parameters). In that case, output finished=false.
+
+# Output Format
+Output ONLY ONE JSON object exactly:
 {{
   "finished": true|false,
-  "reason": "A brief, specific explanation in English that cites key evidence from history_steps and/or session_files (e.g., a tool_name status/output or the presence/absence of an OSS link). If not finished, state the critical blocking reason(s) or missing info."
+  "reason": "Brief, specific English explanation citing concrete evidence from history_steps and/or session_files. If using Termination/Unachievable, explicitly state: NOT completed but cannot be completed, and cite the blocking evidence."
 }}
+
 # Output Constraints
-- Output ONLY valid JSON (no Markdown, no code fences, no extra commentary).
-- reason must be an English string and should reference concrete evidence from history_steps and/or session_files.
+- Output ONLY valid JSON (no Markdown / code fences / extra text).
 """.strip()
Original file line number	Diff line number	Diff line change
`@@ -64,8 +64,8 @@`
`64`	`64`	`)`
`65`	`65`	`from agents.matmaster_agent.state import (`
`66`	`66`	`CURRENT_STEP,`
	`67`	`+ CURRENT_STEP_DESCRIPTION,`
`67`	`68`	`RECOMMEND_PARAMS,`
`68`		`- STEP_DESCRIPTION,`
`69`	`69`	`)`
`70`	`70`	`from agents.matmaster_agent.sub_agents.tools import ALL_TOOLS`
`71`	`71`	`from agents.matmaster_agent.utils.event_utils import (`
`@@ -215,7 +215,7 @@ async def _run_events(self, ctx: InvocationContext) -> AsyncGenerator[Event, Non`
`215`	`215`	`)`
`216`	`216`
`217`	`217`	`self.tool_call_info_agent.instruction = gen_tool_call_info_instruction(`
`218`		`- user_prompt=current_step[STEP_DESCRIPTION],`
	`218`	`+ user_prompt=current_step[CURRENT_STEP_DESCRIPTION],`
`219`	`219`	`agent_prompt=self.instruction,`
`220`	`220`	`tool_doc=tool_doc,`
`221`	`221`	`tool_schema=tool_schema,`