refactor：用CURRENT_STEP_DESCRIPTION替换STEP_DESCRIPTION并添加step验证相关的逻辑与状态定义

Rasic2 · Rasic2 · commit 0763699aed44 · 2026-02-06T20:18:40.000+08:00
diff --git a/agents/matmaster_agent/core_agents/base_agents/mcp_agent.py b/agents/matmaster_agent/core_agents/base_agents/mcp_agent.py
@@ -50,7 +50,7 @@
     store_tool_result_in_memory,
 )
 from agents.matmaster_agent.model import CostFuncType
-from agents.matmaster_agent.state import CURRENT_STEP
+from agents.matmaster_agent.state import CURRENT_STEP, CURRENT_STEP_RESULT
 from agents.matmaster_agent.style import tool_response_failed_card
 from agents.matmaster_agent.utils.event_utils import (
     all_text_event,
@@ -240,8 +240,10 @@ async def _run_events(self, ctx: InvocationContext) -> AsyncGenerator[Event, Non
                         raise
 
                     parsed_tool_result = await parse_result(ctx, dict_result)
-                    logger.info(
-                        f'{ctx.session.id} parsed_tool_result = {parsed_tool_result}'
+                    post_execution_step = copy.deepcopy(ctx.session.state[CURRENT_STEP])
+                    post_execution_step[CURRENT_STEP_RESULT] = parsed_tool_result
+                    yield update_state_event(
+                        ctx, state_delta={CURRENT_STEP: post_execution_step}
                     )
                     for _frontend_render_event in frontend_render_event(
                         ctx,
diff --git a/agents/matmaster_agent/core_agents/comp_agents/recommend_summary_agent/agent.py b/agents/matmaster_agent/core_agents/comp_agents/recommend_summary_agent/agent.py
@@ -64,8 +64,8 @@
 )
 from agents.matmaster_agent.state import (
     CURRENT_STEP,
+    CURRENT_STEP_DESCRIPTION,
     RECOMMEND_PARAMS,
-    STEP_DESCRIPTION,
 )
 from agents.matmaster_agent.sub_agents.tools import ALL_TOOLS
 from agents.matmaster_agent.utils.event_utils import (
@@ -215,7 +215,7 @@ async def _run_events(self, ctx: InvocationContext) -> AsyncGenerator[Event, Non
         )
 
         self.tool_call_info_agent.instruction = gen_tool_call_info_instruction(
-            user_prompt=current_step[STEP_DESCRIPTION],
+            user_prompt=current_step[CURRENT_STEP_DESCRIPTION],
             agent_prompt=self.instruction,
             tool_doc=tool_doc,
             tool_schema=tool_schema,
diff --git a/agents/matmaster_agent/flow_agents/all_finished_agent/prompt.py b/agents/matmaster_agent/flow_agents/all_finished_agent/prompt.py
@@ -15,6 +15,11 @@ def create_all_finished_instruction(user_request, history_steps, session_files):
 You are a "Goal Completion Judge" agent. Your task is to determine whether the user's
 overall final objective/task has been completed *as of now*, based solely on the provided
 tool-call history: history_steps and the provided session_files list.
+
+IMPORTANT: The user's goal may be "content in chat" (e.g., a researched tutorial/summary),
+not necessarily a file deliverable. Only require session_files evidence when the user
+explicitly asked for a file or a file is clearly the expected final deliverable.
+
 # Input
 history_steps is a list. Each element is a past tool invocation record, typically including
 (but not limited to):
@@ -25,7 +30,7 @@ def create_all_finished_instruction(user_request, history_steps, session_files):
 
 session_files is a list of file links (OSS URLs). Only files that were actually generated
 and persisted for this session will appear here. Use session_files as verifiable evidence
-that a file deliverable truly exists.
+that a file deliverable truly exists (only when a file deliverable is required).
 
-Below in the raw user_request:
+Below is the raw user_request:
 {user_request}
@@ -38,21 +43,26 @@ def create_all_finished_instruction(user_request, history_steps, session_files):
 
 # Decision Rules (must follow)
 1) Use "whether the user's final goal is achieved" as the ONLY criterion, not whether all steps were executed.
-2) If there is clear evidence that the final deliverable/final outcome has been produced and is usable, set finished=true.
-   - For file deliverables, you MUST verify the file exists by checking that an appropriate OSS link is present in session_files.
+2) Consider the expected deliverable type based on user_request:
+   - If the user asked for a file/output artifact (e.g., PDF/DOCX/ZIP/code project), you MUST verify the file exists by checking
+     that an appropriate OSS link is present in session_files; otherwise finished=false.
+   - If the user asked for "in-chat content" (e.g., search + summarize + tutorial), you should judge completion by whether the final
+     requested content is already present/produced in history_steps outputs (e.g., the assistant/tool produced a complete tutorial/summary).
 3) If any critical step failed, is missing, is still running, or the outputs are insufficient to prove goal completion, set finished=false.
-4) If the information in history_steps and session_files is insufficient to confirm completion (e.g., no final output, only partial logs,
-   or expected output file link is not present in session_files),
+4) If the information in history_steps and session_files is insufficient to confirm completion (e.g., no final summary/tutorial text,
+   only partial logs; or a required output file link is not present in session_files),
    you MUST return finished=false and explain what information is missing in reason.
 5) If there are contradictions in history_steps, prefer the later entries. If you still cannot decide, return finished=false
    and explain the contradiction in reason.
 6) Do NOT assume results that are not explicitly supported by history_steps or session_files. Judge only from verifiable evidence.
+
 # Output Format (very important)
 You must output ONLY ONE JSON object that strictly matches this schema:
 {{
   "finished": true|false,
-  "reason": "A brief, specific explanation in English that cites key evidence from history_steps and/or session_files (e.g., a tool_name status/output or the presence/absence of an OSS link). If not finished, state the critical blocking reason(s) or missing info."
+  "reason": "A brief, specific explanation in English that cites key evidence from history_steps and/or session_files (e.g., a tool_name status/output; or the presence/absence of an OSS link when a file is required). If not finished, state the critical blocking reason(s) or missing info."
 }}
+
 # Output Constraints
 - Output ONLY valid JSON (no Markdown, no code fences, no extra commentary).
 - reason must be an English string and should reference concrete evidence from history_steps and/or session_files.
diff --git a/agents/matmaster_agent/flow_agents/execution_agent/agent.py b/agents/matmaster_agent/flow_agents/execution_agent/agent.py
@@ -19,6 +19,7 @@
 from agents.matmaster_agent.flow_agents.model import PlanStepStatusEnum
 from agents.matmaster_agent.flow_agents.step_utils import (
     get_current_step,
+    get_current_step_validation,
     is_job_submitted_step,
 )
 from agents.matmaster_agent.flow_agents.step_validation_agent.prompt import (
@@ -28,16 +29,16 @@
 from agents.matmaster_agent.flow_agents.utils import (
     check_plan,
     get_agent_for_tool,
-    has_self_check,
 )
 from agents.matmaster_agent.llm_config import MatMasterLlmConfig
 from agents.matmaster_agent.logger import PrefixFilter
 from agents.matmaster_agent.prompt import MatMasterCheckTransferPrompt
 from agents.matmaster_agent.state import (
     CURRENT_STEP,
+    CURRENT_STEP_DESCRIPTION,
+    CURRENT_STEP_TOOL_NAME,
     HISTORY_STEPS,
     PLAN,
-    STEP_DESCRIPTION,
 )
 from agents.matmaster_agent.sub_agents.mapping import (
     MatMasterSubAgentsEnum,
@@ -97,7 +98,7 @@ async def _construct_function_call_ctx(
     ) -> AsyncGenerator[Event, None]:
         current_step = copy.deepcopy(ctx.session.state[CURRENT_STEP])
         current_step_tool_name = current_step['tool_name']
-        current_step_tool_description = current_step[STEP_DESCRIPTION]
+        current_step_tool_description = current_step[CURRENT_STEP_DESCRIPTION]
         current_step['status'] = PlanStepStatusEnum.PROCESS
         yield update_state_event(
             ctx,
@@ -186,9 +187,9 @@ async def _core_execution_agent(
     async def _tool_result_validation(
         self, ctx: InvocationContext
     ) -> AsyncGenerator[Event, None]:
-        current_step_tool_name = ctx.session.state[CURRENT_STEP]['tool_name']
+        current_step_tool_name = ctx.session.state[CURRENT_STEP][CURRENT_STEP_TOOL_NAME]
         current_step_tool_description = ctx.session.state[CURRENT_STEP][
-            STEP_DESCRIPTION
+            CURRENT_STEP_DESCRIPTION
         ]
         user_text = (
             ctx.user_content.parts[0].text
@@ -292,11 +293,11 @@ async def _prepare_retry_other_tool(
         update_plan['steps'][index]['tool_name'] = next_tool
         update_plan['steps'][index]['status'] = PlanStepStatusEnum.PROCESS
         original_description = ctx.session.state[PLAN]['steps'][index][
-            STEP_DESCRIPTION
+            CURRENT_STEP_DESCRIPTION
         ].split('\n\n注意：')[
             0
         ]  # 移除之前的失败原因
-        update_plan['steps'][index][STEP_DESCRIPTION] = original_description
+        update_plan['steps'][index][CURRENT_STEP_DESCRIPTION] = original_description
         yield update_state_event(
             ctx,
             state_delta={
@@ -321,17 +322,17 @@ async def _run_events(self, ctx: InvocationContext) -> AsyncGenerator[Event, Non
         post_execution_step = get_current_step(ctx)
         # 工具调用结果返回【成功】
         if post_execution_step['status'] == PlanStepStatusEnum.SUCCESS:
-            # 对成功的工具调用结果进行校验
-            if has_self_check(post_execution_step['tool_name']):
-                # 校验工具结果
-                async for _tool_result_validation_event in self._tool_result_validation(
-                    ctx
-                ):
-                    yield _tool_result_validation_event
+            # 校验工具结果
+            async for _tool_result_validation_event in self._tool_result_validation(
+                ctx
+            ):
+                yield _tool_result_validation_event
         # 异步任务，直接退出当前函数
         elif post_execution_step['status'] == PlanStepStatusEnum.SUBMITTED:
             return
 
         update_history_steps = copy.deepcopy(ctx.session.state[HISTORY_STEPS])
-        update_history_steps.append(post_execution_step)
+        update_history_steps.append(
+            {**post_execution_step, **get_current_step_validation(ctx)}
+        )
         yield update_state_event(ctx, state_delta={HISTORY_STEPS: update_history_steps})
diff --git a/agents/matmaster_agent/flow_agents/step_utils.py b/agents/matmaster_agent/flow_agents/step_utils.py
@@ -1,13 +1,21 @@
 from google.adk.agents import InvocationContext
 
 from agents.matmaster_agent.flow_agents.model import PlanStepStatusEnum
-from agents.matmaster_agent.state import CURRENT_STEP, CURRENT_STEP_STATUS
+from agents.matmaster_agent.state import (
+    CURRENT_STEP,
+    CURRENT_STEP_STATUS,
+    CURRENT_STEP_VALIDATION,
+)
 
 
 def get_current_step(ctx: InvocationContext):
     return ctx.session.state.get(CURRENT_STEP, {})
 
 
+def get_current_step_validation(ctx: InvocationContext):
+    return ctx.session.state.get(CURRENT_STEP_VALIDATION, {})
+
+
 def is_job_submitted_step(ctx: InvocationContext) -> bool:
     return (
         get_current_step(ctx).get(CURRENT_STEP_STATUS) == PlanStepStatusEnum.SUBMITTED
diff --git a/agents/matmaster_agent/memory/inject_memory_callback.py b/agents/matmaster_agent/memory/inject_memory_callback.py
@@ -16,7 +16,7 @@
 from agents.matmaster_agent.constant import MATMASTER_AGENT_NAME
 from agents.matmaster_agent.logger import PrefixFilter
 from agents.matmaster_agent.services.memory import format_short_term_memory
-from agents.matmaster_agent.state import CURRENT_STEP, STEP_DESCRIPTION
+from agents.matmaster_agent.state import CURRENT_STEP, CURRENT_STEP_DESCRIPTION
 
 logger = logging.getLogger(__name__)
 logger.addFilter(PrefixFilter(MATMASTER_AGENT_NAME))
@@ -31,7 +31,7 @@ def _query_from_request_and_state(
     state = getattr(callback_context, 'state', None)
     if state:
         current_step = state.get(CURRENT_STEP, {})
-        desc = current_step.get(STEP_DESCRIPTION, '')
+        desc = current_step.get(CURRENT_STEP_DESCRIPTION, '')
         if desc:
             return desc.strip()
     # Fallback: last text from contents
diff --git a/agents/matmaster_agent/state.py b/agents/matmaster_agent/state.py
@@ -14,9 +14,9 @@
 CURRENT_STEP = 'current_step'
 CURRENT_STEP_STATUS = 'status'
 CURRENT_STEP_TOOL_NAME = 'tool_name'
+CURRENT_STEP_DESCRIPTION = 'step_description'
+CURRENT_STEP_RESULT = 'step_result'
+CURRENT_STEP_VALIDATION = 'step_validation'
 
 HISTORY_STEPS = 'history_steps'
 FINISHED_STATE = 'finished_state'
-
-# Other Key
-STEP_DESCRIPTION = 'step_description'

Original file line number	Diff line number	Diff line change
`@@ -64,8 +64,8 @@`
`64`	`64`	`)`
`65`	`65`	`from agents.matmaster_agent.state import (`
`66`	`66`	`CURRENT_STEP,`
	`67`	`+ CURRENT_STEP_DESCRIPTION,`
`67`	`68`	`RECOMMEND_PARAMS,`
`68`		`- STEP_DESCRIPTION,`
`69`	`69`	`)`
`70`	`70`	`from agents.matmaster_agent.sub_agents.tools import ALL_TOOLS`
`71`	`71`	`from agents.matmaster_agent.utils.event_utils import (`
`@@ -215,7 +215,7 @@ async def _run_events(self, ctx: InvocationContext) -> AsyncGenerator[Event, Non`
`215`	`215`	`)`
`216`	`216`
`217`	`217`	`self.tool_call_info_agent.instruction = gen_tool_call_info_instruction(`
`218`		`- user_prompt=current_step[STEP_DESCRIPTION],`
	`218`	`+ user_prompt=current_step[CURRENT_STEP_DESCRIPTION],`
`219`	`219`	`agent_prompt=self.instruction,`
`220`	`220`	`tool_doc=tool_doc,`
`221`	`221`	`tool_schema=tool_schema,`