"""PlanExecEngine 单元测试 测试 Plan-and-Execute 执行引擎适配器: 1. 3步任务: plan → execute steps → aggregate 2. 步骤失败时触发重规划 3. 接口兼容性(与 ReActEngine 一致) 4. CancellationToken 取消 """ import asyncio import json from datetime import datetime, timezone from unittest.mock import AsyncMock, MagicMock, patch import pytest from agentkit.core.plan_exec_engine import PlanExecEngine from agentkit.core.plan_executor import PlanExecutionResult, StepExecutionResult from agentkit.core.plan_schema import ExecutionPlan, PlanStep, PlanStepStatus from agentkit.core.protocol import CancellationToken, TaskMessage, TaskResult, TaskStatus from agentkit.core.react import ReActEvent, ReActResult, ReActStep from agentkit.orchestrator.pipeline_schema import ( Pipeline, PipelineResult, PipelineStage, ReflectionReport, StageResult, StageStatus, ) # ── Test Helpers ────────────────────────────────────────── def make_plan( goal: str = "test goal", steps: list[PlanStep] | None = None, parallel_groups: list[list[str]] | None = None, ) -> ExecutionPlan: """快速构造 ExecutionPlan""" if steps is None: steps = [ PlanStep(step_id="step-0", name="Step 0", description="First step"), PlanStep(step_id="step-1", name="Step 1", description="Second step", dependencies=["step-0"]), PlanStep(step_id="step-2", name="Step 2", description="Final step", dependencies=["step-1"]), ] if parallel_groups is None: parallel_groups = [["step-0"], ["step-1"], ["step-2"]] return ExecutionPlan( goal=goal, steps=steps, parallel_groups=parallel_groups, ) def make_step_result( step_id: str, status: PlanStepStatus = PlanStepStatus.COMPLETED, result: dict | None = None, error: str | None = None, ) -> StepExecutionResult: """快速构造 StepExecutionResult""" return StepExecutionResult( step_id=step_id, status=status, result=result or {"content": f"result of {step_id}"}, error=error, ) def make_plan_result( plan_id: str = "test-plan", step_results: dict[str, StepExecutionResult] | None = None, status: TaskStatus = TaskStatus.COMPLETED, ) -> PlanExecutionResult: """快速构造 PlanExecutionResult""" if step_results is None: step_results = { "step-0": make_step_result("step-0"), "step-1": make_step_result("step-1"), "step-2": make_step_result("step-2"), } return PlanExecutionResult( plan_id=plan_id, step_results=step_results, status=status, total_duration_ms=100.0, ) def make_reflection_report( failed_stage: str = "step-1", failure_type: str = "logic_error", root_cause: str = "Test failure", suggested_fix: str = "Retry with adjusted parameters", ) -> ReflectionReport: """快速构造 ReflectionReport""" return ReflectionReport( failure_type=failure_type, root_cause=root_cause, suggested_fix=suggested_fix, failed_stage=failed_stage, reflection_number=1, ) def make_revised_pipeline( original_pipeline: Pipeline, failed_stage: str = "step-1", ) -> Pipeline: """构造修正后的 Pipeline""" new_stages = [] for stage in original_pipeline.stages: if stage.name == failed_stage: new_stages.append(PipelineStage( name=stage.name, agent=stage.agent, action=f"Revised: {stage.action}", depends_on=stage.depends_on, inputs=stage.inputs, )) else: new_stages.append(stage) return Pipeline( name=f"{original_pipeline.name}_replanned", version=original_pipeline.version, description=original_pipeline.description, stages=new_stages, ) # ── Test: 3-step task ──────────────────────────────────── class TestThreeStepTask: """测试 3 步任务: plan → execute steps → aggregate""" async def test_execute_returns_react_result(self): """execute() 应返回 ReActResult""" engine = PlanExecEngine(llm_gateway=None) # Mock GoalPlanner plan = make_plan() with patch.object(engine._planner, "generate_plan", AsyncMock(return_value=plan)): # Mock PlanExecutor plan_result = make_plan_result() with patch("agentkit.core.plan_exec_engine.PlanExecutor") as MockExecutor: mock_executor_instance = MagicMock() mock_executor_instance.execute = AsyncMock(return_value=plan_result) MockExecutor.return_value = mock_executor_instance result = await engine.execute( messages=[{"role": "user", "content": "调研3个竞品并生成报告"}], ) assert isinstance(result, ReActResult) assert result.output # 有输出 assert result.total_steps > 0 assert result.total_tokens >= 0 assert result.status in ("success", "partial", "error", "cancelled", "timeout") async def test_execute_trajectory_contains_plan_and_steps(self): """trajectory 应包含 plan_generated 和步骤完成记录""" engine = PlanExecEngine(llm_gateway=None) plan = make_plan() plan_result = make_plan_result() with patch.object(engine._planner, "generate_plan", AsyncMock(return_value=plan)): with patch("agentkit.core.plan_exec_engine.PlanExecutor") as MockExecutor: mock_executor_instance = MagicMock() mock_executor_instance.execute = AsyncMock(return_value=plan_result) MockExecutor.return_value = mock_executor_instance result = await engine.execute( messages=[{"role": "user", "content": "调研3个竞品并生成报告"}], ) # trajectory 应包含: plan_generated + 3 step_completed + final_answer actions = [s.action for s in result.trajectory] assert "plan_generated" in actions assert "final_answer" in actions # 3 个步骤完成 step_completed_count = sum(1 for a in actions if a == "step_completed") assert step_completed_count == 3 async def test_execute_aggregates_step_results(self): """最终输出应聚合所有成功步骤的结果""" engine = PlanExecEngine(llm_gateway=None) plan = make_plan() step_results = { "step-0": make_step_result("step-0", result={"data": "research result"}), "step-1": make_step_result("step-1", result={"data": "analysis result"}), "step-2": make_step_result("step-2", result={"data": "report result"}), } plan_result = make_plan_result(step_results=step_results) with patch.object(engine._planner, "generate_plan", AsyncMock(return_value=plan)): with patch("agentkit.core.plan_exec_engine.PlanExecutor") as MockExecutor: mock_executor_instance = MagicMock() mock_executor_instance.execute = AsyncMock(return_value=plan_result) MockExecutor.return_value = mock_executor_instance result = await engine.execute( messages=[{"role": "user", "content": "调研3个竞品并生成报告"}], ) # 输出应包含所有步骤的结果 assert "research result" in result.output assert "analysis result" in result.output assert "report result" in result.output async def test_execute_stream_yields_events(self): """execute_stream() 应 yield 正确的事件序列""" engine = PlanExecEngine(llm_gateway=None) plan = make_plan() plan_result = make_plan_result() with patch.object(engine._planner, "generate_plan", AsyncMock(return_value=plan)): with patch("agentkit.core.plan_exec_engine.PlanExecutor") as MockExecutor: mock_executor_instance = MagicMock() mock_executor_instance.execute = AsyncMock(return_value=plan_result) MockExecutor.return_value = mock_executor_instance events = [] async for event in engine.execute_stream( messages=[{"role": "user", "content": "调研3个竞品并生成报告"}], ): events.append(event) event_types = [e.event_type for e in events] assert "planning" in event_types assert "plan_generated" in event_types assert "step_executing" in event_types assert "step_completed" in event_types assert "final_answer" in event_types async def test_execute_stream_final_answer_event(self): """final_answer 事件应包含输出和元数据""" engine = PlanExecEngine(llm_gateway=None) plan = make_plan() plan_result = make_plan_result() with patch.object(engine._planner, "generate_plan", AsyncMock(return_value=plan)): with patch("agentkit.core.plan_exec_engine.PlanExecutor") as MockExecutor: mock_executor_instance = MagicMock() mock_executor_instance.execute = AsyncMock(return_value=plan_result) MockExecutor.return_value = mock_executor_instance events = [] async for event in engine.execute_stream( messages=[{"role": "user", "content": "调研3个竞品并生成报告"}], ): events.append(event) final_event = [e for e in events if e.event_type == "final_answer"][0] assert "output" in final_event.data assert "total_steps" in final_event.data assert "total_tokens" in final_event.data assert "plan_id" in final_event.data # ── Test: Replanning ───────────────────────────────────── class TestReplanning: """测试步骤失败时触发重规划""" async def test_replanning_triggered_on_step_failure(self): """步骤失败时应触发重规划""" engine = PlanExecEngine(llm_gateway=None, max_replans=1) plan = make_plan() # 第一次执行:step-1 失败 failed_step_results = { "step-0": make_step_result("step-0"), "step-1": make_step_result("step-1", status=PlanStepStatus.FAILED, result=None, error="Agent error"), "step-2": make_step_result("step-2", status=PlanStepStatus.SKIPPED, error="Skipped due to dependency"), } first_result = make_plan_result(step_results=failed_step_results, status=TaskStatus.PARTIALLY_COMPLETED) # 重规划后的第二次执行:全部成功 second_result = make_plan_result() # Mock GoalPlanner with patch.object(engine._planner, "generate_plan", AsyncMock(return_value=plan)): # Mock PlanExecutor — 第一次失败,第二次成功 with patch("agentkit.core.plan_exec_engine.PlanExecutor") as MockExecutor: mock_executor_instance = MagicMock() mock_executor_instance.execute = AsyncMock(side_effect=[first_result, second_result]) MockExecutor.return_value = mock_executor_instance # Mock PipelineReflector report = make_reflection_report() with patch.object(engine._reflector, "reflect", AsyncMock(return_value=report)): # Mock PipelineReplanner pipeline = engine._plan_to_pipeline(plan, "") revised_pipeline = make_revised_pipeline(pipeline) with patch.object(engine._replanner, "replan", AsyncMock(return_value=revised_pipeline)): result = await engine.execute( messages=[{"role": "user", "content": "调研3个竞品并生成报告"}], ) # 应有重规划步骤 actions = [s.action for s in result.trajectory] assert "replanning" in actions # 最终结果应该是成功的(重规划后) assert result.status == "success" async def test_replanning_stream_yields_replanning_event(self): """流式执行中重规划应 yield replanning 事件""" engine = PlanExecEngine(llm_gateway=None, max_replans=1) plan = make_plan() failed_step_results = { "step-0": make_step_result("step-0"), "step-1": make_step_result("step-1", status=PlanStepStatus.FAILED, result=None, error="Agent error"), "step-2": make_step_result("step-2", status=PlanStepStatus.SKIPPED, error="Skipped"), } first_result = make_plan_result(step_results=failed_step_results, status=TaskStatus.PARTIALLY_COMPLETED) second_result = make_plan_result() with patch.object(engine._planner, "generate_plan", AsyncMock(return_value=plan)): with patch("agentkit.core.plan_exec_engine.PlanExecutor") as MockExecutor: mock_executor_instance = MagicMock() mock_executor_instance.execute = AsyncMock(side_effect=[first_result, second_result]) MockExecutor.return_value = mock_executor_instance report = make_reflection_report() with patch.object(engine._reflector, "reflect", AsyncMock(return_value=report)): pipeline = engine._plan_to_pipeline(plan, "") revised_pipeline = make_revised_pipeline(pipeline) with patch.object(engine._replanner, "replan", AsyncMock(return_value=revised_pipeline)): events = [] async for event in engine.execute_stream( messages=[{"role": "user", "content": "调研3个竞品并生成报告"}], ): events.append(event) event_types = [e.event_type for e in events] assert "replanning" in event_types async def test_max_replans_exhausted_returns_partial(self): """重规划次数耗尽后应返回部分结果""" engine = PlanExecEngine(llm_gateway=None, max_replans=1) plan = make_plan() # 两次执行都失败 failed_step_results = { "step-0": make_step_result("step-0"), "step-1": make_step_result("step-1", status=PlanStepStatus.FAILED, result=None, error="Persistent error"), "step-2": make_step_result("step-2", status=PlanStepStatus.SKIPPED, error="Skipped"), } failed_result = make_plan_result(step_results=failed_step_results, status=TaskStatus.PARTIALLY_COMPLETED) with patch.object(engine._planner, "generate_plan", AsyncMock(return_value=plan)): with patch("agentkit.core.plan_exec_engine.PlanExecutor") as MockExecutor: mock_executor_instance = MagicMock() mock_executor_instance.execute = AsyncMock(return_value=failed_result) MockExecutor.return_value = mock_executor_instance report = make_reflection_report() with patch.object(engine._reflector, "reflect", AsyncMock(return_value=report)): pipeline = engine._plan_to_pipeline(plan, "") revised_pipeline = make_revised_pipeline(pipeline) with patch.object(engine._replanner, "replan", AsyncMock(return_value=revised_pipeline)): result = await engine.execute( messages=[{"role": "user", "content": "调研3个竞品并生成报告"}], ) # 应该是 partial 状态 assert result.status == "partial" # 输出应包含失败信息 assert "failed" in result.output.lower() or "error" in result.output.lower() or "step-0" in result.output async def test_all_steps_failed_returns_error_status(self): """所有步骤失败时应返回 error 状态""" engine = PlanExecEngine(llm_gateway=None, max_replans=0) plan = make_plan() all_failed_results = { "step-0": make_step_result("step-0", status=PlanStepStatus.FAILED, result=None, error="Error 0"), "step-1": make_step_result("step-1", status=PlanStepStatus.SKIPPED, error="Skipped"), "step-2": make_step_result("step-2", status=PlanStepStatus.SKIPPED, error="Skipped"), } failed_result = make_plan_result(step_results=all_failed_results, status=TaskStatus.FAILED) with patch.object(engine._planner, "generate_plan", AsyncMock(return_value=plan)): with patch("agentkit.core.plan_exec_engine.PlanExecutor") as MockExecutor: mock_executor_instance = MagicMock() mock_executor_instance.execute = AsyncMock(return_value=failed_result) MockExecutor.return_value = mock_executor_instance result = await engine.execute( messages=[{"role": "user", "content": "调研3个竞品并生成报告"}], ) assert result.status == "error" # ── Test: Interface Compatibility ───────────────────────── class TestInterfaceCompatibility: """测试与 ReActEngine 接口兼容性""" async def test_execute_signature_compatible(self): """execute() 签名应与 ReActEngine 一致""" import inspect from agentkit.core.react import ReActEngine react_sig = inspect.signature(ReActEngine.execute) plan_exec_sig = inspect.signature(PlanExecEngine.execute) react_params = list(react_sig.parameters.keys()) plan_exec_params = list(plan_exec_sig.parameters.keys()) assert react_params == plan_exec_params, ( f"Parameter mismatch: ReActEngine has {react_params}, " f"PlanExecEngine has {plan_exec_params}" ) async def test_execute_stream_signature_compatible(self): """execute_stream() 签名应与 ReActEngine 一致""" import inspect from agentkit.core.react import ReActEngine react_sig = inspect.signature(ReActEngine.execute_stream) plan_exec_sig = inspect.signature(PlanExecEngine.execute_stream) react_params = list(react_sig.parameters.keys()) plan_exec_params = list(plan_exec_sig.parameters.keys()) assert react_params == plan_exec_params, ( f"Parameter mismatch: ReActEngine has {react_params}, " f"PlanExecEngine has {plan_exec_params}" ) async def test_returns_react_result(self): """execute() 应返回 ReActResult 实例""" engine = PlanExecEngine(llm_gateway=None) plan = make_plan() plan_result = make_plan_result() with patch.object(engine._planner, "generate_plan", AsyncMock(return_value=plan)): with patch("agentkit.core.plan_exec_engine.PlanExecutor") as MockExecutor: mock_executor_instance = MagicMock() mock_executor_instance.execute = AsyncMock(return_value=plan_result) MockExecutor.return_value = mock_executor_instance result = await engine.execute( messages=[{"role": "user", "content": "test"}], ) assert isinstance(result, ReActResult) assert hasattr(result, "output") assert hasattr(result, "trajectory") assert hasattr(result, "total_steps") assert hasattr(result, "total_tokens") assert hasattr(result, "status") async def test_stream_yields_react_events(self): """execute_stream() 应 yield ReActEvent 实例""" engine = PlanExecEngine(llm_gateway=None) plan = make_plan() plan_result = make_plan_result() with patch.object(engine._planner, "generate_plan", AsyncMock(return_value=plan)): with patch("agentkit.core.plan_exec_engine.PlanExecutor") as MockExecutor: mock_executor_instance = MagicMock() mock_executor_instance.execute = AsyncMock(return_value=plan_result) MockExecutor.return_value = mock_executor_instance async for event in engine.execute_stream( messages=[{"role": "user", "content": "test"}], ): assert isinstance(event, ReActEvent) assert hasattr(event, "event_type") assert hasattr(event, "step") assert hasattr(event, "data") assert hasattr(event, "timestamp") async def test_trajectory_contains_react_steps(self): """trajectory 中的元素应为 ReActStep 实例""" engine = PlanExecEngine(llm_gateway=None) plan = make_plan() plan_result = make_plan_result() with patch.object(engine._planner, "generate_plan", AsyncMock(return_value=plan)): with patch("agentkit.core.plan_exec_engine.PlanExecutor") as MockExecutor: mock_executor_instance = MagicMock() mock_executor_instance.execute = AsyncMock(return_value=plan_result) MockExecutor.return_value = mock_executor_instance result = await engine.execute( messages=[{"role": "user", "content": "test"}], ) for step in result.trajectory: assert isinstance(step, ReActStep) # ── Test: Cancellation ─────────────────────────────────── class TestCancellationToken: """测试 CancellationToken 取消""" async def test_cancelled_before_planning(self): """在规划前取消应抛出 TaskCancelledError""" from agentkit.core.exceptions import TaskCancelledError engine = PlanExecEngine(llm_gateway=None) token = CancellationToken() token.cancel() with pytest.raises(TaskCancelledError): await engine.execute( messages=[{"role": "user", "content": "test"}], cancellation_token=token, ) async def test_cancelled_during_execution(self): """在执行过程中取消应抛出 TaskCancelledError""" from agentkit.core.exceptions import TaskCancelledError engine = PlanExecEngine(llm_gateway=None) token = CancellationToken() plan = make_plan() # 让 generate_plan 正常执行,但在执行循环中取消 call_count = 0 async def mock_generate_plan(*args, **kwargs): return plan async def mock_execute(plan_arg, task_msg): nonlocal call_count call_count += 1 if call_count == 1: # 第一次调用后取消 token.cancel() # 模拟 PlanExecutor 内部在 execute 完成后检查取消 # 这里返回结果,取消会在下一轮循环检查时生效 return make_plan_result(step_results={ "step-0": make_step_result("step-0", status=PlanStepStatus.FAILED, error="fail"), "step-1": make_step_result("step-1", status=PlanStepStatus.SKIPPED, error="Skipped"), "step-2": make_step_result("step-2", status=PlanStepStatus.SKIPPED, error="Skipped"), }, status=TaskStatus.FAILED) with patch.object(engine._planner, "generate_plan", AsyncMock(side_effect=mock_generate_plan)): with patch("agentkit.core.plan_exec_engine.PlanExecutor") as MockExecutor: mock_executor_instance = MagicMock() mock_executor_instance.execute = AsyncMock(side_effect=mock_execute) MockExecutor.return_value = mock_executor_instance # 因为取消发生在 replanning 循环的检查点 with pytest.raises(TaskCancelledError): await engine.execute( messages=[{"role": "user", "content": "test"}], cancellation_token=token, ) async def test_stream_cancelled(self): """流式执行中取消应抛出 TaskCancelledError""" from agentkit.core.exceptions import TaskCancelledError engine = PlanExecEngine(llm_gateway=None) token = CancellationToken() token.cancel() with pytest.raises(TaskCancelledError): async for _ in engine.execute_stream( messages=[{"role": "user", "content": "test"}], cancellation_token=token, ): pass # ── Test: Timeout ──────────────────────────────────────── class TestTimeout: """测试超时处理""" async def test_timeout_raises_task_timeout_error(self): """超时应抛出 TaskTimeoutError""" from agentkit.core.exceptions import TaskTimeoutError engine = PlanExecEngine(llm_gateway=None) plan = make_plan() async def slow_generate_plan(*args, **kwargs): await asyncio.sleep(10) # 模拟慢速规划 return plan with patch.object(engine._planner, "generate_plan", AsyncMock(side_effect=slow_generate_plan)): with pytest.raises(TaskTimeoutError): await engine.execute( messages=[{"role": "user", "content": "test"}], timeout_seconds=0.1, ) # ── Test: Helper Methods ──────────────────────────────── class TestHelperMethods: """测试辅助方法""" def test_extract_goal(self): """应从消息中提取用户目标""" messages = [ {"role": "system", "content": "You are a helpful assistant"}, {"role": "user", "content": "调研3个竞品"}, ] goal = PlanExecEngine._extract_goal(messages) assert goal == "调研3个竞品" def test_extract_goal_empty_messages(self): """空消息应返回空字符串""" assert PlanExecEngine._extract_goal([]) == "" def test_extract_skill_names(self): """应从工具列表中提取名称""" from agentkit.tools.base import Tool class FakeTool(Tool): async def execute(self, **kwargs): return {} tools = [FakeTool(name="search", description="search tool"), FakeTool(name="analyze", description="analyze tool")] names = PlanExecEngine._extract_skill_names(tools) assert names == ["search", "analyze"] def test_extract_skill_names_none(self): """None 工具列表应返回空列表""" assert PlanExecEngine._extract_skill_names(None) == [] def test_aggregate_output_completed(self): """成功步骤应聚合到输出""" plan = make_plan() plan_result = make_plan_result() output = PlanExecEngine._aggregate_output(plan, plan_result) assert "Step 0" in output assert "Step 1" in output assert "Step 2" in output def test_aggregate_output_all_failed(self): """全部失败应返回失败信息""" plan = make_plan() step_results = { "step-0": make_step_result("step-0", status=PlanStepStatus.FAILED, error="Error 0"), "step-1": make_step_result("step-1", status=PlanStepStatus.SKIPPED, error="Skipped"), "step-2": make_step_result("step-2", status=PlanStepStatus.SKIPPED, error="Skipped"), } plan_result = make_plan_result(step_results=step_results, status=TaskStatus.FAILED) output = PlanExecEngine._aggregate_output(plan, plan_result) assert "failed" in output.lower() def test_plan_to_pipeline_conversion(self): """ExecutionPlan 应正确转换为 Pipeline""" plan = make_plan() pipeline = PlanExecEngine._plan_to_pipeline(plan, "test_agent") assert pipeline.name.startswith("plan_") assert len(pipeline.stages) == 3 assert pipeline.stages[0].name == "step-0" assert pipeline.stages[1].depends_on == ["step-0"] def test_pipeline_to_plan_conversion(self): """Pipeline 应正确转回 ExecutionPlan""" plan = make_plan() pipeline = PlanExecEngine._plan_to_pipeline(plan, "test_agent") converted = PlanExecEngine._pipeline_to_plan(pipeline, plan.goal) assert converted.goal == plan.goal assert len(converted.steps) == 3 def test_merge_completed_results(self): """已完成步骤结果应合并到新计划""" plan = make_plan() plan_result = make_plan_result(step_results={ "step-0": make_step_result("step-0", result={"data": "done"}), "step-1": make_step_result("step-1", status=PlanStepStatus.FAILED, error="fail"), "step-2": make_step_result("step-2", status=PlanStepStatus.SKIPPED, error="skip"), }) PlanExecEngine._merge_completed_results(plan, plan_result) assert plan.get_step("step-0").status == PlanStepStatus.COMPLETED assert plan.get_step("step-0").result == {"data": "done"} assert plan.get_step("step-2").status == PlanStepStatus.SKIPPED