"""ReAct Engine 单元测试 - TDD 第一步""" import json from unittest.mock import AsyncMock, MagicMock import pytest from agentkit.llm.gateway import LLMGateway from agentkit.llm.protocol import LLMResponse, TokenUsage, ToolCall from agentkit.tools.base import Tool # ── Test Helpers ────────────────────────────────────────── class FakeTool(Tool): """用于测试的 Fake Tool""" def __init__( self, name: str = "fake_tool", description: str = "A fake tool for testing", result: dict | None = None, should_fail: bool = False, ): super().__init__(name=name, description=description) self._result = result or {"status": "ok"} self._should_fail = should_fail self.call_count = 0 self.last_kwargs: dict | None = None async def execute(self, **kwargs) -> dict: self.call_count += 1 self.last_kwargs = kwargs if self._should_fail: raise RuntimeError(f"Tool '{self.name}' execution failed") return self._result def make_mock_gateway(responses: list[LLMResponse]) -> LLMGateway: """创建一个 mock LLMGateway,按顺序返回给定响应""" gateway = MagicMock(spec=LLMGateway) gateway.chat = AsyncMock(side_effect=responses) return gateway def make_response( content: str = "", tool_calls: list[ToolCall] | None = None, prompt_tokens: int = 10, completion_tokens: int = 20, ) -> LLMResponse: """快速构造 LLMResponse""" return LLMResponse( content=content, model="test-model", usage=TokenUsage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, ), tool_calls=tool_calls or [], ) # ── Test Classes ────────────────────────────────────────── class TestReActStepSingleCompletion: """单步完成:LLM 直接返回最终答案,无工具调用""" async def test_single_step_returns_final_answer(self): from agentkit.core.react import ReActEngine, ReActResult gateway = make_mock_gateway([ make_response(content="The answer is 42"), ]) engine = ReActEngine(llm_gateway=gateway) result = await engine.execute( messages=[{"role": "user", "content": "What is the answer?"}], ) assert isinstance(result, ReActResult) assert result.output == "The answer is 42" assert result.total_steps == 1 assert len(result.trajectory) == 1 assert result.trajectory[0].action == "final_answer" assert result.trajectory[0].content == "The answer is 42" class TestReActTwoStepCompletion: """两步完成:LLM 先调用工具,然后返回最终答案""" async def test_two_step_with_tool_call(self): from agentkit.core.react import ReActEngine, ReActResult tool = FakeTool(name="calculator", result={"value": 42}) gateway = make_mock_gateway([ make_response( content="", tool_calls=[ToolCall(id="tc_1", name="calculator", arguments={"expr": "6*7"})], ), make_response(content="The result is 42"), ]) engine = ReActEngine(llm_gateway=gateway) result = await engine.execute( messages=[{"role": "user", "content": "Calculate 6*7"}], tools=[tool], ) assert result.output == "The result is 42" assert result.total_steps == 2 assert len(result.trajectory) == 2 # Step 1: tool call assert result.trajectory[0].action == "tool_call" assert result.trajectory[0].tool_name == "calculator" assert result.trajectory[0].arguments == {"expr": "6*7"} assert result.trajectory[0].result == {"value": 42} # Step 2: final answer assert result.trajectory[1].action == "final_answer" assert result.trajectory[1].content == "The result is 42" class TestReActMultiStep: """多步推理:3 步 ReAct 循环,每步调用不同工具""" async def test_three_step_react_loop(self): from agentkit.core.react import ReActEngine search_tool = FakeTool(name="search", result={"results": ["Python is great"]}) calc_tool = FakeTool(name="calculator", result={"value": 100}) gateway = make_mock_gateway([ make_response( content="", tool_calls=[ToolCall(id="tc_1", name="search", arguments={"query": "Python"})], ), make_response( content="", tool_calls=[ToolCall(id="tc_2", name="calculator", arguments={"expr": "10*10"})], ), make_response(content="Based on search and calculation, the answer is 100"), ]) engine = ReActEngine(llm_gateway=gateway) result = await engine.execute( messages=[{"role": "user", "content": "Search and calculate"}], tools=[search_tool, calc_tool], ) assert result.total_steps == 3 assert result.trajectory[0].tool_name == "search" assert result.trajectory[1].tool_name == "calculator" assert result.trajectory[2].action == "final_answer" assert search_tool.call_count == 1 assert calc_tool.call_count == 1 class TestReActMaxSteps: """达到最大步数时返回当前最佳结果""" async def test_max_steps_returns_current_best(self): from agentkit.core.react import ReActEngine tool = FakeTool(name="search", result={"results": ["data"]}) # LLM 一直返回 tool_calls,不会给出 final answer always_tool_response = make_response( content="Thinking...", tool_calls=[ToolCall(id="tc_loop", name="search", arguments={"query": "more"})], ) gateway = make_mock_gateway([always_tool_response] * 20) engine = ReActEngine(llm_gateway=gateway, max_steps=3) result = await engine.execute( messages=[{"role": "user", "content": "Keep searching"}], tools=[tool], ) assert result.total_steps == 3 # 当达到 max_steps 时,应返回最后一步的内容 assert result.output is not None class TestReActToolCallFailure: """工具调用失败:LLM 收到错误信息并调整策略""" async def test_tool_failure_included_in_observation(self): from agentkit.core.react import ReActEngine failing_tool = FakeTool(name="broken_tool", should_fail=True) gateway = make_mock_gateway([ make_response( content="", tool_calls=[ToolCall(id="tc_1", name="broken_tool", arguments={})], ), make_response(content="The tool failed, but here is my best answer"), ]) engine = ReActEngine(llm_gateway=gateway) result = await engine.execute( messages=[{"role": "user", "content": "Use the broken tool"}], tools=[failing_tool], ) assert result.total_steps == 2 # 第一步 tool_call 应记录错误信息 assert result.trajectory[0].action == "tool_call" assert result.trajectory[0].result is not None # 错误信息应包含在结果中 assert "error" in str(result.trajectory[0].result).lower() or "failed" in str(result.trajectory[0].result).lower() # 第二步 LLM 调整策略给出最终答案 assert result.trajectory[1].action == "final_answer" assert result.output == "The tool failed, but here is my best answer" class TestReActFunctionCallingMode: """Function Calling 模式:LLM 返回 tool_calls""" async def test_function_calling_tool_execution(self): from agentkit.core.react import ReActEngine tool = FakeTool(name="weather", result={"temp": 25, "city": "Shanghai"}) gateway = make_mock_gateway([ make_response( content="", tool_calls=[ToolCall(id="tc_1", name="weather", arguments={"city": "Shanghai"})], ), make_response(content="Shanghai temperature is 25°C"), ]) engine = ReActEngine(llm_gateway=gateway) result = await engine.execute( messages=[{"role": "user", "content": "What's the weather?"}], tools=[tool], ) assert result.trajectory[0].tool_name == "weather" assert result.trajectory[0].result == {"temp": 25, "city": "Shanghai"} # 验证 gateway.chat 被调用时传入了 tools 参数 first_call = gateway.chat.call_args_list[0] assert first_call.kwargs.get("tools") is not None or first_call[1].get("tools") is not None class TestReActTextParsingMode: """文本解析模式:LLM 返回包含工具调用模式的文本""" async def test_text_parsing_with_action_pattern(self): from agentkit.core.react import ReActEngine tool = FakeTool(name="search", result={"results": ["found"]}) # LLM 返回文本中包含 Action 模式 gateway = make_mock_gateway([ make_response(content='Action: search({"query": "test"})'), make_response(content="Here is what I found"), ]) engine = ReActEngine(llm_gateway=gateway) result = await engine.execute( messages=[{"role": "user", "content": "Search for test"}], tools=[tool], ) # 文本解析模式应能识别 Action 模式并执行工具 assert result.total_steps == 2 assert result.trajectory[0].action == "tool_call" assert result.trajectory[0].tool_name == "search" async def test_text_parsing_with_code_block_pattern(self): from agentkit.core.react import ReActEngine tool = FakeTool(name="search", result={"results": ["found"]}) tool_call_text = '```tool\n{"name": "search", "arguments": {"query": "test"}}\n```' gateway = make_mock_gateway([ make_response(content=tool_call_text), make_response(content="Search results found"), ]) engine = ReActEngine(llm_gateway=gateway) result = await engine.execute( messages=[{"role": "user", "content": "Search for test"}], tools=[tool], ) assert result.total_steps == 2 assert result.trajectory[0].action == "tool_call" assert result.trajectory[0].tool_name == "search" class TestReActEmptyToolList: """空工具列表:直接生成答案""" async def test_no_tools_direct_answer(self): from agentkit.core.react import ReActEngine gateway = make_mock_gateway([ make_response(content="Direct answer without tools"), ]) engine = ReActEngine(llm_gateway=gateway) result = await engine.execute( messages=[{"role": "user", "content": "Hello"}], tools=None, ) assert result.output == "Direct answer without tools" assert result.total_steps == 1 assert result.trajectory[0].action == "final_answer" class TestReActTrajectoryRecording: """轨迹记录:每步的 action、tool_name、result 正确记录""" async def test_trajectory_records_all_steps(self): from agentkit.core.react import ReActEngine, ReActStep tool_a = FakeTool(name="tool_a", result={"a": 1}) tool_b = FakeTool(name="tool_b", result={"b": 2}) gateway = make_mock_gateway([ make_response( content="Step 1", tool_calls=[ToolCall(id="tc_1", name="tool_a", arguments={"x": 1})], ), make_response( content="Step 2", tool_calls=[ToolCall(id="tc_2", name="tool_b", arguments={"y": 2})], ), make_response(content="Final answer"), ]) engine = ReActEngine(llm_gateway=gateway) result = await engine.execute( messages=[{"role": "user", "content": "Multi-step task"}], tools=[tool_a, tool_b], ) assert len(result.trajectory) == 3 step1 = result.trajectory[0] assert isinstance(step1, ReActStep) assert step1.step == 1 assert step1.action == "tool_call" assert step1.tool_name == "tool_a" assert step1.arguments == {"x": 1} assert step1.result == {"a": 1} step2 = result.trajectory[1] assert step2.step == 2 assert step2.action == "tool_call" assert step2.tool_name == "tool_b" assert step2.arguments == {"y": 2} assert step2.result == {"b": 2} step3 = result.trajectory[2] assert step3.step == 3 assert step3.action == "final_answer" assert step3.content == "Final answer" class TestReActTokenAccumulation: """Token 累积:所有步骤的 token 数应累加""" async def test_total_tokens_accumulated(self): from agentkit.core.react import ReActEngine tool = FakeTool(name="search", result={"results": ["data"]}) gateway = make_mock_gateway([ make_response( content="", tool_calls=[ToolCall(id="tc_1", name="search", arguments={"q": "test"})], prompt_tokens=100, completion_tokens=50, ), make_response( content="Final answer", prompt_tokens=200, completion_tokens=30, ), ]) engine = ReActEngine(llm_gateway=gateway) result = await engine.execute( messages=[{"role": "user", "content": "Search"}], tools=[tool], ) # 100+50 + 200+30 = 380 assert result.total_tokens == 380 # 每步的 tokens 也应记录 assert result.trajectory[0].tokens == 150 assert result.trajectory[1].tokens == 230 class TestReActSystemPrompt: """System prompt 包含在初始消息中""" async def test_system_prompt_included(self): from agentkit.core.react import ReActEngine gateway = make_mock_gateway([ make_response(content="Response"), ]) engine = ReActEngine(llm_gateway=gateway) await engine.execute( messages=[{"role": "user", "content": "Hello"}], system_prompt="You are a helpful assistant", ) # 验证第一次调用 gateway.chat 时 messages 包含 system prompt first_call = gateway.chat.call_args_list[0] call_kwargs = first_call.kwargs messages = call_kwargs.get("messages", first_call[1].get("messages", [])) assert messages[0]["role"] == "system" assert messages[0]["content"] == "You are a helpful assistant" class TestReActMultipleToolCallsInOneStep: """单步多个工具调用:LLM 在一次响应中返回多个 tool_calls""" async def test_multiple_tool_calls_executed(self): from agentkit.core.react import ReActEngine tool_a = FakeTool(name="tool_a", result={"a": 1}) tool_b = FakeTool(name="tool_b", result={"b": 2}) gateway = make_mock_gateway([ make_response( content="", tool_calls=[ ToolCall(id="tc_1", name="tool_a", arguments={"x": 1}), ToolCall(id="tc_2", name="tool_b", arguments={"y": 2}), ], ), make_response(content="Both tools executed"), ]) engine = ReActEngine(llm_gateway=gateway) result = await engine.execute( messages=[{"role": "user", "content": "Run both tools"}], tools=[tool_a, tool_b], ) # 两个工具都应被执行 assert tool_a.call_count == 1 assert tool_b.call_count == 1 assert result.output == "Both tools executed" class TestReActToolNotFound: """工具未找到:LLM 调用了不存在的工具""" async def test_unknown_tool_returns_error_observation(self): from agentkit.core.react import ReActEngine gateway = make_mock_gateway([ make_response( content="", tool_calls=[ToolCall(id="tc_1", name="nonexistent_tool", arguments={})], ), make_response(content="Tool not found, here is my answer anyway"), ]) engine = ReActEngine(llm_gateway=gateway) result = await engine.execute( messages=[{"role": "user", "content": "Use unknown tool"}], tools=[], # 空工具列表 ) # 第一步应记录工具未找到错误 assert result.trajectory[0].action == "tool_call" assert "error" in str(result.trajectory[0].result).lower() or "not found" in str(result.trajectory[0].result).lower() # LLM 应收到错误信息并调整 assert result.total_steps == 2 assert result.output == "Tool not found, here is my answer anyway" class TestReActTimeout: """ReAct 循环超时:超过 timeout_seconds 后抛出 TaskTimeoutError""" async def test_timeout_raises_task_timeout_error(self): import asyncio from agentkit.core.react import ReActEngine from agentkit.core.exceptions import TaskTimeoutError # LLM 每次调用延迟 0.5s,设置 0.3s 超时 async def slow_chat(**kwargs): await asyncio.sleep(0.5) return make_response(content="slow response") gateway = MagicMock(spec=LLMGateway) gateway.chat = AsyncMock(side_effect=slow_chat) engine = ReActEngine(llm_gateway=gateway) with pytest.raises(TaskTimeoutError): await engine.execute( messages=[{"role": "user", "content": "Slow task"}], timeout_seconds=0.3, ) async def test_timeout_zero_means_no_timeout(self): import asyncio from agentkit.core.react import ReActEngine # LLM 延迟 0.1s,timeout=0 表示无超时 async def slightly_slow_chat(**kwargs): await asyncio.sleep(0.1) return make_response(content="done") gateway = MagicMock(spec=LLMGateway) gateway.chat = AsyncMock(side_effect=slightly_slow_chat) engine = ReActEngine(llm_gateway=gateway) result = await engine.execute( messages=[{"role": "user", "content": "Task"}], timeout_seconds=0, ) assert result.output == "done" assert result.status == "success" async def test_default_timeout_used_when_none(self): import asyncio from agentkit.core.react import ReActEngine from agentkit.core.exceptions import TaskTimeoutError async def slow_chat(**kwargs): await asyncio.sleep(0.5) return make_response(content="slow") gateway = MagicMock(spec=LLMGateway) gateway.chat = AsyncMock(side_effect=slow_chat) # default_timeout=0.3s engine = ReActEngine(llm_gateway=gateway, default_timeout=0.3) with pytest.raises(TaskTimeoutError): await engine.execute( messages=[{"role": "user", "content": "Task"}], timeout_seconds=None, # should use default_timeout ) async def test_normal_completion_unaffected_by_timeout(self): from agentkit.core.react import ReActEngine gateway = make_mock_gateway([ make_response(content="Quick answer"), ]) engine = ReActEngine(llm_gateway=gateway) result = await engine.execute( messages=[{"role": "user", "content": "Quick task"}], timeout_seconds=300, ) assert result.output == "Quick answer" assert result.status == "success" class TestReActCancellation: """ReAct 循环取消:CancellationToken 取消后抛出 TaskCancelledError""" async def test_cancel_raises_task_cancelled_error(self): import asyncio from agentkit.core.react import ReActEngine from agentkit.core.protocol import CancellationToken from agentkit.core.exceptions import TaskCancelledError call_count = 0 async def counting_chat(**kwargs): nonlocal call_count call_count += 1 if call_count >= 2: # Simulate cancel after second LLM call pass return make_response(content="response") gateway = MagicMock(spec=LLMGateway) gateway.chat = AsyncMock(side_effect=counting_chat) engine = ReActEngine(llm_gateway=gateway) token = CancellationToken() # Cancel before execution starts token.cancel() with pytest.raises(TaskCancelledError): await engine.execute( messages=[{"role": "user", "content": "Task"}], cancellation_token=token, ) async def test_cancel_mid_execution(self): import asyncio from agentkit.core.react import ReActEngine from agentkit.core.protocol import CancellationToken from agentkit.core.exceptions import TaskCancelledError token = CancellationToken() call_count = 0 async def chat_with_cancel(**kwargs): nonlocal call_count call_count += 1 # Cancel after first call if call_count >= 1: token.cancel() # First call returns tool call, second would be final return make_response( content="", tool_calls=[ToolCall(id="tc_1", name="search", arguments={"q": "test"})], ) tool = FakeTool(name="search", result={"results": ["data"]}) gateway = MagicMock(spec=LLMGateway) gateway.chat = AsyncMock(side_effect=chat_with_cancel) engine = ReActEngine(llm_gateway=gateway) with pytest.raises(TaskCancelledError): await engine.execute( messages=[{"role": "user", "content": "Search"}], tools=[tool], cancellation_token=token, ) async def test_no_cancel_token_works_normally(self): from agentkit.core.react import ReActEngine gateway = make_mock_gateway([ make_response(content="Normal answer"), ]) engine = ReActEngine(llm_gateway=gateway) result = await engine.execute( messages=[{"role": "user", "content": "Normal task"}], # No cancellation_token ) assert result.output == "Normal answer" assert result.status == "success" async def test_uncancelled_token_works_normally(self): from agentkit.core.react import ReActEngine from agentkit.core.protocol import CancellationToken gateway = make_mock_gateway([ make_response(content="Answer"), ]) engine = ReActEngine(llm_gateway=gateway) token = CancellationToken() # Not cancelled result = await engine.execute( messages=[{"role": "user", "content": "Task"}], cancellation_token=token, ) assert result.output == "Answer" assert result.status == "success"