478 lines
17 KiB
Python
478 lines
17 KiB
Python
"""ReAct Engine 单元测试 - TDD 第一步"""
|
||
|
||
import json
|
||
from unittest.mock import AsyncMock, MagicMock
|
||
|
||
import pytest
|
||
|
||
from agentkit.llm.gateway import LLMGateway
|
||
from agentkit.llm.protocol import LLMResponse, TokenUsage, ToolCall
|
||
from agentkit.tools.base import Tool
|
||
|
||
|
||
# ── Test Helpers ──────────────────────────────────────────
|
||
|
||
|
||
class FakeTool(Tool):
    """Configurable in-memory ``Tool`` stand-in used by the tests.

    Every ``execute`` call is counted and its keyword arguments are stored,
    so a test can inspect how (and how often) the tool was invoked.
    """

    def __init__(
        self,
        name: str = "fake_tool",
        description: str = "A fake tool for testing",
        result: dict | None = None,
        should_fail: bool = False,
    ):
        """Configure the fake.

        Args:
            name: Tool name forwarded to ``Tool.__init__``.
            description: Tool description forwarded to ``Tool.__init__``.
            result: Value returned by ``execute``. ``None`` selects the
                default ``{"status": "ok"}``.
            should_fail: When true, ``execute`` raises ``RuntimeError``.
        """
        super().__init__(name=name, description=description)
        # Compare against None instead of relying on truthiness, so that an
        # explicitly configured empty dict is returned as-is rather than
        # being silently swapped for the default payload.
        self._result = {"status": "ok"} if result is None else result
        self._should_fail = should_fail
        self.call_count = 0
        self.last_kwargs: dict | None = None

    async def execute(self, **kwargs) -> dict:
        """Record the invocation, then return the canned result or fail.

        Raises:
            RuntimeError: If the fake was created with ``should_fail=True``.
        """
        # Bookkeeping happens before the failure check so failed calls are
        # still visible through ``call_count`` / ``last_kwargs``.
        self.call_count += 1
        self.last_kwargs = kwargs
        if self._should_fail:
            raise RuntimeError(f"Tool '{self.name}' execution failed")
        return self._result
|
||
|
||
|
||
def make_mock_gateway(responses: list[LLMResponse]) -> LLMGateway:
    """Build a mocked ``LLMGateway`` whose ``chat`` replays ``responses`` in order."""
    # An iterable side_effect makes each awaited call yield the next item.
    chat_mock = AsyncMock()
    chat_mock.side_effect = responses

    mocked_gateway = MagicMock(spec=LLMGateway)
    mocked_gateway.chat = chat_mock
    return mocked_gateway
|
||
|
||
|
||
def make_response(
    content: str = "",
    tool_calls: list[ToolCall] | None = None,
    prompt_tokens: int = 10,
    completion_tokens: int = 20,
) -> LLMResponse:
    """Shortcut for building an ``LLMResponse`` with the fixed model name "test-model".

    A missing (or empty) ``tool_calls`` argument becomes a fresh empty list.
    """
    token_usage = TokenUsage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
    )
    requested_calls = tool_calls if tool_calls else []
    return LLMResponse(
        content=content,
        model="test-model",
        usage=token_usage,
        tool_calls=requested_calls,
    )
|
||
|
||
|
||
# ── Test Classes ──────────────────────────────────────────
|
||
|
||
|
||
class TestReActStepSingleCompletion:
    """Single-step completion: the LLM answers directly, with no tool calls."""

    async def test_single_step_returns_final_answer(self):
        """A lone final-answer response produces a one-step trajectory."""
        from agentkit.core.react import ReActEngine, ReActResult

        scripted_responses = [make_response(content="The answer is 42")]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted_responses))
        conversation = [{"role": "user", "content": "What is the answer?"}]

        result = await engine.execute(messages=conversation)

        assert isinstance(result, ReActResult)
        assert result.output == "The answer is 42"
        assert result.total_steps == 1
        assert len(result.trajectory) == 1
        only_step = result.trajectory[0]
        assert only_step.action == "final_answer"
        assert only_step.content == "The answer is 42"
|
||
|
||
|
||
class TestReActTwoStepCompletion:
    """Two-step completion: the LLM calls a tool first, then gives the final answer."""

    async def test_two_step_with_tool_call(self):
        """One tool call followed by a final answer yields a two-step trajectory."""
        from agentkit.core.react import ReActEngine, ReActResult

        tool = FakeTool(name="calculator", result={"value": 42})
        gateway = make_mock_gateway([
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="calculator", arguments={"expr": "6*7"})],
            ),
            make_response(content="The result is 42"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Calculate 6*7"}],
            tools=[tool],
        )

        # ReActResult was imported but never used; check the return type here
        # exactly as the single-step test does.
        assert isinstance(result, ReActResult)
        assert result.output == "The result is 42"
        assert result.total_steps == 2
        assert len(result.trajectory) == 2
        # Step 1: tool call
        assert result.trajectory[0].action == "tool_call"
        assert result.trajectory[0].tool_name == "calculator"
        assert result.trajectory[0].arguments == {"expr": "6*7"}
        assert result.trajectory[0].result == {"value": 42}
        # Step 2: final answer
        assert result.trajectory[1].action == "final_answer"
        assert result.trajectory[1].content == "The result is 42"
|
||
|
||
|
||
class TestReActMultiStep:
    """Multi-step reasoning: a three-step ReAct loop using a different tool per step."""

    async def test_three_step_react_loop(self):
        """Two tool steps followed by a final answer are recorded in order."""
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["Python is great"]})
        calc_tool = FakeTool(name="calculator", result={"value": 100})

        search_step = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_1", name="search", arguments={"query": "Python"})],
        )
        calc_step = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_2", name="calculator", arguments={"expr": "10*10"})],
        )
        final_step = make_response(
            content="Based on search and calculation, the answer is 100",
        )
        engine = ReActEngine(
            llm_gateway=make_mock_gateway([search_step, calc_step, final_step]),
        )

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search and calculate"}],
            tools=[search_tool, calc_tool],
        )

        assert result.total_steps == 3
        first, second, third = result.trajectory[0], result.trajectory[1], result.trajectory[2]
        assert first.tool_name == "search"
        assert second.tool_name == "calculator"
        assert third.action == "final_answer"
        assert search_tool.call_count == 1
        assert calc_tool.call_count == 1
|
||
|
||
|
||
class TestReActMaxSteps:
    """When the step limit is reached, the engine returns its current best result."""

    async def test_max_steps_returns_current_best(self):
        """With ``max_steps=3`` and an LLM that never finishes, exactly 3 steps run."""
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["data"]})

        # The LLM keeps asking for a tool call and never gives a final answer.
        looping_response = make_response(
            content="Thinking...",
            tool_calls=[ToolCall(id="tc_loop", name="search", arguments={"query": "more"})],
        )
        scripted = [looping_response for _ in range(20)]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted), max_steps=3)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Keep searching"}],
            tools=[search_tool],
        )

        assert result.total_steps == 3
        # On hitting max_steps the engine should still surface the last step's content.
        assert result.output is not None
|
||
|
||
|
||
class TestReActToolCallFailure:
    """Tool call failure: the LLM receives the error and adjusts its strategy."""

    async def test_tool_failure_included_in_observation(self):
        """A raising tool is recorded as a tool_call step whose result carries the error."""
        from agentkit.core.react import ReActEngine

        broken = FakeTool(name="broken_tool", should_fail=True)
        failing_call = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_1", name="broken_tool", arguments={})],
        )
        recovery = make_response(content="The tool failed, but here is my best answer")
        engine = ReActEngine(llm_gateway=make_mock_gateway([failing_call, recovery]))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Use the broken tool"}],
            tools=[broken],
        )

        assert result.total_steps == 2
        # The first step (the tool call) should have recorded the error.
        failed_step = result.trajectory[0]
        assert failed_step.action == "tool_call"
        assert failed_step.result is not None
        # The error text should be part of the recorded result.
        observed = str(failed_step.result).lower()
        assert "error" in observed or "failed" in observed
        # In the second step the LLM adapts and gives the final answer.
        assert result.trajectory[1].action == "final_answer"
        assert result.output == "The tool failed, but here is my best answer"
|
||
|
||
|
||
class TestReActFunctionCallingMode:
    """Function-calling mode: the LLM returns structured ``tool_calls``."""

    async def test_function_calling_tool_execution(self):
        """The requested tool runs and ``tools`` is forwarded to ``gateway.chat``."""
        from agentkit.core.react import ReActEngine

        tool = FakeTool(name="weather", result={"temp": 25, "city": "Shanghai"})
        gateway = make_mock_gateway([
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="weather", arguments={"city": "Shanghai"})],
            ),
            make_response(content="Shanghai temperature is 25°C"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "What's the weather?"}],
            tools=[tool],
        )

        assert result.trajectory[0].tool_name == "weather"
        assert result.trajectory[0].result == {"temp": 25, "city": "Shanghai"}
        # Verify that gateway.chat received a ``tools`` keyword argument.
        # ``call.kwargs`` and ``call[1]`` are the same dict, so the former
        # ``or first_call[1].get("tools")`` fallback could never change the
        # outcome; a single lookup is sufficient.
        first_call = gateway.chat.call_args_list[0]
        assert first_call.kwargs.get("tools") is not None
|
||
|
||
|
||
class TestReActTextParsingMode:
    """Text-parsing mode: the LLM returns plain text that embeds a tool-call pattern."""

    async def test_text_parsing_with_action_pattern(self):
        """An ``Action: name({...})`` line in the content is treated as a tool call."""
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["found"]})
        # The LLM's text reply contains the Action pattern.
        action_reply = make_response(content='Action: search({"query": "test"})')
        final_reply = make_response(content="Here is what I found")
        engine = ReActEngine(llm_gateway=make_mock_gateway([action_reply, final_reply]))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search for test"}],
            tools=[search_tool],
        )

        # Text-parsing mode should recognise the Action pattern and run the tool.
        assert result.total_steps == 2
        parsed_step = result.trajectory[0]
        assert parsed_step.action == "tool_call"
        assert parsed_step.tool_name == "search"

    async def test_text_parsing_with_code_block_pattern(self):
        """A fenced ``tool`` block holding a JSON payload is treated as a tool call."""
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["found"]})
        tool_call_text = '```tool\n{"name": "search", "arguments": {"query": "test"}}\n```'
        block_reply = make_response(content=tool_call_text)
        final_reply = make_response(content="Search results found")
        engine = ReActEngine(llm_gateway=make_mock_gateway([block_reply, final_reply]))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search for test"}],
            tools=[search_tool],
        )

        assert result.total_steps == 2
        parsed_step = result.trajectory[0]
        assert parsed_step.action == "tool_call"
        assert parsed_step.tool_name == "search"
|
||
|
||
|
||
class TestReActEmptyToolList:
    """No tools supplied: the engine produces the answer directly."""

    async def test_no_tools_direct_answer(self):
        """With ``tools=None`` the single LLM reply becomes the final answer."""
        from agentkit.core.react import ReActEngine

        direct_reply = make_response(content="Direct answer without tools")
        engine = ReActEngine(llm_gateway=make_mock_gateway([direct_reply]))
        greeting = [{"role": "user", "content": "Hello"}]

        result = await engine.execute(messages=greeting, tools=None)

        assert result.output == "Direct answer without tools"
        assert result.total_steps == 1
        assert result.trajectory[0].action == "final_answer"
|
||
|
||
|
||
class TestReActTrajectoryRecording:
    """Trajectory recording: each step's action, tool_name and result are stored correctly."""

    async def test_trajectory_records_all_steps(self):
        """Two tool steps and the final answer are recorded with step numbers 1 to 3."""
        from agentkit.core.react import ReActEngine, ReActStep

        tool_a = FakeTool(name="tool_a", result={"a": 1})
        tool_b = FakeTool(name="tool_b", result={"b": 2})

        scripted = [
            make_response(
                content="Step 1",
                tool_calls=[ToolCall(id="tc_1", name="tool_a", arguments={"x": 1})],
            ),
            make_response(
                content="Step 2",
                tool_calls=[ToolCall(id="tc_2", name="tool_b", arguments={"y": 2})],
            ),
            make_response(content="Final answer"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Multi-step task"}],
            tools=[tool_a, tool_b],
        )

        assert len(result.trajectory) == 3
        assert isinstance(result.trajectory[0], ReActStep)

        # (step number, tool name, arguments, result) expected for the tool steps.
        expected_tool_steps = [
            (1, "tool_a", {"x": 1}, {"a": 1}),
            (2, "tool_b", {"y": 2}, {"b": 2}),
        ]
        for number, tool_name, arguments, tool_result in expected_tool_steps:
            recorded = result.trajectory[number - 1]
            assert recorded.step == number
            assert recorded.action == "tool_call"
            assert recorded.tool_name == tool_name
            assert recorded.arguments == arguments
            assert recorded.result == tool_result

        closing_step = result.trajectory[2]
        assert closing_step.step == 3
        assert closing_step.action == "final_answer"
        assert closing_step.content == "Final answer"
|
||
|
||
|
||
class TestReActTokenAccumulation:
    """Token accumulation: token counts from every step are summed."""

    async def test_total_tokens_accumulated(self):
        """Per-step and total token counts match the scripted usage figures."""
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["data"]})
        tool_step = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_1", name="search", arguments={"q": "test"})],
            prompt_tokens=100,
            completion_tokens=50,
        )
        answer_step = make_response(
            content="Final answer",
            prompt_tokens=200,
            completion_tokens=30,
        )
        engine = ReActEngine(llm_gateway=make_mock_gateway([tool_step, answer_step]))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search"}],
            tools=[search_tool],
        )

        # 100+50 + 200+30 = 380
        assert result.total_tokens == 380
        # Each step should also carry its own token count.
        first_step, second_step = result.trajectory[0], result.trajectory[1]
        assert first_step.tokens == 150
        assert second_step.tokens == 230
|
||
|
||
|
||
class TestReActSystemPrompt:
    """The system prompt is included in the initial messages."""

    async def test_system_prompt_included(self):
        """The first message handed to ``gateway.chat`` is the system prompt."""
        from agentkit.core.react import ReActEngine

        gateway = make_mock_gateway([
            make_response(content="Response"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        await engine.execute(
            messages=[{"role": "user", "content": "Hello"}],
            system_prompt="You are a helpful assistant",
        )

        # Inspect the messages passed on the first gateway.chat call.
        first_call = gateway.chat.call_args_list[0]
        messages = first_call.kwargs.get("messages")
        if messages is None and first_call.args:
            # The old fallback re-read the same kwargs dict (call[1] is
            # call.kwargs) and then failed with an IndexError on ``[][0]``.
            # NOTE(review): assumes ``messages`` is the first positional
            # parameter of LLMGateway.chat -- confirm against its signature.
            messages = first_call.args[0]
        assert messages, "gateway.chat was called without any messages"
        assert messages[0]["role"] == "system"
        assert messages[0]["content"] == "You are a helpful assistant"
|
||
|
||
|
||
class TestReActMultipleToolCallsInOneStep:
    """Several tool calls in one step: a single LLM response carries multiple tool_calls."""

    async def test_multiple_tool_calls_executed(self):
        """Both tools named in one response are each executed exactly once."""
        from agentkit.core.react import ReActEngine

        tool_a = FakeTool(name="tool_a", result={"a": 1})
        tool_b = FakeTool(name="tool_b", result={"b": 2})

        batched_calls = [
            ToolCall(id="tc_1", name="tool_a", arguments={"x": 1}),
            ToolCall(id="tc_2", name="tool_b", arguments={"y": 2}),
        ]
        scripted = [
            make_response(content="", tool_calls=batched_calls),
            make_response(content="Both tools executed"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Run both tools"}],
            tools=[tool_a, tool_b],
        )

        # Both tools should have been executed.
        assert tool_a.call_count == 1
        assert tool_b.call_count == 1
        assert result.output == "Both tools executed"
|
||
|
||
|
||
class TestReActToolNotFound:
    """Tool not found: the LLM asks for a tool that does not exist."""

    async def test_unknown_tool_returns_error_observation(self):
        """An unknown tool name is recorded as a tool_call step carrying an error."""
        from agentkit.core.react import ReActEngine

        unknown_call = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_1", name="nonexistent_tool", arguments={})],
        )
        fallback_answer = make_response(content="Tool not found, here is my answer anyway")
        engine = ReActEngine(llm_gateway=make_mock_gateway([unknown_call, fallback_answer]))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Use unknown tool"}],
            tools=[],  # empty tool list
        )

        # The first step should record the tool-not-found error.
        missing_step = result.trajectory[0]
        assert missing_step.action == "tool_call"
        observed = str(missing_step.result).lower()
        assert "error" in observed or "not found" in observed
        # The LLM should receive the error and adapt.
        assert result.total_steps == 2
        assert result.output == "Tool not found, here is my answer anyway"
|