fischer-agentkit/tests/unit/test_react_engine.py

656 lines
23 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""ReAct Engine 单元测试 - TDD 第一步"""
import json
from unittest.mock import AsyncMock, MagicMock
import pytest
from agentkit.llm.gateway import LLMGateway
from agentkit.llm.protocol import LLMResponse, TokenUsage, ToolCall
from agentkit.tools.base import Tool
# ── Test Helpers ──────────────────────────────────────────
class FakeTool(Tool):
    """Fake tool for tests that records how it was invoked.

    Args:
        name: Tool name forwarded to ``Tool.__init__``.
        description: Tool description forwarded to ``Tool.__init__``.
        result: Value returned by ``execute``. ``None`` selects the default
            ``{"status": "ok"}``; any other dict (including an empty one)
            is returned unchanged.
        should_fail: When true, ``execute`` raises ``RuntimeError`` instead
            of returning ``result``.
    """

    def __init__(
        self,
        name: str = "fake_tool",
        description: str = "A fake tool for testing",
        result: dict | None = None,
        should_fail: bool = False,
    ):
        super().__init__(name=name, description=description)
        # Compare against None explicitly: `result or {...}` would silently
        # replace an intentionally empty dict with the default payload.
        self._result = {"status": "ok"} if result is None else result
        self._should_fail = should_fail
        # Invocation bookkeeping inspected by the tests.
        self.call_count = 0
        self.last_kwargs: dict | None = None

    async def execute(self, **kwargs) -> dict:
        """Record the call, then return the canned result or raise.

        Raises:
            RuntimeError: If the tool was constructed with ``should_fail=True``.
        """
        self.call_count += 1
        self.last_kwargs = kwargs
        if self._should_fail:
            # NOTE(review): relies on the Tool base class exposing ``name`` -- confirm.
            raise RuntimeError(f"Tool '{self.name}' execution failed")
        return self._result
def make_mock_gateway(responses: list[LLMResponse]) -> LLMGateway:
    """Build a mock LLMGateway whose ``chat`` yields the given responses in order."""
    # An iterable side_effect makes each awaited call return the next item.
    scripted_chat = AsyncMock(side_effect=responses)
    mock_gateway = MagicMock(spec=LLMGateway)
    mock_gateway.chat = scripted_chat
    return mock_gateway
def make_response(
    content: str = "",
    tool_calls: list[ToolCall] | None = None,
    prompt_tokens: int = 10,
    completion_tokens: int = 20,
) -> LLMResponse:
    """Shortcut for constructing an LLMResponse with the fixed model name "test-model"."""
    token_usage = TokenUsage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
    )
    # A missing (or empty) tool-call list is normalised to a fresh empty list.
    calls = tool_calls if tool_calls else []
    return LLMResponse(
        content=content,
        model="test-model",
        usage=token_usage,
        tool_calls=calls,
    )
# ── Test Classes ──────────────────────────────────────────
class TestReActStepSingleCompletion:
    """Single-step completion: the LLM returns the final answer directly, with no tool call."""

    async def test_single_step_returns_final_answer(self):
        from agentkit.core.react import ReActEngine, ReActResult

        llm = make_mock_gateway([
            make_response(content="The answer is 42"),
        ])
        react_engine = ReActEngine(llm_gateway=llm)
        user_messages = [{"role": "user", "content": "What is the answer?"}]

        outcome = await react_engine.execute(messages=user_messages)

        assert isinstance(outcome, ReActResult)
        assert outcome.output == "The answer is 42"
        assert outcome.total_steps == 1
        steps = outcome.trajectory
        assert len(steps) == 1
        assert steps[0].action == "final_answer"
        assert steps[0].content == "The answer is 42"
class TestReActTwoStepCompletion:
    """Two-step completion: the LLM first calls a tool, then returns the final answer."""

    async def test_two_step_with_tool_call(self):
        # Only ReActEngine is needed here; the unused ReActResult import was dropped.
        from agentkit.core.react import ReActEngine

        tool = FakeTool(name="calculator", result={"value": 42})
        gateway = make_mock_gateway([
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="calculator", arguments={"expr": "6*7"})],
            ),
            make_response(content="The result is 42"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Calculate 6*7"}],
            tools=[tool],
        )

        assert result.output == "The result is 42"
        assert result.total_steps == 2
        assert len(result.trajectory) == 2

        # Step 1: tool call
        assert result.trajectory[0].action == "tool_call"
        assert result.trajectory[0].tool_name == "calculator"
        assert result.trajectory[0].arguments == {"expr": "6*7"}
        assert result.trajectory[0].result == {"value": 42}

        # Step 2: final answer
        assert result.trajectory[1].action == "final_answer"
        assert result.trajectory[1].content == "The result is 42"
class TestReActMultiStep:
    """Multi-step reasoning: a three-step ReAct loop that calls a different tool on each step."""

    async def test_three_step_react_loop(self):
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["Python is great"]})
        calc_tool = FakeTool(name="calculator", result={"value": 100})

        search_turn = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_1", name="search", arguments={"query": "Python"})],
        )
        calc_turn = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_2", name="calculator", arguments={"expr": "10*10"})],
        )
        final_turn = make_response(content="Based on search and calculation, the answer is 100")
        llm = make_mock_gateway([search_turn, calc_turn, final_turn])

        react_engine = ReActEngine(llm_gateway=llm)
        outcome = await react_engine.execute(
            messages=[{"role": "user", "content": "Search and calculate"}],
            tools=[search_tool, calc_tool],
        )

        assert outcome.total_steps == 3
        assert outcome.trajectory[0].tool_name == "search"
        assert outcome.trajectory[1].tool_name == "calculator"
        assert outcome.trajectory[2].action == "final_answer"
        # Each tool must have been invoked exactly once.
        assert search_tool.call_count == 1
        assert calc_tool.call_count == 1
class TestReActMaxSteps:
    """When the maximum step count is reached, the current best result is returned."""

    async def test_max_steps_returns_current_best(self):
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["data"]})

        # The LLM keeps returning tool_calls and never produces a final answer.
        looping_response = make_response(
            content="Thinking...",
            tool_calls=[ToolCall(id="tc_loop", name="search", arguments={"query": "more"})],
        )
        llm = make_mock_gateway([looping_response] * 20)

        react_engine = ReActEngine(llm_gateway=llm, max_steps=3)
        outcome = await react_engine.execute(
            messages=[{"role": "user", "content": "Keep searching"}],
            tools=[search_tool],
        )

        assert outcome.total_steps == 3
        # On hitting max_steps the engine should return the content of the last step.
        assert outcome.output is not None
class TestReActToolCallFailure:
    """Tool call failure: the LLM receives the error information and adjusts its strategy."""

    async def test_tool_failure_included_in_observation(self):
        from agentkit.core.react import ReActEngine

        broken = FakeTool(name="broken_tool", should_fail=True)
        llm = make_mock_gateway([
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="broken_tool", arguments={})],
            ),
            make_response(content="The tool failed, but here is my best answer"),
        ])

        react_engine = ReActEngine(llm_gateway=llm)
        outcome = await react_engine.execute(
            messages=[{"role": "user", "content": "Use the broken tool"}],
            tools=[broken],
        )

        assert outcome.total_steps == 2

        # The first step (tool_call) should record the error information.
        assert outcome.trajectory[0].action == "tool_call"
        assert outcome.trajectory[0].result is not None

        # The error text should be part of the recorded result.
        observation = str(outcome.trajectory[0].result).lower()
        assert "error" in observation or "failed" in observation

        # In the second step the LLM adapts and gives the final answer.
        assert outcome.trajectory[1].action == "final_answer"
        assert outcome.output == "The tool failed, but here is my best answer"
class TestReActFunctionCallingMode:
    """Function-calling mode: the LLM returns structured tool_calls."""

    async def test_function_calling_tool_execution(self):
        from agentkit.core.react import ReActEngine

        tool = FakeTool(name="weather", result={"temp": 25, "city": "Shanghai"})
        gateway = make_mock_gateway([
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="weather", arguments={"city": "Shanghai"})],
            ),
            make_response(content="Shanghai temperature is 25°C"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "What's the weather?"}],
            tools=[tool],
        )

        assert result.trajectory[0].tool_name == "weather"
        assert result.trajectory[0].result == {"temp": 25, "city": "Shanghai"}

        # Verify that gateway.chat was called with a `tools` keyword argument.
        # `call[1]` is the same mapping as `call.kwargs`, so one lookup suffices.
        first_call = gateway.chat.call_args_list[0]
        assert first_call.kwargs.get("tools") is not None
class TestReActTextParsingMode:
    """Text-parsing mode: the LLM returns plain text that contains a tool-call pattern."""

    async def test_text_parsing_with_action_pattern(self):
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["found"]})

        # The LLM's text reply embeds an "Action" pattern.
        action_text = 'Action: search({"query": "test"})'
        llm = make_mock_gateway([
            make_response(content=action_text),
            make_response(content="Here is what I found"),
        ])

        react_engine = ReActEngine(llm_gateway=llm)
        outcome = await react_engine.execute(
            messages=[{"role": "user", "content": "Search for test"}],
            tools=[search_tool],
        )

        # Text-parsing mode should recognise the Action pattern and run the tool.
        assert outcome.total_steps == 2
        first_step = outcome.trajectory[0]
        assert first_step.action == "tool_call"
        assert first_step.tool_name == "search"

    async def test_text_parsing_with_code_block_pattern(self):
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["found"]})

        # The tool call is embedded as JSON inside a fenced ``tool`` code block.
        tool_call_text = '```tool\n{"name": "search", "arguments": {"query": "test"}}\n```'
        llm = make_mock_gateway([
            make_response(content=tool_call_text),
            make_response(content="Search results found"),
        ])

        react_engine = ReActEngine(llm_gateway=llm)
        outcome = await react_engine.execute(
            messages=[{"role": "user", "content": "Search for test"}],
            tools=[search_tool],
        )

        assert outcome.total_steps == 2
        first_step = outcome.trajectory[0]
        assert first_step.action == "tool_call"
        assert first_step.tool_name == "search"
class TestReActEmptyToolList:
    """No tools supplied: the answer is generated directly."""

    async def test_no_tools_direct_answer(self):
        from agentkit.core.react import ReActEngine

        llm = make_mock_gateway([
            make_response(content="Direct answer without tools"),
        ])
        react_engine = ReActEngine(llm_gateway=llm)

        outcome = await react_engine.execute(
            messages=[{"role": "user", "content": "Hello"}],
            tools=None,
        )

        assert outcome.output == "Direct answer without tools"
        assert outcome.total_steps == 1
        assert outcome.trajectory[0].action == "final_answer"
class TestReActTrajectoryRecording:
    """Trajectory recording: action, tool_name and result are recorded correctly for every step."""

    async def test_trajectory_records_all_steps(self):
        from agentkit.core.react import ReActEngine, ReActStep

        tool_a = FakeTool(name="tool_a", result={"a": 1})
        tool_b = FakeTool(name="tool_b", result={"b": 2})
        llm = make_mock_gateway([
            make_response(
                content="Step 1",
                tool_calls=[ToolCall(id="tc_1", name="tool_a", arguments={"x": 1})],
            ),
            make_response(
                content="Step 2",
                tool_calls=[ToolCall(id="tc_2", name="tool_b", arguments={"y": 2})],
            ),
            make_response(content="Final answer"),
        ])

        react_engine = ReActEngine(llm_gateway=llm)
        outcome = await react_engine.execute(
            messages=[{"role": "user", "content": "Multi-step task"}],
            tools=[tool_a, tool_b],
        )

        recorded = outcome.trajectory
        assert len(recorded) == 3

        # First step: call to tool_a.
        first = recorded[0]
        assert isinstance(first, ReActStep)
        assert first.step == 1
        assert first.action == "tool_call"
        assert first.tool_name == "tool_a"
        assert first.arguments == {"x": 1}
        assert first.result == {"a": 1}

        # Second step: call to tool_b.
        second = recorded[1]
        assert second.step == 2
        assert second.action == "tool_call"
        assert second.tool_name == "tool_b"
        assert second.arguments == {"y": 2}
        assert second.result == {"b": 2}

        # Third step: the final answer.
        third = recorded[2]
        assert third.step == 3
        assert third.action == "final_answer"
        assert third.content == "Final answer"
class TestReActTokenAccumulation:
    """Token accumulation: token counts from all steps are summed."""

    async def test_total_tokens_accumulated(self):
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["data"]})

        tool_turn = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_1", name="search", arguments={"q": "test"})],
            prompt_tokens=100,
            completion_tokens=50,
        )
        answer_turn = make_response(
            content="Final answer",
            prompt_tokens=200,
            completion_tokens=30,
        )
        llm = make_mock_gateway([tool_turn, answer_turn])

        react_engine = ReActEngine(llm_gateway=llm)
        outcome = await react_engine.execute(
            messages=[{"role": "user", "content": "Search"}],
            tools=[search_tool],
        )

        # 100+50 + 200+30 = 380
        assert outcome.total_tokens == 380

        # Per-step token counts should be recorded as well.
        assert outcome.trajectory[0].tokens == 150
        assert outcome.trajectory[1].tokens == 230
class TestReActSystemPrompt:
    """The system prompt is included in the initial messages."""

    async def test_system_prompt_included(self):
        from agentkit.core.react import ReActEngine

        gateway = make_mock_gateway([
            make_response(content="Response"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        await engine.execute(
            messages=[{"role": "user", "content": "Hello"}],
            system_prompt="You are a helpful assistant",
        )

        # Verify that the first gateway.chat call carried the system prompt.
        # `call[1]` is the same mapping as `call.kwargs`, so a nested fallback
        # lookup is redundant; a single `.get` is enough.
        first_call = gateway.chat.call_args_list[0]
        messages = first_call.kwargs.get("messages", [])

        # Fail with a clear message rather than an IndexError below.
        assert messages, "gateway.chat was not called with a non-empty 'messages' keyword"
        assert messages[0]["role"] == "system"
        assert messages[0]["content"] == "You are a helpful assistant"
class TestReActMultipleToolCallsInOneStep:
    """Several tool calls in one step: the LLM returns multiple tool_calls in a single response."""

    async def test_multiple_tool_calls_executed(self):
        from agentkit.core.react import ReActEngine

        tool_a = FakeTool(name="tool_a", result={"a": 1})
        tool_b = FakeTool(name="tool_b", result={"b": 2})

        parallel_calls = [
            ToolCall(id="tc_1", name="tool_a", arguments={"x": 1}),
            ToolCall(id="tc_2", name="tool_b", arguments={"y": 2}),
        ]
        llm = make_mock_gateway([
            make_response(content="", tool_calls=parallel_calls),
            make_response(content="Both tools executed"),
        ])

        react_engine = ReActEngine(llm_gateway=llm)
        outcome = await react_engine.execute(
            messages=[{"role": "user", "content": "Run both tools"}],
            tools=[tool_a, tool_b],
        )

        # Both tools should have been executed.
        assert tool_a.call_count == 1
        assert tool_b.call_count == 1
        assert outcome.output == "Both tools executed"
class TestReActToolNotFound:
    """Tool not found: the LLM calls a tool that does not exist."""

    async def test_unknown_tool_returns_error_observation(self):
        from agentkit.core.react import ReActEngine

        llm = make_mock_gateway([
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="nonexistent_tool", arguments={})],
            ),
            make_response(content="Tool not found, here is my answer anyway"),
        ])

        react_engine = ReActEngine(llm_gateway=llm)
        outcome = await react_engine.execute(
            messages=[{"role": "user", "content": "Use unknown tool"}],
            tools=[],  # empty tool list
        )

        # The first step should record the tool-not-found error.
        assert outcome.trajectory[0].action == "tool_call"
        observation = str(outcome.trajectory[0].result).lower()
        assert "error" in observation or "not found" in observation

        # The LLM should receive the error information and adapt.
        assert outcome.total_steps == 2
        assert outcome.output == "Tool not found, here is my answer anyway"
class TestReActTimeout:
    """ReAct loop timeout: TaskTimeoutError is raised once timeout_seconds is exceeded."""

    async def test_timeout_raises_task_timeout_error(self):
        import asyncio
        from agentkit.core.react import ReActEngine
        from agentkit.core.exceptions import TaskTimeoutError

        # Every LLM call takes 0.5s while the timeout is set to 0.3s.
        async def delayed_chat(**kwargs):
            await asyncio.sleep(0.5)
            return make_response(content="slow response")

        llm = MagicMock(spec=LLMGateway)
        llm.chat = AsyncMock(side_effect=delayed_chat)
        react_engine = ReActEngine(llm_gateway=llm)

        user_messages = [{"role": "user", "content": "Slow task"}]
        with pytest.raises(TaskTimeoutError):
            await react_engine.execute(messages=user_messages, timeout_seconds=0.3)

    async def test_timeout_zero_means_no_timeout(self):
        import asyncio
        from agentkit.core.react import ReActEngine

        # The LLM takes 0.1s; timeout=0 means "no timeout".
        async def briefly_delayed_chat(**kwargs):
            await asyncio.sleep(0.1)
            return make_response(content="done")

        llm = MagicMock(spec=LLMGateway)
        llm.chat = AsyncMock(side_effect=briefly_delayed_chat)
        react_engine = ReActEngine(llm_gateway=llm)

        outcome = await react_engine.execute(
            messages=[{"role": "user", "content": "Task"}],
            timeout_seconds=0,
        )

        assert outcome.output == "done"
        assert outcome.status == "success"

    async def test_default_timeout_used_when_none(self):
        import asyncio
        from agentkit.core.react import ReActEngine
        from agentkit.core.exceptions import TaskTimeoutError

        async def delayed_chat(**kwargs):
            await asyncio.sleep(0.5)
            return make_response(content="slow")

        llm = MagicMock(spec=LLMGateway)
        llm.chat = AsyncMock(side_effect=delayed_chat)

        # default_timeout=0.3s
        react_engine = ReActEngine(llm_gateway=llm, default_timeout=0.3)

        with pytest.raises(TaskTimeoutError):
            await react_engine.execute(
                messages=[{"role": "user", "content": "Task"}],
                timeout_seconds=None,  # should use default_timeout
            )

    async def test_normal_completion_unaffected_by_timeout(self):
        from agentkit.core.react import ReActEngine

        llm = make_mock_gateway([
            make_response(content="Quick answer"),
        ])
        react_engine = ReActEngine(llm_gateway=llm)

        outcome = await react_engine.execute(
            messages=[{"role": "user", "content": "Quick task"}],
            timeout_seconds=300,
        )

        assert outcome.output == "Quick answer"
        assert outcome.status == "success"
class TestReActCancellation:
    """ReAct loop cancellation: TaskCancelledError is raised once the CancellationToken is cancelled."""

    async def test_cancel_raises_task_cancelled_error(self):
        from agentkit.core.react import ReActEngine
        from agentkit.core.protocol import CancellationToken
        from agentkit.core.exceptions import TaskCancelledError

        # The previous call counter only fed a no-op branch, so the stub
        # simply returns a plain response on every call.
        async def plain_chat(**kwargs):
            return make_response(content="response")

        gateway = MagicMock(spec=LLMGateway)
        gateway.chat = AsyncMock(side_effect=plain_chat)
        engine = ReActEngine(llm_gateway=gateway)

        token = CancellationToken()
        # Cancel before execution starts
        token.cancel()

        with pytest.raises(TaskCancelledError):
            await engine.execute(
                messages=[{"role": "user", "content": "Task"}],
                cancellation_token=token,
            )

    async def test_cancel_mid_execution(self):
        from agentkit.core.react import ReActEngine
        from agentkit.core.protocol import CancellationToken
        from agentkit.core.exceptions import TaskCancelledError

        token = CancellationToken()

        async def chat_with_cancel(**kwargs):
            # Cancel during the LLM call so the token is already cancelled
            # when control returns to the engine. (The former
            # `call_count >= 1` guard was always true after the increment.)
            token.cancel()
            # Return a tool call so the loop would otherwise keep going.
            return make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="search", arguments={"q": "test"})],
            )

        tool = FakeTool(name="search", result={"results": ["data"]})
        gateway = MagicMock(spec=LLMGateway)
        gateway.chat = AsyncMock(side_effect=chat_with_cancel)
        engine = ReActEngine(llm_gateway=gateway)

        with pytest.raises(TaskCancelledError):
            await engine.execute(
                messages=[{"role": "user", "content": "Search"}],
                tools=[tool],
                cancellation_token=token,
            )

    async def test_no_cancel_token_works_normally(self):
        from agentkit.core.react import ReActEngine

        gateway = make_mock_gateway([
            make_response(content="Normal answer"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Normal task"}],
            # No cancellation_token
        )

        assert result.output == "Normal answer"
        assert result.status == "success"

    async def test_uncancelled_token_works_normally(self):
        from agentkit.core.react import ReActEngine
        from agentkit.core.protocol import CancellationToken

        gateway = make_mock_gateway([
            make_response(content="Answer"),
        ])
        engine = ReActEngine(llm_gateway=gateway)
        token = CancellationToken()  # Not cancelled

        result = await engine.execute(
            messages=[{"role": "user", "content": "Task"}],
            cancellation_token=token,
        )

        assert result.output == "Answer"
        assert result.status == "success"