1083 lines
40 KiB
Python
1083 lines
40 KiB
Python
"""ReAct Engine 单元测试 - TDD 第一步"""
|
||
|
||
import json
|
||
from unittest.mock import AsyncMock, MagicMock
|
||
|
||
import pytest
|
||
|
||
from agentkit.llm.gateway import LLMGateway
|
||
from agentkit.llm.protocol import LLMResponse, TokenUsage, ToolCall
|
||
from agentkit.tools.base import Tool
|
||
|
||
|
||
# ── Test Helpers ──────────────────────────────────────────
|
||
|
||
|
||
class FakeTool(Tool):
    """Fake ``Tool`` used by the tests in this module.

    Returns a canned result (or raises) and records how it was invoked so
    tests can assert on call counts and on the arguments it received.

    Args:
        name: Tool name forwarded to ``Tool.__init__``.
        description: Tool description forwarded to ``Tool.__init__``.
        result: Value returned by ``execute``. ``None`` selects the default
            ``{"status": "ok"}``; any other value, including an empty dict,
            is returned as given.
        should_fail: When true, ``execute`` raises ``RuntimeError``.
    """

    def __init__(
        self,
        name: str = "fake_tool",
        description: str = "A fake tool for testing",
        result: dict | None = None,
        should_fail: bool = False,
    ):
        super().__init__(name=name, description=description)
        # Compare against None rather than relying on truthiness so that an
        # explicit empty dict is not silently replaced by the default payload.
        self._result = {"status": "ok"} if result is None else result
        self._should_fail = should_fail
        self.call_count = 0
        self.last_kwargs: dict | None = None

    async def execute(self, **kwargs) -> dict:
        """Record the call, then return the canned result or raise.

        Raises:
            RuntimeError: If the tool was built with ``should_fail=True``.
        """
        self.call_count += 1
        self.last_kwargs = kwargs
        if self._should_fail:
            raise RuntimeError(f"Tool '{self.name}' execution failed")
        return self._result
|
||
|
||
|
||
def make_mock_gateway(responses: list[LLMResponse]) -> LLMGateway:
    """Build a mock ``LLMGateway`` whose ``chat`` returns ``responses`` in order."""
    mock_gateway = MagicMock(spec=LLMGateway)
    # An iterable side_effect hands out one item per awaited ``chat`` call.
    mock_gateway.chat = AsyncMock(side_effect=responses)
    return mock_gateway
|
||
|
||
|
||
def make_response(
    content: str = "",
    tool_calls: list[ToolCall] | None = None,
    prompt_tokens: int = 10,
    completion_tokens: int = 20,
) -> LLMResponse:
    """Shortcut for building an ``LLMResponse`` for model ``"test-model"``.

    ``tool_calls`` left as ``None`` (or empty) becomes an empty list.
    """
    usage = TokenUsage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
    )
    requested_calls = tool_calls or []
    return LLMResponse(
        content=content,
        model="test-model",
        usage=usage,
        tool_calls=requested_calls,
    )
|
||
|
||
|
||
# ── Test Classes ──────────────────────────────────────────
|
||
|
||
|
||
class TestReActStepSingleCompletion:
    """Single-step completion: the LLM returns the final answer with no tool calls."""

    async def test_single_step_returns_final_answer(self):
        from agentkit.core.react import ReActEngine, ReActResult

        answer = "The answer is 42"
        engine = ReActEngine(
            llm_gateway=make_mock_gateway([make_response(content=answer)]),
        )

        result = await engine.execute(
            messages=[{"role": "user", "content": "What is the answer?"}],
        )

        assert isinstance(result, ReActResult)
        assert result.output == answer
        assert result.total_steps == 1
        assert len(result.trajectory) == 1

        only_step = result.trajectory[0]
        assert only_step.action == "final_answer"
        assert only_step.content == answer
|
||
|
||
|
||
class TestReActTwoStepCompletion:
    """Two-step completion: the LLM calls a tool first, then returns the final answer."""

    async def test_two_step_with_tool_call(self):
        from agentkit.core.react import ReActEngine, ReActResult

        calculator = FakeTool(name="calculator", result={"value": 42})
        tool_request = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_1", name="calculator", arguments={"expr": "6*7"})],
        )
        final_reply = make_response(content="The result is 42")
        engine = ReActEngine(llm_gateway=make_mock_gateway([tool_request, final_reply]))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Calculate 6*7"}],
            tools=[calculator],
        )

        assert result.output == "The result is 42"
        assert result.total_steps == 2
        assert len(result.trajectory) == 2

        # Step 1: the tool call
        tool_step = result.trajectory[0]
        assert tool_step.action == "tool_call"
        assert tool_step.tool_name == "calculator"
        assert tool_step.arguments == {"expr": "6*7"}
        assert tool_step.result == {"value": 42}

        # Step 2: the final answer
        answer_step = result.trajectory[1]
        assert answer_step.action == "final_answer"
        assert answer_step.content == "The result is 42"
|
||
|
||
|
||
class TestReActMultiStep:
    """Multi-step reasoning: a 3-step ReAct loop that calls a different tool on each step."""

    async def test_three_step_react_loop(self):
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["Python is great"]})
        calc_tool = FakeTool(name="calculator", result={"value": 100})

        scripted_responses = [
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="search", arguments={"query": "Python"})],
            ),
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_2", name="calculator", arguments={"expr": "10*10"})],
            ),
            make_response(content="Based on search and calculation, the answer is 100"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted_responses))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search and calculate"}],
            tools=[search_tool, calc_tool],
        )

        assert result.total_steps == 3
        steps = result.trajectory
        assert steps[0].tool_name == "search"
        assert steps[1].tool_name == "calculator"
        assert steps[2].action == "final_answer"
        assert search_tool.call_count == 1
        assert calc_tool.call_count == 1
|
||
|
||
|
||
class TestReActMaxSteps:
    """When the maximum number of steps is reached, the current best result is returned."""

    async def test_max_steps_returns_current_best(self):
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["data"]})

        # The LLM keeps returning tool calls (the argument changes on every
        # attempt to avoid loop detection) and never gives a final answer.
        endless_tool_calls = []
        for attempt in range(20):
            call = ToolCall(
                id=f"tc_{attempt}",
                name="search",
                arguments={"query": f"attempt_{attempt}"},
            )
            endless_tool_calls.append(
                make_response(content="Thinking...", tool_calls=[call])
            )

        engine = ReActEngine(
            llm_gateway=make_mock_gateway(endless_tool_calls),
            max_steps=3,
        )

        result = await engine.execute(
            messages=[{"role": "user", "content": "Keep searching"}],
            tools=[search_tool],
        )

        assert result.total_steps == 3
        # On hitting max_steps the content of the last step should be returned.
        assert result.output is not None
|
||
|
||
|
||
class TestReActToolCallFailure:
    """Tool call failure: the LLM receives the error and adjusts its strategy."""

    async def test_tool_failure_included_in_observation(self):
        from agentkit.core.react import ReActEngine

        broken = FakeTool(name="broken_tool", should_fail=True)
        scripted_responses = [
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="broken_tool", arguments={})],
            ),
            make_response(content="The tool failed, but here is my best answer"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted_responses))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Use the broken tool"}],
            tools=[broken],
        )

        assert result.total_steps == 2

        # The first step (the tool call) should record the error.
        failed_step = result.trajectory[0]
        assert failed_step.action == "tool_call"
        assert failed_step.result is not None
        # The error text should be part of the recorded result.
        observation = str(failed_step.result).lower()
        assert "error" in observation or "failed" in observation

        # On the second step the LLM adjusts and gives the final answer.
        assert result.trajectory[1].action == "final_answer"
        assert result.output == "The tool failed, but here is my best answer"
|
||
|
||
|
||
class TestReActFunctionCallingMode:
    """Function-calling mode: the LLM returns structured ``tool_calls``."""

    async def test_function_calling_tool_execution(self):
        """The requested tool is executed and ``tools`` is forwarded to the gateway."""
        from agentkit.core.react import ReActEngine

        tool = FakeTool(name="weather", result={"temp": 25, "city": "Shanghai"})
        gateway = make_mock_gateway([
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="weather", arguments={"city": "Shanghai"})],
            ),
            make_response(content="Shanghai temperature is 25°C"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "What's the weather?"}],
            tools=[tool],
        )

        assert result.trajectory[0].tool_name == "weather"
        assert result.trajectory[0].result == {"temp": 25, "city": "Shanghai"}

        # Verify that gateway.chat received a ``tools`` keyword argument.
        # ``call.kwargs`` and ``call[1]`` are the same mapping, so a single
        # lookup is sufficient.
        first_call = gateway.chat.call_args_list[0]
        assert first_call.kwargs.get("tools") is not None
|
||
|
||
|
||
class TestReActTextParsingMode:
    """Text-parsing mode: the LLM returns plain text that contains a tool-call pattern."""

    async def test_text_parsing_with_action_pattern(self):
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["found"]})
        # The LLM's text reply embeds an ``Action:`` pattern.
        action_reply = make_response(content='Action: search({"query": "test"})')
        final_reply = make_response(content="Here is what I found")
        engine = ReActEngine(llm_gateway=make_mock_gateway([action_reply, final_reply]))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search for test"}],
            tools=[search_tool],
        )

        # Text-parsing mode should recognise the Action pattern and run the tool.
        assert result.total_steps == 2
        first_step = result.trajectory[0]
        assert first_step.action == "tool_call"
        assert first_step.tool_name == "search"

    async def test_text_parsing_with_code_block_pattern(self):
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["found"]})
        tool_call_text = '```tool\n{"name": "search", "arguments": {"query": "test"}}\n```'
        scripted_responses = [
            make_response(content=tool_call_text),
            make_response(content="Search results found"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted_responses))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search for test"}],
            tools=[search_tool],
        )

        assert result.total_steps == 2
        first_step = result.trajectory[0]
        assert first_step.action == "tool_call"
        assert first_step.tool_name == "search"
|
||
|
||
|
||
class TestReActEmptyToolList:
    """No tools supplied: the answer is generated directly."""

    async def test_no_tools_direct_answer(self):
        from agentkit.core.react import ReActEngine

        direct_reply = make_response(content="Direct answer without tools")
        engine = ReActEngine(llm_gateway=make_mock_gateway([direct_reply]))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Hello"}],
            tools=None,
        )

        assert result.output == "Direct answer without tools"
        assert result.total_steps == 1
        assert result.trajectory[0].action == "final_answer"
|
||
|
||
|
||
class TestReActTrajectoryRecording:
    """Trajectory recording: each step's action, tool_name and result are recorded correctly."""

    async def test_trajectory_records_all_steps(self):
        from agentkit.core.react import ReActEngine, ReActStep

        tool_a = FakeTool(name="tool_a", result={"a": 1})
        tool_b = FakeTool(name="tool_b", result={"b": 2})

        scripted_responses = [
            make_response(
                content="Step 1",
                tool_calls=[ToolCall(id="tc_1", name="tool_a", arguments={"x": 1})],
            ),
            make_response(
                content="Step 2",
                tool_calls=[ToolCall(id="tc_2", name="tool_b", arguments={"y": 2})],
            ),
            make_response(content="Final answer"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted_responses))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Multi-step task"}],
            tools=[tool_a, tool_b],
        )

        trajectory = result.trajectory
        assert len(trajectory) == 3
        assert isinstance(trajectory[0], ReActStep)

        # (step number, tool name, arguments, tool result) for the tool steps.
        expected_tool_steps = (
            (1, "tool_a", {"x": 1}, {"a": 1}),
            (2, "tool_b", {"y": 2}, {"b": 2}),
        )
        for recorded, expected in zip(trajectory, expected_tool_steps):
            number, tool_name, arguments, tool_result = expected
            assert recorded.step == number
            assert recorded.action == "tool_call"
            assert recorded.tool_name == tool_name
            assert recorded.arguments == arguments
            assert recorded.result == tool_result

        final_step = trajectory[2]
        assert final_step.step == 3
        assert final_step.action == "final_answer"
        assert final_step.content == "Final answer"
|
||
|
||
|
||
class TestReActTokenAccumulation:
    """Token accumulation: token counts from every step should be summed."""

    async def test_total_tokens_accumulated(self):
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["data"]})
        tool_request = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_1", name="search", arguments={"q": "test"})],
            prompt_tokens=100,
            completion_tokens=50,
        )
        final_reply = make_response(
            content="Final answer",
            prompt_tokens=200,
            completion_tokens=30,
        )
        engine = ReActEngine(llm_gateway=make_mock_gateway([tool_request, final_reply]))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search"}],
            tools=[search_tool],
        )

        # 100+50 + 200+30 = 380
        assert result.total_tokens == 380
        # Per-step token counts should be recorded as well.
        first_step, second_step = result.trajectory[0], result.trajectory[1]
        assert first_step.tokens == 150
        assert second_step.tokens == 230
|
||
|
||
|
||
class TestReActSystemPrompt:
    """The system prompt is included in the initial messages."""

    async def test_system_prompt_included(self):
        """The first message sent to the gateway is the system prompt."""
        from agentkit.core.react import ReActEngine

        gateway = make_mock_gateway([
            make_response(content="Response"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        await engine.execute(
            messages=[{"role": "user", "content": "Hello"}],
            system_prompt="You are a helpful assistant",
        )

        # Inspect the ``messages`` keyword of the first gateway.chat call.
        # ``call.kwargs`` and ``call[1]`` are the same mapping, so one lookup
        # is enough; assert explicitly so a missing or empty ``messages``
        # fails with a readable message instead of an IndexError.
        first_call = gateway.chat.call_args_list[0]
        sent_messages = first_call.kwargs.get("messages")
        assert sent_messages, "gateway.chat was not called with a non-empty 'messages' keyword"
        assert sent_messages[0]["role"] == "system"
        assert sent_messages[0]["content"] == "You are a helpful assistant"
|
||
|
||
|
||
class TestReActMultipleToolCallsInOneStep:
    """Several tool calls in one step: the LLM returns multiple tool_calls in a single response."""

    async def test_multiple_tool_calls_executed(self):
        from agentkit.core.react import ReActEngine

        tool_a = FakeTool(name="tool_a", result={"a": 1})
        tool_b = FakeTool(name="tool_b", result={"b": 2})

        both_calls = [
            ToolCall(id="tc_1", name="tool_a", arguments={"x": 1}),
            ToolCall(id="tc_2", name="tool_b", arguments={"y": 2}),
        ]
        scripted_responses = [
            make_response(content="", tool_calls=both_calls),
            make_response(content="Both tools executed"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted_responses))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Run both tools"}],
            tools=[tool_a, tool_b],
        )

        # Both tools should have been executed.
        assert tool_a.call_count == 1
        assert tool_b.call_count == 1
        assert result.output == "Both tools executed"
|
||
|
||
|
||
class TestReActToolNotFound:
    """Tool not found: the LLM calls a tool that does not exist."""

    async def test_unknown_tool_returns_error_observation(self):
        from agentkit.core.react import ReActEngine

        scripted_responses = [
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="nonexistent_tool", arguments={})],
            ),
            make_response(content="Tool not found, here is my answer anyway"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted_responses))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Use unknown tool"}],
            tools=[],  # empty tool list
        )

        # The first step should record the tool-not-found error.
        first_step = result.trajectory[0]
        assert first_step.action == "tool_call"
        observation = str(first_step.result).lower()
        assert "error" in observation or "not found" in observation
        # The LLM should receive the error and adjust.
        assert result.total_steps == 2
        assert result.output == "Tool not found, here is my answer anyway"
|
||
|
||
|
||
class TestReActTimeout:
    """ReAct loop timeout: TaskTimeoutError is raised once timeout_seconds is exceeded."""

    @staticmethod
    def _delayed_gateway(delay: float, content: str) -> LLMGateway:
        """Return a mock gateway whose ``chat`` sleeps ``delay`` seconds before replying."""
        import asyncio

        async def delayed_chat(**kwargs):
            await asyncio.sleep(delay)
            return make_response(content=content)

        gateway = MagicMock(spec=LLMGateway)
        gateway.chat = AsyncMock(side_effect=delayed_chat)
        return gateway

    async def test_timeout_raises_task_timeout_error(self):
        from agentkit.core.react import ReActEngine
        from agentkit.core.exceptions import TaskTimeoutError

        # Every LLM call takes 0.5s while the timeout is set to 0.3s.
        engine = ReActEngine(llm_gateway=self._delayed_gateway(0.5, "slow response"))

        with pytest.raises(TaskTimeoutError):
            await engine.execute(
                messages=[{"role": "user", "content": "Slow task"}],
                timeout_seconds=0.3,
            )

    async def test_timeout_zero_means_no_timeout(self):
        from agentkit.core.react import ReActEngine

        # The LLM takes 0.1s; timeout=0 means "no timeout".
        engine = ReActEngine(llm_gateway=self._delayed_gateway(0.1, "done"))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Task"}],
            timeout_seconds=0,
        )
        assert result.output == "done"
        assert result.status == "success"

    async def test_default_timeout_used_when_none(self):
        from agentkit.core.react import ReActEngine
        from agentkit.core.exceptions import TaskTimeoutError

        # default_timeout=0.3s, shorter than the 0.5s LLM delay.
        engine = ReActEngine(
            llm_gateway=self._delayed_gateway(0.5, "slow"),
            default_timeout=0.3,
        )

        with pytest.raises(TaskTimeoutError):
            await engine.execute(
                messages=[{"role": "user", "content": "Task"}],
                timeout_seconds=None,  # should use default_timeout
            )

    async def test_normal_completion_unaffected_by_timeout(self):
        from agentkit.core.react import ReActEngine

        quick_reply = make_response(content="Quick answer")
        engine = ReActEngine(llm_gateway=make_mock_gateway([quick_reply]))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Quick task"}],
            timeout_seconds=300,
        )
        assert result.output == "Quick answer"
        assert result.status == "success"
|
||
|
||
|
||
class TestReActCancellation:
    """ReAct loop cancellation: a cancelled CancellationToken raises TaskCancelledError."""

    async def test_cancel_raises_task_cancelled_error(self):
        """A token cancelled before ``execute`` starts raises TaskCancelledError."""
        from agentkit.core.react import ReActEngine
        from agentkit.core.protocol import CancellationToken
        from agentkit.core.exceptions import TaskCancelledError

        async def always_respond(**kwargs):
            # Build a fresh response for every call, however many the engine makes.
            return make_response(content="response")

        gateway = MagicMock(spec=LLMGateway)
        gateway.chat = AsyncMock(side_effect=always_respond)
        engine = ReActEngine(llm_gateway=gateway)

        token = CancellationToken()
        # Cancel before execution starts
        token.cancel()

        with pytest.raises(TaskCancelledError):
            await engine.execute(
                messages=[{"role": "user", "content": "Task"}],
                cancellation_token=token,
            )

    async def test_cancel_mid_execution(self):
        """A token cancelled during an LLM call raises TaskCancelledError."""
        from agentkit.core.react import ReActEngine
        from agentkit.core.protocol import CancellationToken
        from agentkit.core.exceptions import TaskCancelledError

        token = CancellationToken()

        async def chat_with_cancel(**kwargs):
            # Cancel from inside the LLM call (so already during the first
            # one), then answer with a tool call so the loop would otherwise
            # keep going.
            token.cancel()
            return make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="search", arguments={"q": "test"})],
            )

        tool = FakeTool(name="search", result={"results": ["data"]})
        gateway = MagicMock(spec=LLMGateway)
        gateway.chat = AsyncMock(side_effect=chat_with_cancel)
        engine = ReActEngine(llm_gateway=gateway)

        with pytest.raises(TaskCancelledError):
            await engine.execute(
                messages=[{"role": "user", "content": "Search"}],
                tools=[tool],
                cancellation_token=token,
            )

    async def test_no_cancel_token_works_normally(self):
        """Omitting the cancellation token leaves normal execution unaffected."""
        from agentkit.core.react import ReActEngine

        gateway = make_mock_gateway([
            make_response(content="Normal answer"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Normal task"}],
            # No cancellation_token
        )
        assert result.output == "Normal answer"
        assert result.status == "success"

    async def test_uncancelled_token_works_normally(self):
        """A token that is never cancelled leaves normal execution unaffected."""
        from agentkit.core.react import ReActEngine
        from agentkit.core.protocol import CancellationToken

        gateway = make_mock_gateway([
            make_response(content="Answer"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        token = CancellationToken()  # Not cancelled
        result = await engine.execute(
            messages=[{"role": "user", "content": "Task"}],
            cancellation_token=token,
        )
        assert result.output == "Answer"
        assert result.status == "success"
|
||
|
||
|
||
class TestLoopDetection:
    """Loop detection: sliding-window hashing inside the ReAct loop detects repeated tool calls."""

    async def test_normal_different_tools_no_detection(self):
        """Calls to different tools do not trigger detection."""
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["a"]})
        calc_tool = FakeTool(name="calculator", result={"value": 42})
        scripted_responses = [
            make_response(
                tool_calls=[ToolCall(id="tc_1", name="search", arguments={"q": "test"})],
            ),
            make_response(
                tool_calls=[ToolCall(id="tc_2", name="calculator", arguments={"expr": "6*7"})],
            ),
            make_response(content="Done"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted_responses))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search and calculate"}],
            tools=[search_tool, calc_tool],
        )
        assert result.status == "success"
        assert result.total_steps == 3

    async def test_same_tool_different_args_no_detection(self):
        """The same tool with different arguments does not trigger detection."""
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": []})
        scripted_responses = [
            make_response(
                tool_calls=[ToolCall(id="tc_1", name="search", arguments={"q": "hello"})],
            ),
            make_response(
                tool_calls=[ToolCall(id="tc_2", name="search", arguments={"q": "world"})],
            ),
            make_response(content="Done"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted_responses))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search twice"}],
            tools=[search_tool],
        )
        assert result.status == "success"
        assert result.total_steps == 3

    async def test_loop_detected_injects_correction_then_raises(self):
        """Repeating the same tool+arguments: a correction is injected first, then LoopDetectedError is raised."""
        from agentkit.core.react import ReActEngine
        from agentkit.core.exceptions import LoopDetectedError

        search_tool = FakeTool(name="search", result={"results": []})
        # Step 1: tool call (executed, window=[hash])
        # Step 2: same tool call (detected, correction injected, continue)
        # Step 3: same tool call again (detected, already corrected → raise)
        identical_calls = [
            make_response(
                tool_calls=[ToolCall(id=f"tc_{n}", name="search", arguments={"q": "test"})],
            )
            for n in (1, 2, 3)
        ]
        engine = ReActEngine(
            llm_gateway=make_mock_gateway(identical_calls),
            max_steps=10,
        )

        with pytest.raises(LoopDetectedError) as exc_info:
            await engine.execute(
                messages=[{"role": "user", "content": "Search"}],
                tools=[search_tool],
            )
        assert "search" in str(exc_info.value)

    async def test_loop_correction_allows_recovery(self):
        """After the correction is injected, a change of strategy lets the run finish normally."""
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": []})
        # Step 1: tool call (executed)
        # Step 2: same tool call (detected, correction injected)
        # Step 3: LLM changes strategy → final answer
        scripted_responses = [
            make_response(
                tool_calls=[ToolCall(id="tc_1", name="search", arguments={"q": "test"})],
            ),
            make_response(
                tool_calls=[ToolCall(id="tc_2", name="search", arguments={"q": "test"})],
            ),
            make_response(content="I found the answer after changing strategy"),
        ]
        engine = ReActEngine(
            llm_gateway=make_mock_gateway(scripted_responses),
            max_steps=10,
        )

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search"}],
            tools=[search_tool],
        )
        assert result.status == "success"
        assert "changing strategy" in result.output

    async def test_reset_clears_loop_state(self):
        """reset() clears the loop-detection state."""
        from agentkit.core.react import ReActEngine

        engine = ReActEngine(
            llm_gateway=make_mock_gateway([make_response(content="Done")]),
        )
        # Dirty the private loop-detection state directly.
        engine._loop_window.append("some_hash")
        engine._loop_corrected = True

        engine.reset()

        assert len(engine._loop_window) == 0
        assert engine._loop_corrected is False
|
||
|
||
|
||
# ── U3: Headroom compression tests ────────────────────────
|
||
|
||
|
||
def _make_messages(token_count: int) -> list[dict]:
|
||
"""Create messages with approximately the given token count (4 chars = 1 token)."""
|
||
char_count = token_count * 4
|
||
return [{"role": "user", "content": "x" * char_count}]
|
||
|
||
|
||
class TestHeadroomCompression:
    """U3: proactive compression trigger, driven by predicted token usage."""

    def test_headroom_triggers_when_ratio_exceeds_threshold(self):
        """Happy path: 110K tokens, model_limit 128K → 0.86 > 0.8 → triggers."""
        from agentkit.core.compressor import ContextCompressor

        conversation = _make_messages(110_000)
        compressor = ContextCompressor(model_context_limit=128_000)

        assert compressor.should_compress(conversation) is True

    def test_headroom_does_not_trigger_below_min_tokens(self):
        """Edge case: 5K tokens, model_limit 128K → no trigger (below min_tokens 8000)."""
        from agentkit.core.compressor import ContextCompressor

        conversation = _make_messages(5_000)
        compressor = ContextCompressor(model_context_limit=128_000)

        assert compressor.should_compress(conversation) is False

    def test_headroom_triggers_for_small_model(self):
        """Edge case: model_limit 8K, conversation 7K → 0.875 > 0.8 → triggers."""
        from agentkit.core.compressor import ContextCompressor

        conversation = _make_messages(7_000)
        compressor = ContextCompressor(model_context_limit=8_000)

        assert compressor.should_compress(conversation) is True

    def test_react_skips_compression_when_unavailable(self):
        """Error path: compressor is_available()=False → compression is skipped."""
        from agentkit.core.react import ReActEngine

        engine = ReActEngine(
            llm_gateway=make_mock_gateway([make_response(content="Done")]),
        )

        unavailable = MagicMock()
        unavailable.is_available.return_value = False
        unavailable.should_compress = MagicMock(return_value=True)

        oversized = [{"role": "user", "content": "x" * 100000}]
        decision = engine._should_compress(oversized, unavailable)

        assert decision is False

    def test_react_delegates_to_compressor_should_compress(self):
        """ReActEngine._should_compress delegates to compressor.should_compress"""
        from agentkit.core.react import ReActEngine

        engine = ReActEngine(
            llm_gateway=make_mock_gateway([make_response(content="Done")]),
        )

        wants_compression = ContextCompressorStub(available=True, compress=True)
        decision = engine._should_compress(
            [{"role": "user", "content": "test"}], wants_compression
        )
        assert decision is True

        declines_compression = ContextCompressorStub(available=True, compress=False)
        decision = engine._should_compress(
            [{"role": "user", "content": "test"}], declines_compression
        )
        assert decision is False

    def test_react_fallback_for_compressors_without_should_compress(self):
        """Fallback: compressors without should_compress use fixed threshold"""
        from agentkit.core.react import ReActEngine

        engine = ReActEngine(
            llm_gateway=make_mock_gateway([make_response(content="Done")]),
        )

        # A compressor exposing is_available but no should_compress method.
        legacy = MagicMock()
        legacy.is_available.return_value = True
        # Deleting the attribute makes the mock raise AttributeError for it,
        # which exercises the fallback path.
        del legacy.should_compress

        # Below threshold → no compression
        below_threshold = _make_messages(4_000)
        assert engine._should_compress(below_threshold, legacy) is False

        # Above threshold → compression
        above_threshold = _make_messages(10_000)
        assert engine._should_compress(above_threshold, legacy) is True
|
||
|
||
|
||
class ContextCompressorStub:
    """Minimal compressor stand-in used to test ``_should_compress`` delegation.

    Both answers are fixed at construction time.
    """

    def __init__(self, available: bool, compress: bool):
        self._available, self._compress = available, compress

    def is_available(self) -> bool:
        """Return the availability flag given to the constructor."""
        return self._available

    def should_compress(self, messages: list[dict]) -> bool:
        """Return the configured decision; ``messages`` is ignored."""
        return self._compress
|
||
|
||
|
||
class TestParseMalformedToolUse:
    """Fault-tolerant parsing of malformed <tool_use> text."""

    @staticmethod
    def _parse(content: str):
        """Run ``_parse_text_tool_calls`` on a fresh engine backed by a bare mock gateway."""
        from agentkit.core.react import ReActEngine

        engine = ReActEngine(llm_gateway=MagicMock(spec=LLMGateway))
        return engine._parse_text_tool_calls(content)

    def test_missing_closing_tag_still_parses(self):
        """The model emits <tool_use> without the closing </tool_use> tag."""
        content = (
            '<tool_use>\n'
            '{"name": "shell", "arguments": {"command": "ls -la"}}\n'
        )

        calls = self._parse(content)

        assert len(calls) == 1
        parsed = calls[0]
        assert parsed["name"] == "shell"
        assert parsed["arguments"]["command"] == "ls -la"

    def test_malformed_json_with_stray_tags(self):
        """Tool name and arguments are still extracted when tags such as <parameter> follow the JSON."""
        content = (
            '<tool_use>\n'
            '{"name": "shell", "arguments": {"command": "sudo chown -R $USER /tmp"}}\n'
            '</parameter>\n'
            '<parameter=timeout>30</parameter>\n'
            '<function>\n'
        )

        calls = self._parse(content)

        assert len(calls) == 1
        parsed = calls[0]
        assert parsed["name"] == "shell"
        assert "chown" in parsed["arguments"]["command"]

    def test_truncated_json_still_extracts_name(self):
        """The tool name is still extracted when the JSON is truncated."""
        content = '<tool_use>\n{"name": "web_search", "arguments": {"query": "test"'

        calls = self._parse(content)

        assert len(calls) == 1
        assert calls[0]["name"] == "web_search"

    def test_completely_unparseable_tool_use_returns_empty(self):
        """A completely unparseable <tool_use> yields an empty list."""
        content = '<tool_use>\ngarbage not json at all\n'

        calls = self._parse(content)

        assert calls == []
|
||
|
||
|
||
class TestMalformedToolUseNotLeakedAsFinalAnswer:
    """Malformed <tool_use> must not leak to the user as the final answer."""

    async def test_malformed_tool_use_triggers_correction_not_leak(self):
        """When the model emits malformed <tool_use>, the raw XML must not be returned as the final answer."""
        from agentkit.core.react import ReActEngine

        shell_tool = FakeTool(name="shell", result={"output": "done", "exit_code": 0})
        malformed_content = (
            '<tool_use>\n'
            '{"name": "shell", "arguments": {"command": "ls"}}\n'
            '</parameter>\n<function>\n'
        )
        scripted_responses = [
            make_response(content=malformed_content),
            make_response(content="Done successfully"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted_responses))

        result = await engine.execute(
            messages=[{"role": "user", "content": "list files"}],
            tools=[shell_tool],
        )

        # None of the raw XML fragments may appear in the final answer.
        for fragment in ("<tool_use>", "</parameter>", "<function>"):
            assert fragment not in result.output

    async def test_completely_unparseable_tool_use_injects_correction(self):
        """When <tool_use> cannot be parsed at all, a correction message is injected so the model retries."""
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["data"]})
        # First reply: a <tool_use> that cannot be parsed at all.
        unparseable_reply = make_response(content="<tool_use>\nnot json at all just words\n")
        # Second reply: the model answers normally after the correction.
        corrected_reply = make_response(content="Search completed")
        engine = ReActEngine(
            llm_gateway=make_mock_gateway([unparseable_reply, corrected_reply]),
        )

        result = await engine.execute(
            messages=[{"role": "user", "content": "search something"}],
            tools=[search_tool],
        )

        # The raw XML must not be returned as the final answer.
        assert "<tool_use>" not in result.output
        assert result.output == "Search completed"
|
||
|
||
|
||
class TestReActToolUsePromptRules:
    """Text assertions on the rules emitted by _build_tool_use_prompt (U4 / Bug 2 L0)."""

    @staticmethod
    def _prompt_without_tools():
        """Build the tool-use prompt for an empty tool list using a mock gateway."""
        from agentkit.core.react import ReActEngine

        engine = ReActEngine(llm_gateway=make_mock_gateway([]))
        return engine._build_tool_use_prompt([])

    def test_new_rule_1_present_at_top(self):
        """The new rule 1 text (tools are mandatory for external info etc.) is present."""
        rules_text = self._prompt_without_tools()

        assert "1. 涉及外部信息、实时数据、多步骤分析或你不确定的事实时必须使用工具" in rules_text

    def test_old_rule_3_absent(self):
        """The old rule 3 ("answer directly if no tool is needed") no longer appears."""
        rules_text = self._prompt_without_tools()

        assert "如果不需要工具就能回答,直接回答即可" not in rules_text

    def test_rules_in_correct_order(self):
        """Rules numbered 1-5 appear in the expected order."""
        rules_text = self._prompt_without_tools()

        ordered_markers = (
            "1. 涉及外部信息",
            "2. 每次只调用一个工具",
            "3. 等待工具返回结果",
            "4. 仅在确实无需工具时",
            "5. 不要在回答中重复工具的输出",
        )
        # str.index raises if a marker is missing, which also fails the test.
        offsets = [rules_text.index(marker) for marker in ordered_markers]

        # Each rule must start strictly before the next one.
        assert all(earlier < later for earlier, later in zip(offsets, offsets[1:]))

    def test_tool_use_xml_format_preserved(self):
        """The <tool_use> XML format example is kept for backward compatibility."""
        rules_text = self._prompt_without_tools()

        for tag in ("<tool_use>", "</tool_use>"):
            assert tag in rules_text
|
||
|
||
|
||
class TestBug2L0PromptRules:
    """Bug 2 L0 end-to-end check: _build_tool_use_prompt has tool descriptions and new rules.

    Bug 2 status: hypothesis applied, pending L4 verification (not fixed).
    L0 only makes text assertions; the real-LLM smoke test runs in the
    separate L1/L2 plan.
    """

    @staticmethod
    def _prompt_for(tool):
        """Build the tool-use prompt for a single registered tool using a mock gateway."""
        from agentkit.core.react import ReActEngine

        engine = ReActEngine(llm_gateway=make_mock_gateway([]))
        return engine._build_tool_use_prompt([tool])

    def test_web_search_description_in_prompt(self):
        """With a web_search tool registered, the prompt contains its description text."""
        search_tool = FakeTool(
            name="web_search",
            description="搜索互联网信息,获取实时数据、新闻、趋势等",
        )

        rendered = self._prompt_for(search_tool)

        # web_search is not a core tool, so it is rendered as an extended tool.
        # Extended tool render format: "- name: first_line_of_description"
        for expected_fragment in ("web_search", "搜索互联网信息"):
            assert expected_fragment in rendered

    def test_new_rule_1_present_with_tools(self):
        """With a tool registered, the prompt still contains the new rule 1."""
        search_tool = FakeTool(
            name="web_search",
            description="搜索互联网信息",
        )

        rendered = self._prompt_for(search_tool)

        assert "1. 涉及外部信息、实时数据、多步骤分析或你不确定的事实时必须使用工具" in rendered
        assert "如果不需要工具就能回答,直接回答即可" not in rendered
|