fischer-agentkit/tests/unit/test_react_engine.py

1083 lines
40 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""ReAct Engine 单元测试 - TDD 第一步"""
import json
from unittest.mock import AsyncMock, MagicMock
import pytest
from agentkit.llm.gateway import LLMGateway
from agentkit.llm.protocol import LLMResponse, TokenUsage, ToolCall
from agentkit.tools.base import Tool
# ── Test Helpers ──────────────────────────────────────────
class FakeTool(Tool):
    """Configurable in-memory ``Tool`` double for the ReAct engine tests.

    Each call to ``execute`` is counted and its keyword arguments are kept,
    so tests can assert on how the engine invoked the tool.
    """

    def __init__(
        self,
        name: str = "fake_tool",
        description: str = "A fake tool for testing",
        result: dict | None = None,
        should_fail: bool = False,
    ):
        """Configure the fake.

        Args:
            name: Tool name forwarded to ``Tool.__init__``.
            description: Tool description forwarded to ``Tool.__init__``.
            result: Payload returned by ``execute``. ``None`` selects the
                default ``{"status": "ok"}``; any other value, including an
                empty dict, is returned unchanged.
            should_fail: When true, ``execute`` raises ``RuntimeError``.
        """
        super().__init__(name=name, description=description)
        # Test against None explicitly: ``result or default`` would silently
        # swap a deliberately empty dict for the default payload.
        self._result = {"status": "ok"} if result is None else result
        self._should_fail = should_fail
        self.call_count = 0
        self.last_kwargs: dict | None = None

    async def execute(self, **kwargs) -> dict:
        """Record the call, then fail or return the configured payload.

        Raises:
            RuntimeError: If the fake was created with ``should_fail=True``.
        """
        self.call_count += 1
        self.last_kwargs = kwargs
        if self._should_fail:
            raise RuntimeError(f"Tool '{self.name}' execution failed")
        return self._result
def make_mock_gateway(responses: list[LLMResponse]) -> LLMGateway:
    """Build a mocked ``LLMGateway`` whose ``chat`` returns ``responses`` in order."""
    scripted_chat = AsyncMock(side_effect=responses)
    mocked_gateway = MagicMock(spec=LLMGateway)
    mocked_gateway.chat = scripted_chat
    return mocked_gateway
def make_response(
    content: str = "",
    tool_calls: list[ToolCall] | None = None,
    prompt_tokens: int = 10,
    completion_tokens: int = 20,
) -> LLMResponse:
    """Shortcut for building an ``LLMResponse`` with the fixed test model name.

    A missing (or empty) ``tool_calls`` argument becomes an empty list.
    """
    usage = TokenUsage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
    )
    requested_calls = tool_calls or []
    return LLMResponse(
        content=content,
        model="test-model",
        usage=usage,
        tool_calls=requested_calls,
    )
# ── Test Classes ──────────────────────────────────────────
class TestReActStepSingleCompletion:
    """Single-step run: the LLM returns the final answer with no tool call."""

    async def test_single_step_returns_final_answer(self):
        """A plain-text reply ends the run after exactly one recorded step."""
        from agentkit.core.react import ReActEngine, ReActResult

        scripted = [make_response(content="The answer is 42")]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted))

        user_turn = {"role": "user", "content": "What is the answer?"}
        result = await engine.execute(messages=[user_turn])

        assert isinstance(result, ReActResult)
        assert result.output == "The answer is 42"
        assert result.total_steps == 1
        assert len(result.trajectory) == 1

        only_step = result.trajectory[0]
        assert only_step.action == "final_answer"
        assert only_step.content == "The answer is 42"
class TestReActTwoStepCompletion:
    """Two-step run: the LLM calls a tool first, then returns the final answer."""

    async def test_two_step_with_tool_call(self):
        """The tool call and the closing answer are each recorded as a step."""
        from agentkit.core.react import ReActEngine, ReActResult

        calculator = FakeTool(name="calculator", result={"value": 42})
        tool_request = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_1", name="calculator", arguments={"expr": "6*7"})],
        )
        closing_reply = make_response(content="The result is 42")
        gateway = make_mock_gateway([tool_request, closing_reply])
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Calculate 6*7"}],
            tools=[calculator],
        )

        assert result.output == "The result is 42"
        assert result.total_steps == 2
        assert len(result.trajectory) == 2

        # First step: the tool invocation.
        tool_step = result.trajectory[0]
        assert tool_step.action == "tool_call"
        assert tool_step.tool_name == "calculator"
        assert tool_step.arguments == {"expr": "6*7"}
        assert tool_step.result == {"value": 42}

        # Second step: the final answer.
        answer_step = result.trajectory[1]
        assert answer_step.action == "final_answer"
        assert answer_step.content == "The result is 42"
class TestReActMultiStep:
    """Multi-step reasoning: a three-step loop that calls a different tool each step."""

    async def test_three_step_react_loop(self):
        """Two distinct tool calls followed by a final answer give three steps."""
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["Python is great"]})
        calc_tool = FakeTool(name="calculator", result={"value": 100})

        search_turn = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_1", name="search", arguments={"query": "Python"})],
        )
        calc_turn = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_2", name="calculator", arguments={"expr": "10*10"})],
        )
        final_turn = make_response(
            content="Based on search and calculation, the answer is 100",
        )
        engine = ReActEngine(
            llm_gateway=make_mock_gateway([search_turn, calc_turn, final_turn]),
        )

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search and calculate"}],
            tools=[search_tool, calc_tool],
        )

        assert result.total_steps == 3
        first, second, third = result.trajectory[0], result.trajectory[1], result.trajectory[2]
        assert first.tool_name == "search"
        assert second.tool_name == "calculator"
        assert third.action == "final_answer"
        assert search_tool.call_count == 1
        assert calc_tool.call_count == 1
class TestReActMaxSteps:
    """When the step limit is reached, the current best result is returned."""

    async def test_max_steps_returns_current_best(self):
        """With ``max_steps=3`` and an LLM that never finishes, the run stops at step 3."""
        from agentkit.core.react import ReActEngine

        search = FakeTool(name="search", result={"results": ["data"]})

        def another_tool_call(attempt: int):
            # The query changes on every attempt so loop detection is not
            # triggered; the LLM never produces a final answer.
            return make_response(
                content="Thinking...",
                tool_calls=[
                    ToolCall(
                        id=f"tc_{attempt}",
                        name="search",
                        arguments={"query": f"attempt_{attempt}"},
                    )
                ],
            )

        gateway = make_mock_gateway([another_tool_call(i) for i in range(20)])
        engine = ReActEngine(llm_gateway=gateway, max_steps=3)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Keep searching"}],
            tools=[search],
        )

        assert result.total_steps == 3
        # On reaching max_steps the engine should still hand back some output.
        assert result.output is not None
class TestReActToolCallFailure:
    """Tool failure: the LLM receives the error and adjusts its strategy."""

    async def test_tool_failure_included_in_observation(self):
        """A raising tool is recorded as a tool-call step carrying error text."""
        from agentkit.core.react import ReActEngine

        broken = FakeTool(name="broken_tool", should_fail=True)
        scripted = [
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="broken_tool", arguments={})],
            ),
            make_response(content="The tool failed, but here is my best answer"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Use the broken tool"}],
            tools=[broken],
        )

        assert result.total_steps == 2

        # The first step is the tool call and must carry a result.
        failed_step = result.trajectory[0]
        assert failed_step.action == "tool_call"
        assert failed_step.result is not None

        # The failure has to be visible in the recorded result.
        observation = str(result.trajectory[0].result).lower()
        assert "error" in observation or "failed" in observation

        # On the second step the LLM adapts and gives the final answer.
        assert result.trajectory[1].action == "final_answer"
        assert result.output == "The tool failed, but here is my best answer"
class TestReActFunctionCallingMode:
    """Function-calling mode: the LLM response carries ``tool_calls``."""

    async def test_function_calling_tool_execution(self):
        """The requested tool runs and ``tools`` is passed to ``gateway.chat``."""
        from agentkit.core.react import ReActEngine

        weather = FakeTool(name="weather", result={"temp": 25, "city": "Shanghai"})
        gateway = make_mock_gateway(
            [
                make_response(
                    content="",
                    tool_calls=[
                        ToolCall(id="tc_1", name="weather", arguments={"city": "Shanghai"}),
                    ],
                ),
                make_response(content="Shanghai temperature is 25°C"),
            ]
        )
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "What's the weather?"}],
            tools=[weather],
        )

        weather_step = result.trajectory[0]
        assert weather_step.tool_name == "weather"
        assert weather_step.result == {"temp": 25, "city": "Shanghai"}

        # The first gateway.chat call must have received a ``tools`` argument.
        first_call = gateway.chat.call_args_list[0]
        tools_via_property = first_call.kwargs.get("tools")
        tools_via_index = first_call[1].get("tools")
        assert tools_via_property is not None or tools_via_index is not None
class TestReActTextParsingMode:
    """Text-parsing mode: the LLM reply is plain text containing a tool-call pattern."""

    async def test_text_parsing_with_action_pattern(self):
        """An ``Action: name({...})`` line is recognised and executed as a tool call."""
        from agentkit.core.react import ReActEngine

        search = FakeTool(name="search", result={"results": ["found"]})
        # The first reply embeds the Action pattern in its text content.
        action_text = 'Action: search({"query": "test"})'
        gateway = make_mock_gateway(
            [
                make_response(content=action_text),
                make_response(content="Here is what I found"),
            ]
        )
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search for test"}],
            tools=[search],
        )

        # Text parsing should detect the Action pattern and run the tool.
        assert result.total_steps == 2
        parsed_step = result.trajectory[0]
        assert parsed_step.action == "tool_call"
        assert parsed_step.tool_name == "search"

    async def test_text_parsing_with_code_block_pattern(self):
        """A fenced ``tool`` code block with JSON is recognised as a tool call."""
        from agentkit.core.react import ReActEngine

        search = FakeTool(name="search", result={"results": ["found"]})
        tool_call_text = '```tool\n{"name": "search", "arguments": {"query": "test"}}\n```'
        scripted = [
            make_response(content=tool_call_text),
            make_response(content="Search results found"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search for test"}],
            tools=[search],
        )

        assert result.total_steps == 2
        parsed_step = result.trajectory[0]
        assert parsed_step.action == "tool_call"
        assert parsed_step.tool_name == "search"
class TestReActEmptyToolList:
    """No tools supplied: the answer is generated directly."""

    async def test_no_tools_direct_answer(self):
        """With ``tools=None`` the single LLM reply becomes the final answer."""
        from agentkit.core.react import ReActEngine

        reply = make_response(content="Direct answer without tools")
        engine = ReActEngine(llm_gateway=make_mock_gateway([reply]))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Hello"}],
            tools=None,
        )

        assert result.output == "Direct answer without tools"
        assert result.total_steps == 1
        assert result.trajectory[0].action == "final_answer"
class TestReActTrajectoryRecording:
    """Trajectory recording: action, tool name, arguments and result per step."""

    async def test_trajectory_records_all_steps(self):
        """Two tool calls and a final answer are recorded in order with full detail."""
        from agentkit.core.react import ReActEngine, ReActStep

        tool_a = FakeTool(name="tool_a", result={"a": 1})
        tool_b = FakeTool(name="tool_b", result={"b": 2})
        scripted = [
            make_response(
                content="Step 1",
                tool_calls=[ToolCall(id="tc_1", name="tool_a", arguments={"x": 1})],
            ),
            make_response(
                content="Step 2",
                tool_calls=[ToolCall(id="tc_2", name="tool_b", arguments={"y": 2})],
            ),
            make_response(content="Final answer"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Multi-step task"}],
            tools=[tool_a, tool_b],
        )

        assert len(result.trajectory) == 3
        assert isinstance(result.trajectory[0], ReActStep)

        # (step number, tool name, arguments, result) expected for each tool step.
        expected_tool_steps = (
            (1, "tool_a", {"x": 1}, {"a": 1}),
            (2, "tool_b", {"y": 2}, {"b": 2}),
        )
        for number, tool_name, arguments, outcome in expected_tool_steps:
            recorded = result.trajectory[number - 1]
            assert recorded.step == number
            assert recorded.action == "tool_call"
            assert recorded.tool_name == tool_name
            assert recorded.arguments == arguments
            assert recorded.result == outcome

        closing = result.trajectory[2]
        assert closing.step == 3
        assert closing.action == "final_answer"
        assert closing.content == "Final answer"
class TestReActTokenAccumulation:
    """Token accounting: usage from every step is summed into the result."""

    async def test_total_tokens_accumulated(self):
        """Per-step and total token counts reflect prompt plus completion tokens."""
        from agentkit.core.react import ReActEngine

        search = FakeTool(name="search", result={"results": ["data"]})
        tool_turn = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_1", name="search", arguments={"q": "test"})],
            prompt_tokens=100,
            completion_tokens=50,
        )
        answer_turn = make_response(
            content="Final answer",
            prompt_tokens=200,
            completion_tokens=30,
        )
        engine = ReActEngine(llm_gateway=make_mock_gateway([tool_turn, answer_turn]))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search"}],
            tools=[search],
        )

        # Total: (100 + 50) + (200 + 30) = 380.
        assert result.total_tokens == 380
        # Each step also records its own token count.
        first_step, second_step = result.trajectory[0], result.trajectory[1]
        assert first_step.tokens == 150
        assert second_step.tokens == 230
class TestReActSystemPrompt:
    """The system prompt is included in the initial messages."""

    async def test_system_prompt_included(self):
        """The first message sent to ``gateway.chat`` is the system prompt."""
        from agentkit.core.react import ReActEngine

        gateway = make_mock_gateway([make_response(content="Response")])
        engine = ReActEngine(llm_gateway=gateway)

        await engine.execute(
            messages=[{"role": "user", "content": "Hello"}],
            system_prompt="You are a helpful assistant",
        )

        # Inspect the messages passed on the first gateway.chat call.
        first_call = gateway.chat.call_args_list[0]
        fallback_messages = first_call[1].get("messages", [])
        sent_messages = first_call.kwargs.get("messages", fallback_messages)

        leading = sent_messages[0]
        assert leading["role"] == "system"
        assert leading["content"] == "You are a helpful assistant"
class TestReActMultipleToolCallsInOneStep:
    """Several tool calls in one step: one LLM response returns multiple ``tool_calls``."""

    async def test_multiple_tool_calls_executed(self):
        """Both tools requested in the same response are executed once each."""
        from agentkit.core.react import ReActEngine

        tool_a = FakeTool(name="tool_a", result={"a": 1})
        tool_b = FakeTool(name="tool_b", result={"b": 2})
        combined_request = make_response(
            content="",
            tool_calls=[
                ToolCall(id="tc_1", name="tool_a", arguments={"x": 1}),
                ToolCall(id="tc_2", name="tool_b", arguments={"y": 2}),
            ],
        )
        wrap_up = make_response(content="Both tools executed")
        engine = ReActEngine(llm_gateway=make_mock_gateway([combined_request, wrap_up]))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Run both tools"}],
            tools=[tool_a, tool_b],
        )

        # Each of the two tools must have been run.
        assert tool_a.call_count == 1
        assert tool_b.call_count == 1
        assert result.output == "Both tools executed"
class TestReActToolNotFound:
    """Unknown tool: the LLM asks for a tool that is not registered."""

    async def test_unknown_tool_returns_error_observation(self):
        """Calling an unregistered tool records an error and the run still completes."""
        from agentkit.core.react import ReActEngine

        scripted = [
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="nonexistent_tool", arguments={})],
            ),
            make_response(content="Tool not found, here is my answer anyway"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Use unknown tool"}],
            tools=[],  # deliberately empty tool list
        )

        # The first step should record the "tool not found" error.
        missing_step = result.trajectory[0]
        assert missing_step.action == "tool_call"
        observation = str(result.trajectory[0].result).lower()
        assert "error" in observation or "not found" in observation

        # The LLM should receive the error and adapt.
        assert result.total_steps == 2
        assert result.output == "Tool not found, here is my answer anyway"
class TestReActTimeout:
    """Loop timeout: exceeding ``timeout_seconds`` raises ``TaskTimeoutError``."""

    @staticmethod
    def _gateway_with_latency(delay: float, reply: str) -> LLMGateway:
        """Return a mocked gateway whose ``chat`` sleeps ``delay`` seconds, then replies."""
        import asyncio

        async def delayed_chat(**kwargs):
            await asyncio.sleep(delay)
            return make_response(content=reply)

        gateway = MagicMock(spec=LLMGateway)
        gateway.chat = AsyncMock(side_effect=delayed_chat)
        return gateway

    async def test_timeout_raises_task_timeout_error(self):
        """A 0.5s LLM call under a 0.3s timeout raises ``TaskTimeoutError``."""
        from agentkit.core.react import ReActEngine
        from agentkit.core.exceptions import TaskTimeoutError

        # Every LLM call takes 0.5s while the timeout is 0.3s.
        gateway = self._gateway_with_latency(0.5, "slow response")
        engine = ReActEngine(llm_gateway=gateway)

        with pytest.raises(TaskTimeoutError):
            await engine.execute(
                messages=[{"role": "user", "content": "Slow task"}],
                timeout_seconds=0.3,
            )

    async def test_timeout_zero_means_no_timeout(self):
        """``timeout_seconds=0`` disables the timeout."""
        from agentkit.core.react import ReActEngine

        # The LLM takes 0.1s; timeout=0 means no timeout applies.
        gateway = self._gateway_with_latency(0.1, "done")
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Task"}],
            timeout_seconds=0,
        )

        assert result.output == "done"
        assert result.status == "success"

    async def test_default_timeout_used_when_none(self):
        """``timeout_seconds=None`` falls back to the engine's ``default_timeout``."""
        from agentkit.core.react import ReActEngine
        from agentkit.core.exceptions import TaskTimeoutError

        gateway = self._gateway_with_latency(0.5, "slow")
        # The engine-level default timeout is 0.3s.
        engine = ReActEngine(llm_gateway=gateway, default_timeout=0.3)

        with pytest.raises(TaskTimeoutError):
            await engine.execute(
                messages=[{"role": "user", "content": "Task"}],
                timeout_seconds=None,  # should use default_timeout
            )

    async def test_normal_completion_unaffected_by_timeout(self):
        """A fast run with a generous timeout completes successfully."""
        from agentkit.core.react import ReActEngine

        quick_reply = make_response(content="Quick answer")
        engine = ReActEngine(llm_gateway=make_mock_gateway([quick_reply]))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Quick task"}],
            timeout_seconds=300,
        )

        assert result.output == "Quick answer"
        assert result.status == "success"
class TestReActCancellation:
    """Loop cancellation: a cancelled ``CancellationToken`` raises ``TaskCancelledError``."""

    async def test_cancel_raises_task_cancelled_error(self):
        """A token cancelled before ``execute`` is called aborts the run."""
        from agentkit.core.react import ReActEngine
        from agentkit.core.protocol import CancellationToken
        from agentkit.core.exceptions import TaskCancelledError

        # The gateway answers any number of calls; the test only cares that
        # the already-cancelled token makes execute() raise.
        async def plain_chat(**kwargs):
            return make_response(content="response")

        gateway = MagicMock(spec=LLMGateway)
        gateway.chat = AsyncMock(side_effect=plain_chat)
        engine = ReActEngine(llm_gateway=gateway)

        token = CancellationToken()
        # Cancel before execution starts.
        token.cancel()

        with pytest.raises(TaskCancelledError):
            await engine.execute(
                messages=[{"role": "user", "content": "Task"}],
                cancellation_token=token,
            )

    async def test_cancel_mid_execution(self):
        """A token cancelled from inside an LLM call aborts the run."""
        from agentkit.core.react import ReActEngine
        from agentkit.core.protocol import CancellationToken
        from agentkit.core.exceptions import TaskCancelledError

        token = CancellationToken()

        async def chat_with_cancel(**kwargs):
            # Cancel as soon as the LLM is called, then still return a tool
            # call so the engine has more work it must refuse to continue.
            token.cancel()
            return make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="search", arguments={"q": "test"})],
            )

        tool = FakeTool(name="search", result={"results": ["data"]})
        gateway = MagicMock(spec=LLMGateway)
        gateway.chat = AsyncMock(side_effect=chat_with_cancel)
        engine = ReActEngine(llm_gateway=gateway)

        with pytest.raises(TaskCancelledError):
            await engine.execute(
                messages=[{"role": "user", "content": "Search"}],
                tools=[tool],
                cancellation_token=token,
            )

    async def test_no_cancel_token_works_normally(self):
        """Omitting ``cancellation_token`` leaves normal execution untouched."""
        from agentkit.core.react import ReActEngine

        gateway = make_mock_gateway([make_response(content="Normal answer")])
        engine = ReActEngine(llm_gateway=gateway)

        # No cancellation_token is passed here.
        result = await engine.execute(
            messages=[{"role": "user", "content": "Normal task"}],
        )

        assert result.output == "Normal answer"
        assert result.status == "success"

    async def test_uncancelled_token_works_normally(self):
        """A token that is never cancelled does not affect the run."""
        from agentkit.core.react import ReActEngine
        from agentkit.core.protocol import CancellationToken

        gateway = make_mock_gateway([make_response(content="Answer")])
        engine = ReActEngine(llm_gateway=gateway)
        token = CancellationToken()  # Not cancelled

        result = await engine.execute(
            messages=[{"role": "user", "content": "Task"}],
            cancellation_token=token,
        )

        assert result.output == "Answer"
        assert result.status == "success"
class TestLoopDetection:
    """Loop detection: a sliding window of hashes spots repeated tool calls."""

    @staticmethod
    def _search_call(call_id: str, query: str):
        """Build an LLM response requesting ``search`` with ``{"q": query}``."""
        return make_response(
            tool_calls=[ToolCall(id=call_id, name="search", arguments={"q": query})],
        )

    async def test_normal_different_tools_no_detection(self):
        """Calls to different tools do not trigger detection."""
        from agentkit.core.react import ReActEngine

        search = FakeTool(name="search", result={"results": ["a"]})
        calculator = FakeTool(name="calculator", result={"value": 42})
        scripted = [
            self._search_call("tc_1", "test"),
            make_response(
                tool_calls=[ToolCall(id="tc_2", name="calculator", arguments={"expr": "6*7"})],
            ),
            make_response(content="Done"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search and calculate"}],
            tools=[search, calculator],
        )

        assert result.status == "success"
        assert result.total_steps == 3

    async def test_same_tool_different_args_no_detection(self):
        """The same tool with different arguments does not trigger detection."""
        from agentkit.core.react import ReActEngine

        search = FakeTool(name="search", result={"results": []})
        scripted = [
            self._search_call("tc_1", "hello"),
            self._search_call("tc_2", "world"),
            make_response(content="Done"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search twice"}],
            tools=[search],
        )

        assert result.status == "success"
        assert result.total_steps == 3

    async def test_loop_detected_injects_correction_then_raises(self):
        """Repeating the same call: first a correction is injected, then ``LoopDetectedError``."""
        from agentkit.core.react import ReActEngine
        from agentkit.core.exceptions import LoopDetectedError

        search = FakeTool(name="search", result={"results": []})
        # Step 1: tool call (executed, window=[hash])
        # Step 2: same tool call (detected, correction injected, continue)
        # Step 3: same tool call again (detected, already corrected -> raise)
        scripted = [self._search_call(f"tc_{n}", "test") for n in (1, 2, 3)]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted), max_steps=10)

        with pytest.raises(LoopDetectedError) as exc_info:
            await engine.execute(
                messages=[{"role": "user", "content": "Search"}],
                tools=[search],
            )

        assert "search" in str(exc_info.value)

    async def test_loop_correction_allows_recovery(self):
        """After the injected correction, an LLM that changes strategy finishes normally."""
        from agentkit.core.react import ReActEngine

        search = FakeTool(name="search", result={"results": []})
        # Step 1: tool call (executed)
        # Step 2: same tool call (detected, correction injected)
        # Step 3: LLM changes strategy -> final answer
        scripted = [
            self._search_call("tc_1", "test"),
            self._search_call("tc_2", "test"),
            make_response(content="I found the answer after changing strategy"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted), max_steps=10)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search"}],
            tools=[search],
        )

        assert result.status == "success"
        assert "changing strategy" in result.output

    async def test_reset_clears_loop_state(self):
        """``reset()`` clears the loop-detection state."""
        from agentkit.core.react import ReActEngine

        engine = ReActEngine(
            llm_gateway=make_mock_gateway([make_response(content="Done")]),
        )
        # Dirty the private loop-detection state by hand.
        engine._loop_window.append("some_hash")
        engine._loop_corrected = True

        engine.reset()

        assert len(engine._loop_window) == 0
        assert engine._loop_corrected is False
# ── U3: Headroom compression tests ─────────────────────────────────
def _make_messages(token_count: int) -> list[dict]:
"""Create messages with approximately the given token count (4 chars = 1 token)."""
char_count = token_count * 4
return [{"role": "user", "content": "x" * char_count}]
class TestHeadroomCompression:
    """U3: proactive compression, triggered from predicted token usage."""

    @staticmethod
    def _engine():
        """Create a ``ReActEngine`` backed by a gateway scripted with one "Done" reply."""
        from agentkit.core.react import ReActEngine

        return ReActEngine(
            llm_gateway=make_mock_gateway([make_response(content="Done")]),
        )

    def test_headroom_triggers_when_ratio_exceeds_threshold(self):
        """Happy path: 110K tokens, model_limit 128K -> 0.86 > 0.8 -> triggers."""
        from agentkit.core.compressor import ContextCompressor

        compressor = ContextCompressor(model_context_limit=128_000)
        conversation = _make_messages(110_000)

        assert compressor.should_compress(conversation) is True

    def test_headroom_does_not_trigger_below_min_tokens(self):
        """Edge case: 5K tokens, model_limit 128K -> no trigger (below min_tokens 8000)."""
        from agentkit.core.compressor import ContextCompressor

        compressor = ContextCompressor(model_context_limit=128_000)
        conversation = _make_messages(5_000)

        assert compressor.should_compress(conversation) is False

    def test_headroom_triggers_for_small_model(self):
        """Edge case: model_limit 8K, conversation 7K -> 0.875 > 0.8 -> triggers."""
        from agentkit.core.compressor import ContextCompressor

        compressor = ContextCompressor(model_context_limit=8_000)
        conversation = _make_messages(7_000)

        assert compressor.should_compress(conversation) is True

    def test_react_skips_compression_when_unavailable(self):
        """Error path: compressor ``is_available()`` is False -> compression is skipped."""
        engine = self._engine()

        compressor = MagicMock()
        compressor.is_available.return_value = False
        compressor.should_compress = MagicMock(return_value=True)

        oversized = [{"role": "user", "content": "x" * 100000}]
        verdict = engine._should_compress(oversized, compressor)

        assert verdict is False

    def test_react_delegates_to_compressor_should_compress(self):
        """ReActEngine._should_compress delegates to compressor.should_compress."""
        engine = self._engine()

        wants_compression = ContextCompressorStub(available=True, compress=True)
        verdict = engine._should_compress(
            [{"role": "user", "content": "test"}], wants_compression
        )
        assert verdict is True

        declines_compression = ContextCompressorStub(available=True, compress=False)
        verdict = engine._should_compress(
            [{"role": "user", "content": "test"}], declines_compression
        )
        assert verdict is False

    def test_react_fallback_for_compressors_without_should_compress(self):
        """Fallback: compressors without should_compress use a fixed threshold."""
        engine = self._engine()

        # A compressor that exposes is_available but no should_compress.
        compressor = MagicMock()
        compressor.is_available.return_value = True
        # Deleting the attribute makes the mock raise AttributeError on access,
        # which is what exercises the fallback path.
        del compressor.should_compress

        # Below the threshold -> no compression.
        below_threshold = _make_messages(4_000)
        assert engine._should_compress(below_threshold, compressor) is False

        # Above the threshold -> compression.
        above_threshold = _make_messages(10_000)
        assert engine._should_compress(above_threshold, compressor) is True
class ContextCompressorStub:
    """Minimal compressor stand-in with fixed answers, for ``_should_compress`` tests."""

    def __init__(self, available: bool, compress: bool):
        """Store the fixed answers for ``is_available`` and ``should_compress``."""
        self._available, self._compress = available, compress

    def is_available(self) -> bool:
        """Return the ``available`` value given at construction."""
        return self._available

    def should_compress(self, messages: list[dict]) -> bool:
        """Return the ``compress`` value given at construction; ``messages`` is ignored."""
        return self._compress
class TestParseMalformedToolUse:
    """Fault-tolerant parsing of malformed ``<tool_use>`` text."""

    @staticmethod
    def _parse(content: str):
        """Run ``ReActEngine._parse_text_tool_calls`` on ``content`` with a mocked gateway."""
        from agentkit.core.react import ReActEngine

        engine = ReActEngine(llm_gateway=MagicMock(spec=LLMGateway))
        return engine._parse_text_tool_calls(content)

    def test_missing_closing_tag_still_parses(self):
        """The model emits ``<tool_use>`` without the closing ``</tool_use>`` tag."""
        content = (
            '<tool_use>\n'
            '{"name": "shell", "arguments": {"command": "ls -la"}}\n'
        )

        calls = self._parse(content)

        assert len(calls) == 1
        parsed = calls[0]
        assert parsed["name"] == "shell"
        assert parsed["arguments"]["command"] == "ls -la"

    def test_malformed_json_with_stray_tags(self):
        """Tool name and arguments are still extracted when stray tags follow the JSON."""
        content = (
            '<tool_use>\n'
            '{"name": "shell", "arguments": {"command": "sudo chown -R $USER /tmp"}}\n'
            '</parameter>\n'
            '<parameter=timeout>30</parameter>\n'
            '<function>\n'
        )

        calls = self._parse(content)

        assert len(calls) == 1
        parsed = calls[0]
        assert parsed["name"] == "shell"
        assert "chown" in parsed["arguments"]["command"]

    def test_truncated_json_still_extracts_name(self):
        """The tool name is still extracted when the JSON is cut off."""
        content = '<tool_use>\n{"name": "web_search", "arguments": {"query": "test"'

        calls = self._parse(content)

        assert len(calls) == 1
        assert calls[0]["name"] == "web_search"

    def test_completely_unparseable_tool_use_returns_empty(self):
        """A ``<tool_use>`` body that cannot be parsed at all yields an empty list."""
        content = '<tool_use>\ngarbage not json at all\n'

        calls = self._parse(content)

        assert calls == []
class TestMalformedToolUseNotLeakedAsFinalAnswer:
    """Malformed ``<tool_use>`` text must not reach the user as the final answer."""

    async def test_malformed_tool_use_triggers_correction_not_leak(self):
        """Raw XML from a malformed ``<tool_use>`` reply is not returned as the answer."""
        from agentkit.core.react import ReActEngine

        shell = FakeTool(name="shell", result={"output": "done", "exit_code": 0})
        malformed_content = (
            '<tool_use>\n'
            '{"name": "shell", "arguments": {"command": "ls"}}\n'
            '</parameter>\n<function>\n'
        )
        scripted = [
            make_response(content=malformed_content),
            make_response(content="Done successfully"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted))

        result = await engine.execute(
            messages=[{"role": "user", "content": "list files"}],
            tools=[shell],
        )

        # None of the raw XML fragments may show up in the final output.
        for leaked_fragment in ("<tool_use>", "</parameter>", "<function>"):
            assert leaked_fragment not in result.output

    async def test_completely_unparseable_tool_use_injects_correction(self):
        """An unparseable ``<tool_use>`` leads to a retry instead of a leaked answer."""
        from agentkit.core.react import ReActEngine

        search = FakeTool(name="search", result={"results": ["data"]})
        scripted = [
            # First reply: a <tool_use> block that cannot be parsed at all.
            make_response(content="<tool_use>\nnot json at all just words\n"),
            # Second reply: the model answers normally after the correction.
            make_response(content="Search completed"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted))

        result = await engine.execute(
            messages=[{"role": "user", "content": "search something"}],
            tools=[search],
        )

        # The raw XML must not be returned as the final answer.
        assert "<tool_use>" not in result.output
        assert result.output == "Search completed"
class TestReActToolUsePromptRules:
    """Text assertions on the rules emitted by ``_build_tool_use_prompt`` (U4 / Bug 2 L0)."""

    @staticmethod
    def _prompt_without_tools() -> str:
        """Render the tool-use prompt for an empty tool list."""
        from agentkit.core.react import ReActEngine

        engine = ReActEngine(llm_gateway=make_mock_gateway([]))
        return engine._build_tool_use_prompt([])

    def test_new_rule_1_present_at_top(self):
        """The new rule 1 (external information requires tools) is present."""
        prompt = self._prompt_without_tools()

        assert "1. 涉及外部信息、实时数据、多步骤分析或你不确定的事实时必须使用工具" in prompt

    def test_old_rule_3_absent(self):
        """The old rule 3 ("answer directly if no tool is needed") no longer appears."""
        prompt = self._prompt_without_tools()

        assert "如果不需要工具就能回答，直接回答即可" not in prompt

    def test_rules_in_correct_order(self):
        """Rules numbered 1 to 5 appear in the expected order."""
        prompt = self._prompt_without_tools()

        # Each rule must start before the next one; ``str.index`` raises
        # ValueError if a rule is missing altogether.
        rule_openers = (
            "1. 涉及外部信息",
            "2. 每次只调用一个工具",
            "3. 等待工具返回结果",
            "4. 仅在确实无需工具时",
            "5. 不要在回答中重复工具的输出",
        )
        offsets = [prompt.index(opener) for opener in rule_openers]

        assert all(earlier < later for earlier, later in zip(offsets, offsets[1:]))

    def test_tool_use_xml_format_preserved(self):
        """The ``<tool_use>`` XML example stays in the prompt for backward compatibility."""
        prompt = self._prompt_without_tools()

        assert "<tool_use>" in prompt
        assert "</tool_use>" in prompt
class TestBug2L0PromptRules:
    """Bug 2 L0 end-to-end check: the prompt holds tool descriptions plus the new rules.

    Bug 2 status: hypothesis applied, pending L4 verification (not yet fixed).
    L0 only performs text assertions; the real-LLM smoke test runs in a
    separate L1/L2 plan.
    """

    @staticmethod
    def _prompt_for(tool) -> str:
        """Render the tool-use prompt for a single registered tool."""
        from agentkit.core.react import ReActEngine

        engine = ReActEngine(llm_gateway=make_mock_gateway([]))
        return engine._build_tool_use_prompt([tool])

    def test_web_search_description_in_prompt(self):
        """After registering ``web_search``, the prompt contains its description text."""
        web_search = FakeTool(
            name="web_search",
            description="搜索互联网信息，获取实时数据、新闻、趋势等",
        )

        prompt = self._prompt_for(web_search)

        # web_search is not a core tool, so it is rendered as an extended tool.
        # Extended tool rendering format: "- name: first_line_of_description"
        assert "web_search" in prompt
        assert "搜索互联网信息" in prompt

    def test_new_rule_1_present_with_tools(self):
        """With a tool registered, the prompt still contains the new rule 1."""
        web_search = FakeTool(
            name="web_search",
            description="搜索互联网信息",
        )

        prompt = self._prompt_for(web_search)

        assert "1. 涉及外部信息、实时数据、多步骤分析或你不确定的事实时必须使用工具" in prompt
        assert "如果不需要工具就能回答，直接回答即可" not in prompt