# fischer-agentkit/tests/unit/test_react_engine.py

"""Unit tests for the ReAct engine - TDD step one."""

import json
from unittest.mock import AsyncMock, MagicMock

import pytest

from agentkit.llm.gateway import LLMGateway
from agentkit.llm.protocol import LLMResponse, TokenUsage, ToolCall
from agentkit.tools.base import Tool


# ── Test Helpers ──────────────────────────────────────────


class FakeTool(Tool):
    """Test double for ``Tool`` that returns a canned result or raises.

    The instance records how many times ``execute`` ran (``call_count``) and
    the keyword arguments of the most recent call (``last_kwargs``) so tests
    can assert on them.

    Args:
        name: Tool name forwarded to ``Tool.__init__``.
        description: Tool description forwarded to ``Tool.__init__``.
        result: Value returned by ``execute``. Only ``None`` selects the
            default ``{"status": "ok"}``; an explicit empty dict is kept.
        should_fail: When true, ``execute`` raises ``RuntimeError``.
    """

    def __init__(
        self,
        name: str = "fake_tool",
        description: str = "A fake tool for testing",
        result: dict | None = None,
        should_fail: bool = False,
    ):
        super().__init__(name=name, description=description)
        # Compare against None instead of relying on truthiness, so a caller
        # that passes ``{}`` gets ``{}`` back rather than the default payload.
        self._result = {"status": "ok"} if result is None else result
        self._should_fail = should_fail
        self.call_count = 0
        self.last_kwargs: dict | None = None

    async def execute(self, **kwargs) -> dict:
        """Record the call, then return the canned result or raise.

        Raises:
            RuntimeError: If the tool was constructed with ``should_fail=True``.
        """
        # Bookkeeping happens before the failure check, so failed calls are
        # counted as well.
        self.call_count += 1
        self.last_kwargs = kwargs
        if self._should_fail:
            raise RuntimeError(f"Tool '{self.name}' execution failed")
        return self._result


def make_mock_gateway(responses: list[LLMResponse]) -> LLMGateway:
    """Build a mocked ``LLMGateway`` whose ``chat`` yields *responses* in order.

    ``chat`` is an ``AsyncMock`` with ``side_effect=responses``, so each
    awaited call produces the next item of the list.
    """
    chat_stub = AsyncMock(side_effect=responses)
    mocked_gateway = MagicMock(spec=LLMGateway)
    mocked_gateway.chat = chat_stub
    return mocked_gateway


def make_response(
    content: str = "",
    tool_calls: list[ToolCall] | None = None,
    prompt_tokens: int = 10,
    completion_tokens: int = 20,
) -> LLMResponse:
    """Shortcut for constructing an ``LLMResponse`` with model ``"test-model"``.

    A missing or empty ``tool_calls`` is passed on as a fresh empty list.
    """
    usage = TokenUsage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
    )
    calls = tool_calls if tool_calls else []
    return LLMResponse(
        content=content,
        model="test-model",
        usage=usage,
        tool_calls=calls,
    )


# ── Test Classes ──────────────────────────────────────────


class TestReActStepSingleCompletion:
    """Single-step completion: the LLM answers directly, with no tool call."""

    async def test_single_step_returns_final_answer(self):
        from agentkit.core.react import ReActEngine, ReActResult

        scripted = [make_response(content="The answer is 42")]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted))
        conversation = [{"role": "user", "content": "What is the answer?"}]

        result = await engine.execute(messages=conversation)

        assert isinstance(result, ReActResult)
        assert result.output == "The answer is 42"
        assert result.total_steps == 1
        assert len(result.trajectory) == 1
        # The single recorded step must be the final answer itself.
        only_step = result.trajectory[0]
        assert only_step.action == "final_answer"
        assert only_step.content == "The answer is 42"


class TestReActTwoStepCompletion:
    """Two-step completion: the LLM calls a tool first, then gives the final answer."""

    async def test_two_step_with_tool_call(self):
        # Only ReActEngine is needed here; the previously imported but unused
        # ReActResult has been dropped.
        from agentkit.core.react import ReActEngine

        tool = FakeTool(name="calculator", result={"value": 42})
        gateway = make_mock_gateway([
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="calculator", arguments={"expr": "6*7"})],
            ),
            make_response(content="The result is 42"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Calculate 6*7"}],
            tools=[tool],
        )

        assert result.output == "The result is 42"
        assert result.total_steps == 2
        assert len(result.trajectory) == 2
        # Step 1: tool call
        assert result.trajectory[0].action == "tool_call"
        assert result.trajectory[0].tool_name == "calculator"
        assert result.trajectory[0].arguments == {"expr": "6*7"}
        assert result.trajectory[0].result == {"value": 42}
        # Step 2: final answer
        assert result.trajectory[1].action == "final_answer"
        assert result.trajectory[1].content == "The result is 42"


class TestReActMultiStep:
    """Multi-step reasoning: a 3-step ReAct loop calling a different tool per step."""

    async def test_three_step_react_loop(self):
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["Python is great"]})
        calc_tool = FakeTool(name="calculator", result={"value": 100})

        # Scripted LLM turns: search, then calculate, then answer.
        search_turn = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_1", name="search", arguments={"query": "Python"})],
        )
        calc_turn = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_2", name="calculator", arguments={"expr": "10*10"})],
        )
        final_turn = make_response(
            content="Based on search and calculation, the answer is 100",
        )
        gateway = make_mock_gateway([search_turn, calc_turn, final_turn])
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search and calculate"}],
            tools=[search_tool, calc_tool],
        )

        assert result.total_steps == 3
        steps = result.trajectory
        assert steps[0].tool_name == "search"
        assert steps[1].tool_name == "calculator"
        assert steps[2].action == "final_answer"
        # Each tool ran exactly once.
        assert search_tool.call_count == 1
        assert calc_tool.call_count == 1


class TestReActMaxSteps:
    """Step limit reached: the engine returns the best result it has so far."""

    async def test_max_steps_returns_current_best(self):
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["data"]})

        # The LLM keeps requesting a tool call and never gives a final answer.
        looping_call = ToolCall(id="tc_loop", name="search", arguments={"query": "more"})
        looping_response = make_response(
            content="Thinking...",
            tool_calls=[looping_call],
        )
        gateway = make_mock_gateway([looping_response for _ in range(20)])
        engine = ReActEngine(llm_gateway=gateway, max_steps=3)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Keep searching"}],
            tools=[search_tool],
        )

        assert result.total_steps == 3
        # On reaching max_steps the content of the last step should be returned.
        assert result.output is not None


class TestReActToolCallFailure:
    """Tool call failure: the LLM receives the error and adjusts its strategy."""

    async def test_tool_failure_included_in_observation(self):
        from agentkit.core.react import ReActEngine

        failing_tool = FakeTool(name="broken_tool", should_fail=True)
        broken_call = ToolCall(id="tc_1", name="broken_tool", arguments={})
        gateway = make_mock_gateway([
            make_response(content="", tool_calls=[broken_call]),
            make_response(content="The tool failed, but here is my best answer"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Use the broken tool"}],
            tools=[failing_tool],
        )

        assert result.total_steps == 2
        # The first step (tool_call) should carry the error information.
        failed_step = result.trajectory[0]
        assert failed_step.action == "tool_call"
        assert failed_step.result is not None
        # The recorded result should mention the error.
        observation = str(failed_step.result).lower()
        assert "error" in observation or "failed" in observation
        # In the second step the LLM adapts and gives the final answer.
        assert result.trajectory[1].action == "final_answer"
        assert result.output == "The tool failed, but here is my best answer"


class TestReActFunctionCallingMode:
    """Function-calling mode: the LLM returns structured ``tool_calls``."""

    async def test_function_calling_tool_execution(self):
        from agentkit.core.react import ReActEngine

        tool = FakeTool(name="weather", result={"temp": 25, "city": "Shanghai"})
        gateway = make_mock_gateway([
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="weather", arguments={"city": "Shanghai"})],
            ),
            make_response(content="Shanghai temperature is 25°C"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "What's the weather?"}],
            tools=[tool],
        )

        assert result.trajectory[0].tool_name == "weather"
        assert result.trajectory[0].result == {"temp": 25, "city": "Shanghai"}
        # Verify that the first gateway.chat call received a ``tools`` keyword.
        # For entries of call_args_list, ``call.kwargs`` and ``call[1]`` are the
        # same mapping, so a single lookup is sufficient.
        first_call = gateway.chat.call_args_list[0]
        assert first_call.kwargs.get("tools") is not None


class TestReActTextParsingMode:
    """Text-parsing mode: the LLM returns plain text containing a tool-call pattern."""

    async def test_text_parsing_with_action_pattern(self):
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["found"]})
        # The LLM's text reply embeds an ``Action: ...`` pattern.
        action_reply = make_response(content='Action: search({"query": "test"})')
        final_reply = make_response(content="Here is what I found")
        engine = ReActEngine(llm_gateway=make_mock_gateway([action_reply, final_reply]))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search for test"}],
            tools=[search_tool],
        )

        # Text parsing should recognise the Action pattern and run the tool.
        assert result.total_steps == 2
        first_step = result.trajectory[0]
        assert first_step.action == "tool_call"
        assert first_step.tool_name == "search"

    async def test_text_parsing_with_code_block_pattern(self):
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["found"]})
        # The LLM's text reply embeds a fenced ``tool`` block holding JSON.
        tool_call_text = '```tool\n{"name": "search", "arguments": {"query": "test"}}\n```'
        fenced_reply = make_response(content=tool_call_text)
        final_reply = make_response(content="Search results found")
        engine = ReActEngine(llm_gateway=make_mock_gateway([fenced_reply, final_reply]))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search for test"}],
            tools=[search_tool],
        )

        assert result.total_steps == 2
        first_step = result.trajectory[0]
        assert first_step.action == "tool_call"
        assert first_step.tool_name == "search"


class TestReActEmptyToolList:
    """No tools supplied: the engine produces the answer directly."""

    async def test_no_tools_direct_answer(self):
        from agentkit.core.react import ReActEngine

        scripted = [make_response(content="Direct answer without tools")]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted))
        conversation = [{"role": "user", "content": "Hello"}]

        result = await engine.execute(messages=conversation, tools=None)

        assert result.output == "Direct answer without tools"
        assert result.total_steps == 1
        assert result.trajectory[0].action == "final_answer"


class TestReActTrajectoryRecording:
    """Trajectory recording: action, tool_name and result are stored for every step."""

    async def test_trajectory_records_all_steps(self):
        from agentkit.core.react import ReActEngine, ReActStep

        tool_a = FakeTool(name="tool_a", result={"a": 1})
        tool_b = FakeTool(name="tool_b", result={"b": 2})

        first_turn = make_response(
            content="Step 1",
            tool_calls=[ToolCall(id="tc_1", name="tool_a", arguments={"x": 1})],
        )
        second_turn = make_response(
            content="Step 2",
            tool_calls=[ToolCall(id="tc_2", name="tool_b", arguments={"y": 2})],
        )
        last_turn = make_response(content="Final answer")
        gateway = make_mock_gateway([first_turn, second_turn, last_turn])
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Multi-step task"}],
            tools=[tool_a, tool_b],
        )

        assert len(result.trajectory) == 3
        assert isinstance(result.trajectory[0], ReActStep)

        # Expected (step number, tool name, arguments, result) of the two tool steps.
        expected_tool_steps = [
            (1, "tool_a", {"x": 1}, {"a": 1}),
            (2, "tool_b", {"y": 2}, {"b": 2}),
        ]
        for recorded, expected in zip(result.trajectory, expected_tool_steps):
            number, tool_name, arguments, outcome = expected
            assert recorded.step == number
            assert recorded.action == "tool_call"
            assert recorded.tool_name == tool_name
            assert recorded.arguments == arguments
            assert recorded.result == outcome

        closing_step = result.trajectory[2]
        assert closing_step.step == 3
        assert closing_step.action == "final_answer"
        assert closing_step.content == "Final answer"


class TestReActTokenAccumulation:
    """Token accumulation: token counts of all steps are summed up."""

    async def test_total_tokens_accumulated(self):
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["data"]})
        tool_turn = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_1", name="search", arguments={"q": "test"})],
            prompt_tokens=100,
            completion_tokens=50,
        )
        answer_turn = make_response(
            content="Final answer",
            prompt_tokens=200,
            completion_tokens=30,
        )
        engine = ReActEngine(llm_gateway=make_mock_gateway([tool_turn, answer_turn]))

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search"}],
            tools=[search_tool],
        )

        # (100 + 50) + (200 + 30) = 380
        assert result.total_tokens == 380
        # The per-step token counts should be recorded as well.
        per_step = result.trajectory
        assert per_step[0].tokens == 150
        assert per_step[1].tokens == 230


class TestReActSystemPrompt:
    """The system prompt is included at the start of the messages sent to the LLM."""

    async def test_system_prompt_included(self):
        from agentkit.core.react import ReActEngine

        gateway = make_mock_gateway([
            make_response(content="Response"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        await engine.execute(
            messages=[{"role": "user", "content": "Hello"}],
            system_prompt="You are a helpful assistant",
        )

        # Inspect the ``messages`` keyword of the first gateway.chat call.
        # ``call.kwargs`` and ``call[1]`` are the same mapping, so one lookup
        # replaces the former nested fallback.
        first_call = gateway.chat.call_args_list[0]
        messages = first_call.kwargs.get("messages", [])
        # Fail with a readable message instead of an IndexError when the
        # keyword is missing or empty.
        assert messages, "gateway.chat was not called with a non-empty 'messages' keyword"
        assert messages[0]["role"] == "system"
        assert messages[0]["content"] == "You are a helpful assistant"


class TestReActMultipleToolCallsInOneStep:
    """Several tool calls in one step: a single LLM response carries multiple tool_calls."""

    async def test_multiple_tool_calls_executed(self):
        from agentkit.core.react import ReActEngine

        tool_a = FakeTool(name="tool_a", result={"a": 1})
        tool_b = FakeTool(name="tool_b", result={"b": 2})

        # One response requesting both tools at once.
        combined_calls = [
            ToolCall(id="tc_1", name="tool_a", arguments={"x": 1}),
            ToolCall(id="tc_2", name="tool_b", arguments={"y": 2}),
        ]
        gateway = make_mock_gateway([
            make_response(content="", tool_calls=combined_calls),
            make_response(content="Both tools executed"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Run both tools"}],
            tools=[tool_a, tool_b],
        )

        # Both tools should have been executed.
        assert tool_a.call_count == 1
        assert tool_b.call_count == 1
        assert result.output == "Both tools executed"


class TestReActToolNotFound:
    """Tool not found: the LLM requests a tool that does not exist."""

    async def test_unknown_tool_returns_error_observation(self):
        from agentkit.core.react import ReActEngine

        missing_call = ToolCall(id="tc_1", name="nonexistent_tool", arguments={})
        gateway = make_mock_gateway([
            make_response(content="", tool_calls=[missing_call]),
            make_response(content="Tool not found, here is my answer anyway"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Use unknown tool"}],
            tools=[],  # empty tool list
        )

        # The first step should record a tool-not-found error.
        first_step = result.trajectory[0]
        assert first_step.action == "tool_call"
        observation = str(first_step.result).lower()
        assert "error" in observation or "not found" in observation
        # The LLM should receive the error and adapt.
        assert result.total_steps == 2
        assert result.output == "Tool not found, here is my answer anyway"