# fischer-agentkit/tests/unit/test_react_engine.py

"""ReAct Engine 单元测试 - TDD 第一步"""

import json
from unittest.mock import AsyncMock, MagicMock

import pytest

from agentkit.llm.gateway import LLMGateway
from agentkit.llm.protocol import LLMResponse, TokenUsage, ToolCall
from agentkit.tools.base import Tool


# ── Test Helpers ──────────────────────────────────────────


class FakeTool(Tool):
    """Configurable in-memory ``Tool`` double used by the engine tests.

    Every ``execute`` call is counted and its keyword arguments are kept, so
    a test can check how the tool was invoked.

    Args:
        name: Tool name forwarded to ``Tool.__init__``.
        description: Tool description forwarded to ``Tool.__init__``.
        result: Value returned by ``execute``. Only ``None`` selects the
            default ``{"status": "ok"}``; an explicitly passed empty dict is
            returned unchanged.
        should_fail: When true, ``execute`` raises ``RuntimeError`` instead
            of returning ``result``.
    """

    def __init__(
        self,
        name: str = "fake_tool",
        description: str = "A fake tool for testing",
        result: dict | None = None,
        should_fail: bool = False,
    ):
        super().__init__(name=name, description=description)
        # Compare against None instead of relying on truthiness: `result or
        # default` would silently swap a deliberately empty `{}` for the default.
        self._result = {"status": "ok"} if result is None else result
        self._should_fail = should_fail
        self.call_count = 0
        self.last_kwargs: dict | None = None

    async def execute(self, **kwargs) -> dict:
        """Record the invocation, then return the configured result or raise.

        Raises:
            RuntimeError: If the tool was created with ``should_fail=True``.
        """
        # Bookkeeping happens before the failure check so failed calls are
        # counted as well.
        self.call_count += 1
        self.last_kwargs = kwargs
        if self._should_fail:
            raise RuntimeError(f"Tool '{self.name}' execution failed")
        return self._result


def make_mock_gateway(responses: list[LLMResponse]) -> LLMGateway:
    """Build a mocked ``LLMGateway`` whose ``chat`` replays ``responses`` in order.

    ``chat`` is an ``AsyncMock`` with ``side_effect=responses``, so each
    awaited call consumes the next scripted response.
    """
    scripted_chat = AsyncMock(side_effect=responses)
    mock_gateway = MagicMock(spec=LLMGateway)
    mock_gateway.chat = scripted_chat
    return mock_gateway


def make_response(
    content: str = "",
    tool_calls: list[ToolCall] | None = None,
    prompt_tokens: int = 10,
    completion_tokens: int = 20,
) -> LLMResponse:
    """Shortcut for constructing an ``LLMResponse`` for the fixed ``test-model``.

    A missing (or empty) ``tool_calls`` argument becomes an empty list, and the
    two token counts are wrapped in a ``TokenUsage``.
    """
    usage = TokenUsage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
    )
    calls = tool_calls or []
    return LLMResponse(content=content, model="test-model", usage=usage, tool_calls=calls)


# ── Test Classes ──────────────────────────────────────────


class TestReActStepSingleCompletion:
    """Single-step completion: the LLM returns the final answer directly, without tool calls."""

    async def test_single_step_returns_final_answer(self):
        """One plain-text response produces a one-step ``final_answer`` trajectory."""
        from agentkit.core.react import ReActEngine, ReActResult

        expected = "The answer is 42"
        llm = make_mock_gateway([make_response(content=expected)])

        outcome = await ReActEngine(llm_gateway=llm).execute(
            messages=[{"role": "user", "content": "What is the answer?"}],
        )

        assert isinstance(outcome, ReActResult)
        assert outcome.output == expected
        assert outcome.total_steps == 1
        assert len(outcome.trajectory) == 1

        only_step = outcome.trajectory[0]
        assert only_step.action == "final_answer"
        assert only_step.content == expected


class TestReActTwoStepCompletion:
    """Two-step completion: the LLM first calls a tool, then returns the final answer."""

    async def test_two_step_with_tool_call(self):
        """A tool-call response followed by a plain response yields two recorded steps."""
        from agentkit.core.react import ReActEngine

        tool = FakeTool(name="calculator", result={"value": 42})
        gateway = make_mock_gateway([
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="calculator", arguments={"expr": "6*7"})],
            ),
            make_response(content="The result is 42"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Calculate 6*7"}],
            tools=[tool],
        )

        assert result.output == "The result is 42"
        assert result.total_steps == 2
        assert len(result.trajectory) == 2
        # Step 1: the tool call, with its arguments and the tool's result.
        assert result.trajectory[0].action == "tool_call"
        assert result.trajectory[0].tool_name == "calculator"
        assert result.trajectory[0].arguments == {"expr": "6*7"}
        assert result.trajectory[0].result == {"value": 42}
        # Step 2: the final answer.
        assert result.trajectory[1].action == "final_answer"
        assert result.trajectory[1].content == "The result is 42"


class TestReActMultiStep:
    """Multi-step reasoning: a three-step ReAct loop that calls a different tool per step."""

    async def test_three_step_react_loop(self):
        """Two tool-call responses and a final response give three steps, one call per tool."""
        from agentkit.core.react import ReActEngine

        searcher = FakeTool(name="search", result={"results": ["Python is great"]})
        calculator = FakeTool(name="calculator", result={"value": 100})

        search_call = ToolCall(id="tc_1", name="search", arguments={"query": "Python"})
        calc_call = ToolCall(id="tc_2", name="calculator", arguments={"expr": "10*10"})
        llm = make_mock_gateway([
            make_response(content="", tool_calls=[search_call]),
            make_response(content="", tool_calls=[calc_call]),
            make_response(content="Based on search and calculation, the answer is 100"),
        ])

        outcome = await ReActEngine(llm_gateway=llm).execute(
            messages=[{"role": "user", "content": "Search and calculate"}],
            tools=[searcher, calculator],
        )

        assert outcome.total_steps == 3
        steps = outcome.trajectory
        assert steps[0].tool_name == "search"
        assert steps[1].tool_name == "calculator"
        assert steps[2].action == "final_answer"
        assert searcher.call_count == 1
        assert calculator.call_count == 1


class TestReActMaxSteps:
    """When the maximum step count is reached, the current best result is returned."""

    async def test_max_steps_returns_current_best(self):
        """With ``max_steps=3`` and an LLM that never finishes, exactly three steps run."""
        from agentkit.core.react import ReActEngine

        search = FakeTool(name="search", result={"results": ["data"]})

        # The LLM keeps returning tool calls and never gives a final answer;
        # the arguments differ on every attempt to avoid loop detection.
        scripted = []
        for attempt in range(20):
            call = ToolCall(
                id=f"tc_{attempt}",
                name="search",
                arguments={"query": f"attempt_{attempt}"},
            )
            scripted.append(make_response(content="Thinking...", tool_calls=[call]))

        llm = make_mock_gateway(scripted)
        react = ReActEngine(llm_gateway=llm, max_steps=3)

        outcome = await react.execute(
            messages=[{"role": "user", "content": "Keep searching"}],
            tools=[search],
        )

        assert outcome.total_steps == 3
        # On hitting max_steps some output (the last step's content) should be returned.
        assert outcome.output is not None


class TestReActToolCallFailure:
    """Tool call failure: the LLM receives the error and adjusts its strategy."""

    async def test_tool_failure_included_in_observation(self):
        """A raising tool is recorded as a ``tool_call`` step whose result mentions the failure."""
        from agentkit.core.react import ReActEngine

        broken = FakeTool(name="broken_tool", should_fail=True)
        fallback_answer = "The tool failed, but here is my best answer"
        llm = make_mock_gateway([
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="broken_tool", arguments={})],
            ),
            make_response(content=fallback_answer),
        ])

        outcome = await ReActEngine(llm_gateway=llm).execute(
            messages=[{"role": "user", "content": "Use the broken tool"}],
            tools=[broken],
        )

        assert outcome.total_steps == 2

        # The first step is the tool call and must carry the error information.
        failed_step = outcome.trajectory[0]
        assert failed_step.action == "tool_call"
        assert failed_step.result is not None
        observed = str(failed_step.result).lower()
        assert "error" in observed or "failed" in observed

        # The second step is the LLM's adjusted final answer.
        assert outcome.trajectory[1].action == "final_answer"
        assert outcome.output == fallback_answer


class TestReActFunctionCallingMode:
    """Function-calling mode: the LLM returns structured ``tool_calls``."""

    async def test_function_calling_tool_execution(self):
        """The requested tool runs, and ``tools`` is passed to the first ``gateway.chat`` call."""
        from agentkit.core.react import ReActEngine

        tool = FakeTool(name="weather", result={"temp": 25, "city": "Shanghai"})
        gateway = make_mock_gateway([
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="weather", arguments={"city": "Shanghai"})],
            ),
            make_response(content="Shanghai temperature is 25°C"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "What's the weather?"}],
            tools=[tool],
        )

        assert result.trajectory[0].tool_name == "weather"
        assert result.trajectory[0].result == {"temp": 25, "city": "Shanghai"}
        # Verify that gateway.chat received a `tools` keyword argument.
        # `call.kwargs` and `call[1]` are the same dict, so one lookup suffices.
        first_call = gateway.chat.call_args_list[0]
        assert first_call.kwargs.get("tools") is not None


class TestReActTextParsingMode:
    """Text-parsing mode: the LLM returns plain text that contains a tool-call pattern."""

    async def test_text_parsing_with_action_pattern(self):
        """An ``Action: name({...})`` line in the content is executed as a tool call."""
        from agentkit.core.react import ReActEngine

        search = FakeTool(name="search", result={"results": ["found"]})
        # The first LLM response embeds the Action pattern in its text.
        action_text = 'Action: search({"query": "test"})'
        llm = make_mock_gateway([
            make_response(content=action_text),
            make_response(content="Here is what I found"),
        ])

        outcome = await ReActEngine(llm_gateway=llm).execute(
            messages=[{"role": "user", "content": "Search for test"}],
            tools=[search],
        )

        # Text parsing should recognise the Action pattern and run the tool.
        assert outcome.total_steps == 2
        first_step = outcome.trajectory[0]
        assert first_step.action == "tool_call"
        assert first_step.tool_name == "search"

    async def test_text_parsing_with_code_block_pattern(self):
        """A fenced ``tool`` code block holding JSON is executed as a tool call."""
        from agentkit.core.react import ReActEngine

        search = FakeTool(name="search", result={"results": ["found"]})
        fenced_call = '```tool\n{"name": "search", "arguments": {"query": "test"}}\n```'
        llm = make_mock_gateway([
            make_response(content=fenced_call),
            make_response(content="Search results found"),
        ])

        outcome = await ReActEngine(llm_gateway=llm).execute(
            messages=[{"role": "user", "content": "Search for test"}],
            tools=[search],
        )

        assert outcome.total_steps == 2
        first_step = outcome.trajectory[0]
        assert first_step.action == "tool_call"
        assert first_step.tool_name == "search"


class TestReActEmptyToolList:
    """No tools supplied: the answer is generated directly."""

    async def test_no_tools_direct_answer(self):
        """With ``tools=None`` a plain response becomes a single ``final_answer`` step."""
        from agentkit.core.react import ReActEngine

        answer = "Direct answer without tools"
        llm = make_mock_gateway([make_response(content=answer)])

        outcome = await ReActEngine(llm_gateway=llm).execute(
            messages=[{"role": "user", "content": "Hello"}],
            tools=None,
        )

        assert outcome.output == answer
        assert outcome.total_steps == 1
        assert outcome.trajectory[0].action == "final_answer"


class TestReActTrajectoryRecording:
    """Trajectory recording: each step's action, tool name and result are stored correctly."""

    async def test_trajectory_records_all_steps(self):
        """Two tool calls and a final answer are recorded as steps 1, 2 and 3 with their details."""
        from agentkit.core.react import ReActEngine, ReActStep

        first_tool = FakeTool(name="tool_a", result={"a": 1})
        second_tool = FakeTool(name="tool_b", result={"b": 2})

        call_a = ToolCall(id="tc_1", name="tool_a", arguments={"x": 1})
        call_b = ToolCall(id="tc_2", name="tool_b", arguments={"y": 2})
        llm = make_mock_gateway([
            make_response(content="Step 1", tool_calls=[call_a]),
            make_response(content="Step 2", tool_calls=[call_b]),
            make_response(content="Final answer"),
        ])

        outcome = await ReActEngine(llm_gateway=llm).execute(
            messages=[{"role": "user", "content": "Multi-step task"}],
            tools=[first_tool, second_tool],
        )

        assert len(outcome.trajectory) == 3

        first = outcome.trajectory[0]
        assert isinstance(first, ReActStep)
        assert first.step == 1
        assert first.action == "tool_call"
        assert first.tool_name == "tool_a"
        assert first.arguments == {"x": 1}
        assert first.result == {"a": 1}

        second = outcome.trajectory[1]
        assert second.step == 2
        assert second.action == "tool_call"
        assert second.tool_name == "tool_b"
        assert second.arguments == {"y": 2}
        assert second.result == {"b": 2}

        last = outcome.trajectory[2]
        assert last.step == 3
        assert last.action == "final_answer"
        assert last.content == "Final answer"


class TestReActTokenAccumulation:
    """Token accumulation: token counts of all steps are summed."""

    async def test_total_tokens_accumulated(self):
        """Per-step tokens are prompt + completion, and the total is their sum."""
        from agentkit.core.react import ReActEngine

        search = FakeTool(name="search", result={"results": ["data"]})
        tool_step = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_1", name="search", arguments={"q": "test"})],
            prompt_tokens=100,
            completion_tokens=50,
        )
        answer_step = make_response(
            content="Final answer",
            prompt_tokens=200,
            completion_tokens=30,
        )
        llm = make_mock_gateway([tool_step, answer_step])

        outcome = await ReActEngine(llm_gateway=llm).execute(
            messages=[{"role": "user", "content": "Search"}],
            tools=[search],
        )

        # (100 + 50) + (200 + 30) = 380
        assert outcome.total_tokens == 380
        # Each step also records its own token count.
        assert outcome.trajectory[0].tokens == 150
        assert outcome.trajectory[1].tokens == 230


class TestReActSystemPrompt:
    """The system prompt is included in the initial messages."""

    async def test_system_prompt_included(self):
        """The first ``gateway.chat`` call starts with the configured system message."""
        from agentkit.core.react import ReActEngine

        gateway = make_mock_gateway([
            make_response(content="Response"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        await engine.execute(
            messages=[{"role": "user", "content": "Hello"}],
            system_prompt="You are a helpful assistant",
        )

        # Inspect the messages of the first gateway.chat call. `call.kwargs`
        # and `call[1]` are the same dict, so a single lookup is enough.
        first_call = gateway.chat.call_args_list[0]
        messages = first_call.kwargs.get("messages", [])
        # Fail with a readable message instead of an IndexError below.
        assert messages, "gateway.chat was not called with a non-empty 'messages' keyword"
        assert messages[0]["role"] == "system"
        assert messages[0]["content"] == "You are a helpful assistant"


class TestReActMultipleToolCallsInOneStep:
    """Several tool calls in one step: a single LLM response carries multiple ``tool_calls``."""

    async def test_multiple_tool_calls_executed(self):
        """Both tools named in one response are each executed exactly once."""
        from agentkit.core.react import ReActEngine

        first_tool = FakeTool(name="tool_a", result={"a": 1})
        second_tool = FakeTool(name="tool_b", result={"b": 2})

        both_calls = [
            ToolCall(id="tc_1", name="tool_a", arguments={"x": 1}),
            ToolCall(id="tc_2", name="tool_b", arguments={"y": 2}),
        ]
        llm = make_mock_gateway([
            make_response(content="", tool_calls=both_calls),
            make_response(content="Both tools executed"),
        ])

        outcome = await ReActEngine(llm_gateway=llm).execute(
            messages=[{"role": "user", "content": "Run both tools"}],
            tools=[first_tool, second_tool],
        )

        # Both tools must have been executed.
        assert first_tool.call_count == 1
        assert second_tool.call_count == 1
        assert outcome.output == "Both tools executed"


class TestReActToolNotFound:
    """Tool not found: the LLM calls a tool that does not exist."""

    async def test_unknown_tool_returns_error_observation(self):
        """Calling an unregistered tool records an error result and the loop continues."""
        from agentkit.core.react import ReActEngine

        final_text = "Tool not found, here is my answer anyway"
        llm = make_mock_gateway([
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="nonexistent_tool", arguments={})],
            ),
            make_response(content=final_text),
        ])

        outcome = await ReActEngine(llm_gateway=llm).execute(
            messages=[{"role": "user", "content": "Use unknown tool"}],
            tools=[],  # empty tool list
        )

        # The first step should record the tool-not-found error.
        missing_step = outcome.trajectory[0]
        assert missing_step.action == "tool_call"
        observed = str(missing_step.result).lower()
        assert "error" in observed or "not found" in observed
        # The LLM should receive the error and adjust.
        assert outcome.total_steps == 2
        assert outcome.output == final_text


class TestReActTimeout:
    """ReAct loop timeout: exceeding ``timeout_seconds`` raises ``TaskTimeoutError``."""

    async def test_timeout_raises_task_timeout_error(self):
        """A 0.5s LLM call under a 0.3s timeout raises ``TaskTimeoutError``."""
        import asyncio
        from agentkit.core.react import ReActEngine
        from agentkit.core.exceptions import TaskTimeoutError

        # Every LLM call is delayed by 0.5s while the timeout is 0.3s.
        async def delayed_chat(**kwargs):
            await asyncio.sleep(0.5)
            return make_response(content="slow response")

        llm = MagicMock(spec=LLMGateway)
        llm.chat = AsyncMock(side_effect=delayed_chat)
        react = ReActEngine(llm_gateway=llm)

        with pytest.raises(TaskTimeoutError):
            await react.execute(
                messages=[{"role": "user", "content": "Slow task"}],
                timeout_seconds=0.3,
            )

    async def test_timeout_zero_means_no_timeout(self):
        """``timeout_seconds=0`` disables the timeout, so a 0.1s call succeeds."""
        import asyncio
        from agentkit.core.react import ReActEngine

        # The LLM is delayed by 0.1s; timeout=0 means "no timeout".
        async def briefly_delayed_chat(**kwargs):
            await asyncio.sleep(0.1)
            return make_response(content="done")

        llm = MagicMock(spec=LLMGateway)
        llm.chat = AsyncMock(side_effect=briefly_delayed_chat)
        react = ReActEngine(llm_gateway=llm)

        outcome = await react.execute(
            messages=[{"role": "user", "content": "Task"}],
            timeout_seconds=0,
        )
        assert outcome.output == "done"
        assert outcome.status == "success"

    async def test_default_timeout_used_when_none(self):
        """``timeout_seconds=None`` falls back to the engine's ``default_timeout``."""
        import asyncio
        from agentkit.core.react import ReActEngine
        from agentkit.core.exceptions import TaskTimeoutError

        async def delayed_chat(**kwargs):
            await asyncio.sleep(0.5)
            return make_response(content="slow")

        llm = MagicMock(spec=LLMGateway)
        llm.chat = AsyncMock(side_effect=delayed_chat)
        # The engine-level default timeout is 0.3s.
        react = ReActEngine(llm_gateway=llm, default_timeout=0.3)

        with pytest.raises(TaskTimeoutError):
            await react.execute(
                messages=[{"role": "user", "content": "Task"}],
                timeout_seconds=None,  # should use default_timeout
            )

    async def test_normal_completion_unaffected_by_timeout(self):
        """A fast response under a generous timeout completes successfully."""
        from agentkit.core.react import ReActEngine

        llm = make_mock_gateway([make_response(content="Quick answer")])
        react = ReActEngine(llm_gateway=llm)

        outcome = await react.execute(
            messages=[{"role": "user", "content": "Quick task"}],
            timeout_seconds=300,
        )
        assert outcome.output == "Quick answer"
        assert outcome.status == "success"


class TestReActCancellation:
    """ReAct loop cancellation: a cancelled ``CancellationToken`` raises ``TaskCancelledError``."""

    async def test_cancel_raises_task_cancelled_error(self):
        """A token cancelled before ``execute`` starts makes it raise ``TaskCancelledError``."""
        from agentkit.core.react import ReActEngine
        from agentkit.core.protocol import CancellationToken
        from agentkit.core.exceptions import TaskCancelledError

        async def plain_chat(**kwargs):
            return make_response(content="response")

        gateway = MagicMock(spec=LLMGateway)
        gateway.chat = AsyncMock(side_effect=plain_chat)
        engine = ReActEngine(llm_gateway=gateway)

        token = CancellationToken()
        # Cancel before execution starts.
        token.cancel()

        with pytest.raises(TaskCancelledError):
            await engine.execute(
                messages=[{"role": "user", "content": "Task"}],
                cancellation_token=token,
            )

    async def test_cancel_mid_execution(self):
        """A token cancelled from inside an LLM call makes ``execute`` raise ``TaskCancelledError``."""
        from agentkit.core.react import ReActEngine
        from agentkit.core.protocol import CancellationToken
        from agentkit.core.exceptions import TaskCancelledError

        token = CancellationToken()

        async def chat_with_cancel(**kwargs):
            # Cancel while the loop is running, then answer with a tool call
            # so the engine would otherwise keep going.
            token.cancel()
            return make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="search", arguments={"q": "test"})],
            )

        tool = FakeTool(name="search", result={"results": ["data"]})
        gateway = MagicMock(spec=LLMGateway)
        gateway.chat = AsyncMock(side_effect=chat_with_cancel)
        engine = ReActEngine(llm_gateway=gateway)

        with pytest.raises(TaskCancelledError):
            await engine.execute(
                messages=[{"role": "user", "content": "Search"}],
                tools=[tool],
                cancellation_token=token,
            )

    async def test_no_cancel_token_works_normally(self):
        """Without a cancellation token the engine completes normally."""
        from agentkit.core.react import ReActEngine

        gateway = make_mock_gateway([
            make_response(content="Normal answer"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Normal task"}],
            # No cancellation_token
        )
        assert result.output == "Normal answer"
        assert result.status == "success"

    async def test_uncancelled_token_works_normally(self):
        """A token that is never cancelled does not interfere with execution."""
        from agentkit.core.react import ReActEngine
        from agentkit.core.protocol import CancellationToken

        gateway = make_mock_gateway([
            make_response(content="Answer"),
        ])
        engine = ReActEngine(llm_gateway=gateway)

        token = CancellationToken()  # Not cancelled
        result = await engine.execute(
            messages=[{"role": "user", "content": "Task"}],
            cancellation_token=token,
        )
        assert result.output == "Answer"
        assert result.status == "success"


class TestLoopDetection:
    """Loop detection: repeated tool calls inside the ReAct loop are detected."""

    async def test_normal_different_tools_no_detection(self):
        """Calls to different tools do not trigger detection."""
        from agentkit.core.react import ReActEngine

        searcher = FakeTool(name="search", result={"results": ["a"]})
        calculator = FakeTool(name="calculator", result={"value": 42})
        search_call = ToolCall(id="tc_1", name="search", arguments={"q": "test"})
        calc_call = ToolCall(id="tc_2", name="calculator", arguments={"expr": "6*7"})
        llm = make_mock_gateway([
            make_response(tool_calls=[search_call]),
            make_response(tool_calls=[calc_call]),
            make_response(content="Done"),
        ])

        outcome = await ReActEngine(llm_gateway=llm).execute(
            messages=[{"role": "user", "content": "Search and calculate"}],
            tools=[searcher, calculator],
        )
        assert outcome.status == "success"
        assert outcome.total_steps == 3

    async def test_same_tool_different_args_no_detection(self):
        """The same tool with different arguments does not trigger detection."""
        from agentkit.core.react import ReActEngine

        searcher = FakeTool(name="search", result={"results": []})
        hello_call = ToolCall(id="tc_1", name="search", arguments={"q": "hello"})
        world_call = ToolCall(id="tc_2", name="search", arguments={"q": "world"})
        llm = make_mock_gateway([
            make_response(tool_calls=[hello_call]),
            make_response(tool_calls=[world_call]),
            make_response(content="Done"),
        ])

        outcome = await ReActEngine(llm_gateway=llm).execute(
            messages=[{"role": "user", "content": "Search twice"}],
            tools=[searcher],
        )
        assert outcome.status == "success"
        assert outcome.total_steps == 3

    async def test_loop_detected_injects_correction_then_raises(self):
        """Repeating the same tool and arguments: first a correction is injected, then ``LoopDetectedError``."""
        from agentkit.core.react import ReActEngine
        from agentkit.core.exceptions import LoopDetectedError

        searcher = FakeTool(name="search", result={"results": []})
        # Step 1: tool call (executed, window=[hash])
        # Step 2: same tool call (detected, correction injected, continue)
        # Step 3: same tool call again (detected, already corrected -> raise)
        repeated = [
            make_response(
                tool_calls=[ToolCall(id=f"tc_{n}", name="search", arguments={"q": "test"})],
            )
            for n in (1, 2, 3)
        ]
        react = ReActEngine(llm_gateway=make_mock_gateway(repeated), max_steps=10)

        with pytest.raises(LoopDetectedError) as caught:
            await react.execute(
                messages=[{"role": "user", "content": "Search"}],
                tools=[searcher],
            )
        assert "search" in str(caught.value)

    async def test_loop_correction_allows_recovery(self):
        """After the injected correction, an LLM that changes strategy finishes normally."""
        from agentkit.core.react import ReActEngine

        searcher = FakeTool(name="search", result={"results": []})
        # Step 1: tool call (executed)
        # Step 2: same tool call (detected, correction injected)
        # Step 3: the LLM changes strategy -> final answer
        scripted = [
            make_response(
                tool_calls=[ToolCall(id=f"tc_{n}", name="search", arguments={"q": "test"})],
            )
            for n in (1, 2)
        ]
        scripted.append(make_response(content="I found the answer after changing strategy"))
        react = ReActEngine(llm_gateway=make_mock_gateway(scripted), max_steps=10)

        outcome = await react.execute(
            messages=[{"role": "user", "content": "Search"}],
            tools=[searcher],
        )
        assert outcome.status == "success"
        assert "changing strategy" in outcome.output

    async def test_reset_clears_loop_state(self):
        """``reset()`` clears the loop-detection state."""
        from agentkit.core.react import ReActEngine

        llm = make_mock_gateway([make_response(content="Done")])
        react = ReActEngine(llm_gateway=llm)
        # Dirty the loop-detection state by hand before resetting it.
        react._loop_window.append("some_hash")
        react._loop_corrected = True

        react.reset()

        assert len(react._loop_window) == 0
        assert react._loop_corrected is False


# ── U3: Headroom 压缩测试 ─────────────────────────────────


def _make_messages(token_count: int) -> list[dict]:
    """Create messages with approximately the given token count (4 chars = 1 token)."""
    char_count = token_count * 4
    return [{"role": "user", "content": "x" * char_count}]


class TestHeadroomCompression:
    """U3: proactive compression trigger, driven by the predicted token usage."""

    def test_headroom_triggers_when_ratio_exceeds_threshold(self):
        """Happy path: 110K tokens, model_limit 128K -> 0.86 > 0.8 -> triggers."""
        from agentkit.core.compressor import ContextCompressor

        conversation = _make_messages(110_000)
        subject = ContextCompressor(model_context_limit=128_000)

        assert subject.should_compress(conversation) is True

    def test_headroom_does_not_trigger_below_min_tokens(self):
        """Edge case: 5K tokens, model_limit 128K -> no trigger (below min_tokens 8000)."""
        from agentkit.core.compressor import ContextCompressor

        conversation = _make_messages(5_000)
        subject = ContextCompressor(model_context_limit=128_000)

        assert subject.should_compress(conversation) is False

    def test_headroom_triggers_for_small_model(self):
        """Edge case: model_limit 8K, conversation 7K -> 0.875 > 0.8 -> triggers."""
        from agentkit.core.compressor import ContextCompressor

        conversation = _make_messages(7_000)
        subject = ContextCompressor(model_context_limit=8_000)

        assert subject.should_compress(conversation) is True

    def test_react_skips_compression_when_unavailable(self):
        """Error path: compressor ``is_available()`` is False -> compression is skipped."""
        from agentkit.core.react import ReActEngine

        react = ReActEngine(llm_gateway=make_mock_gateway([make_response(content="Done")]))

        unavailable = MagicMock()
        unavailable.is_available.return_value = False
        unavailable.should_compress = MagicMock(return_value=True)

        big_conversation = [{"role": "user", "content": "x" * 100000}]
        decision = react._should_compress(big_conversation, unavailable)

        assert decision is False

    def test_react_delegates_to_compressor_should_compress(self):
        """ReActEngine._should_compress delegates to compressor.should_compress."""
        from agentkit.core.react import ReActEngine

        react = ReActEngine(llm_gateway=make_mock_gateway([make_response(content="Done")]))

        wants_compression = ContextCompressorStub(available=True, compress=True)
        decision = react._should_compress([{"role": "user", "content": "test"}], wants_compression)
        assert decision is True

        declines_compression = ContextCompressorStub(available=True, compress=False)
        decision = react._should_compress([{"role": "user", "content": "test"}], declines_compression)
        assert decision is False

    def test_react_fallback_for_compressors_without_should_compress(self):
        """Fallback: compressors without should_compress use a fixed threshold."""
        from agentkit.core.react import ReActEngine

        react = ReActEngine(llm_gateway=make_mock_gateway([make_response(content="Done")]))

        # A compressor that has is_available but no should_compress method.
        legacy = MagicMock()
        legacy.is_available.return_value = True
        # Deleting the attribute makes the MagicMock raise AttributeError for it,
        # which exercises the fallback path.
        del legacy.should_compress

        # Below the threshold -> no compression.
        below = _make_messages(4_000)
        assert react._should_compress(below, legacy) is False

        # Above the threshold -> compression.
        above = _make_messages(10_000)
        assert react._should_compress(above, legacy) is True


class ContextCompressorStub:
    """Minimal compressor stand-in that answers with two canned flags.

    ``available`` is what ``is_available()`` returns; ``compress`` is what
    ``should_compress()`` returns, whatever messages it is given.
    """

    def __init__(self, available: bool, compress: bool):
        self._canned_available = available
        self._canned_compress = compress

    def is_available(self) -> bool:
        """Return the ``available`` flag given at construction."""
        return self._canned_available

    def should_compress(self, messages: list[dict]) -> bool:
        """Return the ``compress`` flag given at construction; ``messages`` is ignored."""
        return self._canned_compress


class TestParseMalformedToolUse:
    """Tolerant parsing of malformed ``<tool_use>`` text."""

    @staticmethod
    def _parse(text):
        """Feed *text* to ``_parse_text_tool_calls`` on a fresh engine.

        The engine is built over a ``MagicMock`` specced to ``LLMGateway``.
        """
        from agentkit.core.react import ReActEngine

        engine = ReActEngine(llm_gateway=MagicMock(spec=LLMGateway))
        return engine._parse_text_tool_calls(text)

    def test_missing_closing_tag_still_parses(self):
        """The model emits ``<tool_use>`` without a closing ``</tool_use>`` tag."""
        raw = (
            '<tool_use>\n'
            '{"name": "shell", "arguments": {"command": "ls -la"}}\n'
        )
        parsed = self._parse(raw)

        assert len(parsed) == 1
        only_call = parsed[0]
        assert only_call["name"] == "shell"
        assert only_call["arguments"]["command"] == "ls -la"

    def test_malformed_json_with_stray_tags(self):
        """Tool name and arguments are still extracted when stray tags such as
        ``<parameter>`` are mixed in with the JSON."""
        raw = (
            '<tool_use>\n'
            '{"name": "shell", "arguments": {"command": "sudo chown -R $USER /tmp"}}\n'
            '</parameter>\n'
            '<parameter=timeout>30</parameter>\n'
            '<function>\n'
        )
        parsed = self._parse(raw)

        assert len(parsed) == 1
        only_call = parsed[0]
        assert only_call["name"] == "shell"
        assert "chown" in only_call["arguments"]["command"]

    def test_truncated_json_still_extracts_name(self):
        """The tool name is still extracted when the JSON is cut off."""
        raw = '<tool_use>\n{"name": "web_search", "arguments": {"query": "test"'
        parsed = self._parse(raw)

        assert len(parsed) == 1
        assert parsed[0]["name"] == "web_search"

    def test_completely_unparseable_tool_use_returns_empty(self):
        """A ``<tool_use>`` that cannot be parsed at all yields an empty list."""
        parsed = self._parse('<tool_use>\ngarbage not json at all\n')
        assert parsed == []


class TestMalformedToolUseNotLeakedAsFinalAnswer:
    """Malformed ``<tool_use>`` text must not leak to the user as the final answer."""

    async def test_malformed_tool_use_triggers_correction_not_leak(self):
        """When the model emits a malformed ``<tool_use>``, the raw XML must not
        be returned as the final answer."""
        from agentkit.core.react import ReActEngine

        shell_tool = FakeTool(name="shell", result={"output": "done", "exit_code": 0})
        broken_reply = (
            '<tool_use>\n'
            '{"name": "shell", "arguments": {"command": "ls"}}\n'
            '</parameter>\n<function>\n'
        )
        # Scripted gateway replies: the malformed block first, then a plain answer.
        scripted = [
            make_response(content=broken_reply),
            make_response(content="Done successfully"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted))

        result = await engine.execute(
            messages=[{"role": "user", "content": "list files"}],
            tools=[shell_tool],
        )

        # None of the raw markup fragments may appear in the final output.
        for fragment in ("<tool_use>", "</parameter>", "<function>"):
            assert fragment not in result.output

    async def test_completely_unparseable_tool_use_injects_correction(self):
        """When ``<tool_use>`` cannot be parsed at all, a correction message is
        injected so the model retries."""
        from agentkit.core.react import ReActEngine

        search_tool = FakeTool(name="search", result={"results": ["data"]})
        scripted = [
            # First reply: a completely unparseable <tool_use>.
            make_response(content="<tool_use>\nnot json at all just words\n"),
            # Second reply: the model answers normally after being corrected.
            make_response(content="Search completed"),
        ]
        engine = ReActEngine(llm_gateway=make_mock_gateway(scripted))

        result = await engine.execute(
            messages=[{"role": "user", "content": "search something"}],
            tools=[search_tool],
        )

        # The raw XML must not be surfaced as the final answer.
        final_text = result.output
        assert "<tool_use>" not in final_text
        assert final_text == "Search completed"


class TestReActToolUsePromptRules:
    """Text assertions on the rules emitted by ``_build_tool_use_prompt`` (U4 / Bug 2 L0)."""

    @staticmethod
    def _prompt_without_tools():
        """Build the tool-use prompt for an empty tool list.

        The engine is created over a gateway mock with no scripted responses.
        """
        from agentkit.core.react import ReActEngine

        engine = ReActEngine(llm_gateway=make_mock_gateway([]))
        return engine._build_tool_use_prompt([])

    def test_new_rule_1_present_at_top(self):
        """New rule 1 (the "must use tools for external information ..." rule)
        is present in the prompt."""
        expected_rule = "1. 涉及外部信息、实时数据、多步骤分析或你不确定的事实时必须使用工具"
        assert expected_rule in self._prompt_without_tools()

    def test_old_rule_3_absent(self):
        """Old rule 3 ("answer directly if no tool is needed") no longer appears."""
        retired_rule = "如果不需要工具就能回答，直接回答即可"
        assert retired_rule not in self._prompt_without_tools()

    def test_rules_in_correct_order(self):
        """Rules numbered 1-5 appear in the expected order."""
        prompt = self._prompt_without_tools()

        # Opening text of each numbered rule, in the order they must appear.
        rule_openers = (
            "1. 涉及外部信息",
            "2. 每次只调用一个工具",
            "3. 等待工具返回结果",
            "4. 仅在确实无需工具时",
            "5. 不要在回答中重复工具的输出",
        )
        # str.index raises ValueError for a missing rule, failing the test.
        offsets = [prompt.index(opener) for opener in rule_openers]

        # Each rule must start strictly before the next one.
        assert all(
            earlier < later for earlier, later in zip(offsets, offsets[1:])
        )

    def test_tool_use_xml_format_preserved(self):
        """The ``<tool_use>`` XML format example stays backward compatible."""
        prompt = self._prompt_without_tools()

        for tag in ("<tool_use>", "</tool_use>"):
            assert tag in prompt


class TestBug2L0PromptRules:
    """Bug 2 L0 end-to-end check: ``_build_tool_use_prompt`` contains the tool
    description plus the new rules.

    Bug 2 status: hypothesis applied, pending L4 verification (not fixed).
    L0 performs text assertions only; the real-LLM smoke test runs in the
    separate L1/L2 plan.
    """

    @staticmethod
    def _prompt_with(tool):
        """Build the tool-use prompt for a single registered *tool*.

        The engine is created over a gateway mock with no scripted responses.
        """
        from agentkit.core.react import ReActEngine

        engine = ReActEngine(llm_gateway=make_mock_gateway([]))
        return engine._build_tool_use_prompt([tool])

    def test_web_search_description_in_prompt(self):
        """With a ``web_search`` tool registered, the prompt contains its description."""
        prompt = self._prompt_with(
            FakeTool(
                name="web_search",
                description="搜索互联网信息，获取实时数据、新闻、趋势等",
            )
        )

        # Per the original note: web_search is not a core tool, so it is
        # rendered as an extended tool, in the form
        # "- name: first_line_of_description".
        assert "web_search" in prompt
        assert "搜索互联网信息" in prompt

    def test_new_rule_1_present_with_tools(self):
        """With a tool registered, the prompt still contains new rule 1."""
        prompt = self._prompt_with(
            FakeTool(
                name="web_search",
                description="搜索互联网信息",
            )
        )

        expected_rule = "1. 涉及外部信息、实时数据、多步骤分析或你不确定的事实时必须使用工具"
        retired_rule = "如果不需要工具就能回答，直接回答即可"
        assert expected_rule in prompt
        assert retired_rule not in prompt