# fischer-agentkit/tests/unit/tools/test_computer_use.py

"""ComputerUseTool / ComputerUseSession / ComputerUseRecorder 单元测试

测试场景：
- 截屏并识别 UI 元素 → 返回可操作区域列表
- 点击指定坐标 → 操作成功
- 输入文本到输入框 → 操作成功
- 多步骤 UI 操作 → 每步根据结果决定下一步
- API 不可用时降级到 ShellTool → 正确降级
- Covers AE2: Computer Use 失败 → 降级到 OA 系统 API
- 操作录制回放 → 可回放操作序列
"""

from __future__ import annotations

import json
import tempfile
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from agentkit.tools.computer_use import ComputerUseTool
from agentkit.tools.computer_use_session import (
    ComputerUseSession,
    InMemoryComputerUseSession,
    DockerComputerUseSession,
    ComputerUseSessionManager,
    ActionResult,
    ScreenInfo,
)
from agentkit.tools.computer_use_recorder import ComputerUseRecorder, ActionRecord


# ============================================================
# ActionResult 测试
# ============================================================


class TestActionResult:
    """Unit tests for the ActionResult data class."""

    def test_success_result(self):
        """A successful result exposes its fields; error and screenshot are empty strings."""
        message = "Clicked at (100, 200)"
        outcome = ActionResult(success=True, action="click", output=message)

        assert outcome.success is True
        assert outcome.action == "click"
        assert outcome.output == message
        assert outcome.error == ""
        assert outcome.screenshot_base64 == ""

    def test_failure_result(self):
        """A failed result carries the error message it was built with."""
        outcome = ActionResult(success=False, action="click", error="Session not started")

        assert outcome.success is False
        assert outcome.error == "Session not started"

    def test_result_with_metadata(self):
        """Metadata passed to the constructor is readable from the result."""
        meta = {"screen_state": {"cursor": (0, 0)}}
        outcome = ActionResult(success=True, action="screenshot", metadata=meta)

        assert outcome.metadata["screen_state"]["cursor"] == (0, 0)


# ============================================================
# ScreenInfo 测试
# ============================================================


class TestScreenInfo:
    """Unit tests for the ScreenInfo data class."""

    def test_default_screen(self):
        """Without arguments the screen is expected to be 1280x720."""
        default_screen = ScreenInfo()
        assert default_screen.width == 1280
        assert default_screen.height == 720

    def test_custom_screen(self):
        """Explicit width/height values are stored as given."""
        full_hd = ScreenInfo(width=1920, height=1080)
        assert full_hd.width == 1920
        assert full_hd.height == 1080


# ============================================================
# InMemoryComputerUseSession 测试
# ============================================================


class TestInMemoryComputerUseSession:
    """Unit tests for InMemoryComputerUseSession, the in-memory simulated session."""

    @staticmethod
    async def _started_session():
        """Create a default in-memory session and start it."""
        sess = InMemoryComputerUseSession()
        await sess.start()
        return sess

    def test_construction_default(self):
        """Default construction yields an id, a 1280x720 screen and a stopped state."""
        sess = InMemoryComputerUseSession()
        assert sess.session_id is not None
        assert sess.screen.width == 1280
        assert sess.screen.height == 720
        assert sess.is_started is False

    def test_construction_custom(self):
        """Custom id and screen size are honoured."""
        sess = InMemoryComputerUseSession(
            session_id="test-123",
            screen_width=1920,
            screen_height=1080,
        )
        assert sess.session_id == "test-123"
        assert sess.screen.width == 1920

    @pytest.mark.asyncio
    async def test_start_stop(self):
        """start() and stop() toggle is_started."""
        sess = InMemoryComputerUseSession()
        assert sess.is_started is False

        await sess.start()
        assert sess.is_started is True

        await sess.stop()
        assert sess.is_started is False

    @pytest.mark.asyncio
    async def test_screenshot_not_started(self):
        """Taking a screenshot before start() fails."""
        sess = InMemoryComputerUseSession()
        shot = await sess.screenshot()
        assert shot.success is False
        assert "not started" in shot.error

    @pytest.mark.asyncio
    async def test_screenshot_started(self):
        """Taking a screenshot after start() succeeds."""
        sess = await self._started_session()
        shot = await sess.screenshot()
        assert shot.success is True
        assert shot.action == "screenshot"
        assert "1280x720" in shot.output

    @pytest.mark.asyncio
    async def test_click_action(self):
        """Click action."""
        sess = await self._started_session()
        outcome = await sess.execute_action("click", x=100, y=200)
        assert outcome.success is True
        assert "(100, 200)" in outcome.output

    @pytest.mark.asyncio
    async def test_type_action(self):
        """Text input action."""
        sess = await self._started_session()
        outcome = await sess.execute_action("type", text="hello world")
        assert outcome.success is True
        assert "hello world" in outcome.output

    @pytest.mark.asyncio
    async def test_scroll_action(self):
        """Scroll action."""
        sess = await self._started_session()
        outcome = await sess.execute_action("scroll", direction="down", amount=5)
        assert outcome.success is True
        assert "down" in outcome.output
        assert "5" in outcome.output

    @pytest.mark.asyncio
    async def test_drag_action(self):
        """Drag action."""
        sess = await self._started_session()
        drag_params = {"start_x": 10, "start_y": 20, "end_x": 100, "end_y": 200}
        outcome = await sess.execute_action("drag", **drag_params)
        assert outcome.success is True
        assert "(10,20)" in outcome.output
        assert "(100,200)" in outcome.output

    @pytest.mark.asyncio
    async def test_key_action(self):
        """Key press action."""
        sess = await self._started_session()
        outcome = await sess.execute_action("key", key_name="Enter")
        assert outcome.success is True
        assert "Enter" in outcome.output

    @pytest.mark.asyncio
    async def test_wait_action(self):
        """Wait action."""
        sess = await self._started_session()
        outcome = await sess.execute_action("wait", duration=2.0)
        assert outcome.success is True
        assert "2.0" in outcome.output

    @pytest.mark.asyncio
    async def test_unknown_action(self):
        """An unknown action type is rejected."""
        sess = await self._started_session()
        outcome = await sess.execute_action("unknown_action")
        assert outcome.success is False
        assert "Unknown" in outcome.error

    @pytest.mark.asyncio
    async def test_action_not_started(self):
        """Executing an action before start() fails."""
        sess = InMemoryComputerUseSession()
        outcome = await sess.execute_action("click", x=0, y=0)
        assert outcome.success is False
        assert "not started" in outcome.error

    @pytest.mark.asyncio
    async def test_action_history(self):
        """Executed actions are appended to the history in order."""
        sess = await self._started_session()
        await sess.execute_action("click", x=10, y=20)
        await sess.execute_action("type", text="test")

        assert len(sess.action_history) == 2
        first_entry = sess.action_history[0]
        second_entry = sess.action_history[1]
        assert first_entry["action"] == "click"
        assert second_entry["action"] == "type"

    @pytest.mark.asyncio
    async def test_action_history_is_copy(self):
        """action_history returns a copy; clearing it leaves the session untouched."""
        sess = await self._started_session()
        await sess.execute_action("click", x=0, y=0)

        snapshot = sess.action_history
        snapshot.clear()

        assert len(sess.action_history) == 1

    def test_repr(self):
        """repr() mentions the implementation and the stopped state."""
        text = repr(InMemoryComputerUseSession(session_id="test-123"))
        assert "InMemory" in text
        assert "stopped" in text


# ============================================================
# DockerComputerUseSession 测试
# ============================================================


class TestDockerComputerUseSession:
    """Unit tests for DockerComputerUseSession (stub implementation)."""

    SESSION_ID = "docker-1"

    def test_construction(self):
        """A fresh session keeps its id and has no container yet."""
        docker_session = DockerComputerUseSession(
            session_id=self.SESSION_ID,
            container_image="anthropic/computer-use-demo:latest",
        )
        assert docker_session.session_id == "docker-1"
        assert docker_session.container_id is None

    @pytest.mark.asyncio
    async def test_start_stop(self):
        """start() assigns a container id; stop() clears it again."""
        docker_session = DockerComputerUseSession(session_id=self.SESSION_ID)

        await docker_session.start()
        assert docker_session.is_started is True
        assert docker_session.container_id is not None

        await docker_session.stop()
        assert docker_session.is_started is False
        assert docker_session.container_id is None

    @pytest.mark.asyncio
    async def test_screenshot_not_started(self):
        """Screenshot on a session that was never started fails."""
        docker_session = DockerComputerUseSession(session_id=self.SESSION_ID)
        shot = await docker_session.screenshot()
        assert shot.success is False

    @pytest.mark.asyncio
    async def test_screenshot_started(self):
        """Screenshot on a started session succeeds."""
        docker_session = DockerComputerUseSession(session_id=self.SESSION_ID)
        await docker_session.start()
        shot = await docker_session.screenshot()
        assert shot.success is True

    @pytest.mark.asyncio
    async def test_execute_action_not_started(self):
        """Actions on a session that was never started fail."""
        docker_session = DockerComputerUseSession(session_id=self.SESSION_ID)
        outcome = await docker_session.execute_action("click", x=0, y=0)
        assert outcome.success is False

    @pytest.mark.asyncio
    async def test_execute_action_started(self):
        """Actions on a started session succeed."""
        docker_session = DockerComputerUseSession(session_id=self.SESSION_ID)
        await docker_session.start()
        outcome = await docker_session.execute_action("click", x=100, y=200)
        assert outcome.success is True


# ============================================================
# ComputerUseSessionManager 测试
# ============================================================


class TestComputerUseSessionManager:
    """Unit tests for ComputerUseSessionManager session bookkeeping."""

    def test_get_or_create_new(self):
        """An unknown id produces a new session carrying that id."""
        mgr = ComputerUseSessionManager()
        created = mgr.get_or_create("s1")
        assert created.session_id == "s1"

    def test_get_or_create_existing(self):
        """Asking twice for the same id returns the very same object."""
        mgr = ComputerUseSessionManager()
        first = mgr.get_or_create("s1")
        second = mgr.get_or_create("s1")
        assert first is second

    def test_get_existing(self):
        """get() finds a session that was created earlier."""
        mgr = ComputerUseSessionManager()
        mgr.get_or_create("s1")
        found = mgr.get("s1")
        assert found is not None

    def test_get_nonexistent(self):
        """get() returns None for an id that was never created."""
        mgr = ComputerUseSessionManager()
        missing = mgr.get("nonexistent")
        assert missing is None

    def test_remove(self):
        """A removed session can no longer be looked up."""
        mgr = ComputerUseSessionManager()
        mgr.get_or_create("s1")
        mgr.remove("s1")
        assert mgr.get("s1") is None

    def test_list_sessions(self):
        """list_sessions() reports every created id."""
        mgr = ComputerUseSessionManager()
        for sid in ("s1", "s2"):
            mgr.get_or_create(sid)
        assert sorted(mgr.list_sessions()) == ["s1", "s2"]

    def test_has_session(self):
        """has_session() distinguishes known from unknown ids."""
        mgr = ComputerUseSessionManager()
        mgr.get_or_create("s1")
        assert mgr.has_session("s1") is True
        assert mgr.has_session("s2") is False

    def test_max_sessions_eviction(self):
        """Creating a third session with max_sessions=2 drops the first one."""
        mgr = ComputerUseSessionManager(max_sessions=2)
        for sid in ("s1", "s2", "s3"):
            mgr.get_or_create(sid)
        assert not mgr.has_session("s1")
        assert mgr.has_session("s2")
        assert mgr.has_session("s3")

    @pytest.mark.asyncio
    async def test_close_all(self):
        """close_all() empties the manager and stops the started sessions."""
        mgr = ComputerUseSessionManager()
        first = mgr.get_or_create("s1")
        second = mgr.get_or_create("s2")
        await first.start()
        await second.start()

        await mgr.close_all()

        assert mgr.list_sessions() == []
        assert first.is_started is False
        assert second.is_started is False

    def test_custom_session_factory(self):
        """A custom session_factory decides the type of created sessions."""
        mgr = ComputerUseSessionManager(session_factory=DockerComputerUseSession)
        created = mgr.get_or_create("docker-1")
        assert isinstance(created, DockerComputerUseSession)


# ============================================================
# ComputerUseRecorder 测试
# ============================================================


class TestActionRecord:
    """Unit tests for the ActionRecord data class."""

    def test_to_dict(self):
        """to_dict() exposes action, params and success."""
        click_record = ActionRecord(
            timestamp=1000.0,
            action="click",
            params={"x": 100, "y": 200},
            success=True,
            output="Clicked at (100, 200)",
        )
        as_dict = click_record.to_dict()
        assert as_dict["action"] == "click"
        assert as_dict["params"]["x"] == 100
        assert as_dict["success"] is True

    def test_from_dict(self):
        """from_dict() rebuilds a record from a plain dictionary."""
        payload = dict(
            timestamp=1000.0,
            action="type",
            params={"text": "hello"},
            success=True,
            output="Typed: hello",
            error="",
            screenshot_path="",
        )
        rebuilt = ActionRecord.from_dict(payload)
        assert rebuilt.action == "type"
        assert rebuilt.params["text"] == "hello"

    def test_roundtrip(self):
        """to_dict() followed by from_dict() preserves action, success and error."""
        original = ActionRecord(
            timestamp=1000.0,
            action="click",
            params={"x": 50, "y": 60},
            success=False,
            error="Timeout",
        )
        clone = ActionRecord.from_dict(original.to_dict())
        assert clone.action == original.action
        assert clone.success == original.success
        assert clone.error == original.error


class TestComputerUseRecorder:
    """Unit tests for ComputerUseRecorder, the action recorder."""

    @staticmethod
    def _ok(action, output="ok"):
        """Build a successful ActionResult for *action*."""
        return ActionResult(success=True, action=action, output=output)

    @staticmethod
    def _failed(action, error):
        """Build a failed ActionResult for *action*."""
        return ActionResult(success=False, action=action, error=error)

    def test_record_action(self):
        """record() returns a record mirroring the action and its outcome."""
        rec = ComputerUseRecorder()
        entry = rec.record("click", {"x": 100, "y": 200}, self._ok("click", "Clicked"))
        assert entry.action == "click"
        assert entry.success is True

    def test_get_records(self):
        """get_records() returns every recorded action."""
        rec = ComputerUseRecorder()
        click_result = self._ok("click")
        type_result = self._ok("type")
        rec.record("click", {"x": 1, "y": 2}, click_result)
        rec.record("type", {"text": "hi"}, type_result)
        assert len(rec.get_records()) == 2

    def test_get_records_is_copy(self):
        """Mutating the list returned by get_records() does not affect the recorder."""
        rec = ComputerUseRecorder()
        rec.record("click", {"x": 1, "y": 2}, self._ok("click"))

        snapshot = rec.get_records()
        snapshot.clear()

        assert len(rec.get_records()) == 1

    def test_get_records_by_action(self):
        """get_records_by_action() filters by action name."""
        rec = ComputerUseRecorder()
        first_click = self._ok("click")
        typed = self._ok("type")
        second_click = self._ok("click")
        rec.record("click", {"x": 1, "y": 2}, first_click)
        rec.record("type", {"text": "hi"}, typed)
        rec.record("click", {"x": 3, "y": 4}, second_click)

        clicks = rec.get_records_by_action("click")
        assert len(clicks) == 2

    def test_get_failed_records(self):
        """get_failed_records() returns only the unsuccessful entries."""
        rec = ComputerUseRecorder()
        good = self._ok("click")
        bad = self._failed("type", "failed")
        rec.record("click", {"x": 1, "y": 2}, good)
        rec.record("type", {"text": "hi"}, bad)

        failures = rec.get_failed_records()
        assert len(failures) == 1
        assert failures[0].action == "type"

    def test_total_actions(self):
        """total_actions starts at zero and grows with each record."""
        rec = ComputerUseRecorder()
        assert rec.total_actions == 0

        outcome = self._ok("click")
        rec.record("click", {"x": 1, "y": 2}, outcome)
        assert rec.total_actions == 1

    def test_success_failure_counts(self):
        """success_count and failure_count split the records by outcome."""
        rec = ComputerUseRecorder()
        outcomes = [
            ("click", self._ok("click")),
            ("type", self._failed("type", "fail")),
            ("scroll", self._ok("scroll")),
        ]
        for action_name, outcome in outcomes:
            rec.record(action_name, {}, outcome)

        assert rec.success_count == 2
        assert rec.failure_count == 1

    def test_summary(self):
        """summary() reports totals, per-outcome counts and the action types seen."""
        rec = ComputerUseRecorder()
        good = self._ok("click")
        bad = self._failed("type", "fail")
        rec.record("click", {}, good)
        rec.record("type", {}, bad)

        report = rec.summary()
        assert report["total_actions"] == 2
        assert report["success_count"] == 1
        assert report["failure_count"] == 1
        assert "click" in report["action_types"]
        assert "type" in report["action_types"]

    def test_clear(self):
        """clear() drops every record."""
        rec = ComputerUseRecorder()
        rec.record("click", {}, self._ok("click"))
        rec.clear()
        assert rec.total_actions == 0

    def test_save_and_load_recording(self):
        """A saved recording can be loaded into a fresh recorder."""
        source = ComputerUseRecorder()
        click_result = self._ok("click", "Clicked at (10, 20)")
        type_result = self._ok("type", "Typed: hello")
        source.record("click", {"x": 10, "y": 20}, click_result)
        source.record("type", {"text": "hello"}, type_result)

        # Reserve a file name; the handle is closed before the recorder writes to it.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as handle:
            recording_path = handle.name

        try:
            source.save_recording(recording_path)

            # Load into a brand-new recorder.
            target = ComputerUseRecorder()
            target.load_recording(recording_path)
            assert target.total_actions == 2

            loaded = target.get_records()
            assert loaded[0].action == "click"
            assert loaded[1].action == "type"
        finally:
            Path(recording_path).unlink(missing_ok=True)

    def test_load_nonexistent_file(self):
        """Loading a missing file raises FileNotFoundError."""
        rec = ComputerUseRecorder()
        with pytest.raises(FileNotFoundError):
            rec.load_recording("/nonexistent/path/recording.json")

    def test_load_invalid_format(self):
        """Loading JSON that is not a recording raises ValueError."""
        rec = ComputerUseRecorder()
        with tempfile.NamedTemporaryFile(
            suffix=".json", delete=False, mode="w"
        ) as handle:
            json.dump({"invalid": True}, handle)
            bogus_path = handle.name

        try:
            with pytest.raises(ValueError, match="Invalid recording format"):
                rec.load_recording(bogus_path)
        finally:
            Path(bogus_path).unlink(missing_ok=True)

    @pytest.mark.asyncio
    async def test_replay(self):
        """Replaying a recorded sequence runs every action on the session."""
        rec = ComputerUseRecorder()
        click_result = self._ok("click", "Clicked")
        type_result = self._ok("type", "Typed")
        rec.record("click", {"x": 10, "y": 20}, click_result)
        rec.record("type", {"text": "hello"}, type_result)

        target_session = InMemoryComputerUseSession(session_id="replay-test")
        replayed = await rec.replay(target_session)

        assert len(replayed) == 2
        assert replayed[0].success is True
        assert replayed[1].success is True
        # The session was never started by this test, yet is started after replay.
        assert target_session.is_started is True

    @pytest.mark.asyncio
    async def test_replay_with_screenshot(self):
        """Replaying a sequence that contains a screenshot action."""
        rec = ComputerUseRecorder()
        shot_result = self._ok("screenshot", "Screen captured")
        rec.record("screenshot", {}, shot_result)

        target_session = InMemoryComputerUseSession(session_id="replay-screenshot")
        replayed = await rec.replay(target_session)

        assert len(replayed) == 1
        assert replayed[0].success is True


# ============================================================
# ComputerUseTool 构造测试
# ============================================================


class TestComputerUseToolConstruction:
    """Unit tests for constructing ComputerUseTool."""

    def test_default_construction(self):
        """Default tool is named computer_use and requires only the action field."""
        cu_tool = ComputerUseTool()
        schema = cu_tool.input_schema

        assert cu_tool.name == "computer_use"
        assert schema is not None
        assert "action" in schema["properties"]
        assert schema["required"] == ["action"]

    def test_custom_construction(self):
        """Custom name and version are stored on the tool."""
        cu_tool = ComputerUseTool(name="my_cu", version="2.0.0")
        assert cu_tool.name == "my_cu"
        assert cu_tool.version == "2.0.0"

    def test_to_dict(self):
        """to_dict() includes the name and the input schema."""
        description = ComputerUseTool().to_dict()
        assert description["name"] == "computer_use"
        assert "input_schema" in description

    def test_repr(self):
        """repr() mentions both the class name and the tool name."""
        text = repr(ComputerUseTool())
        assert "ComputerUseTool" in text
        assert "computer_use" in text

    def test_session_manager_accessible(self):
        """The tool exposes a session manager."""
        cu_tool = ComputerUseTool()
        assert cu_tool.session_manager is not None

    def test_recorder_accessible(self):
        """The tool exposes a recorder."""
        cu_tool = ComputerUseTool()
        assert cu_tool.recorder is not None

    def test_custom_recorder(self):
        """A recorder passed to the constructor is used as-is."""
        own_recorder = ComputerUseRecorder()
        cu_tool = ComputerUseTool(recorder=own_recorder)
        assert cu_tool.recorder is own_recorder


# ============================================================
# ComputerUseTool 执行测试
# ============================================================


class TestComputerUseToolExecution:
    """Unit tests for executing actions through ComputerUseTool."""

    @staticmethod
    async def _execute(**params):
        """Run a single action on a freshly constructed default tool."""
        cu_tool = ComputerUseTool()
        return await cu_tool.execute(**params)

    @pytest.mark.asyncio
    async def test_screenshot_action(self):
        """Screenshot scenario: the call succeeds and returns an output field."""
        outcome = await self._execute(action="screenshot")
        assert outcome["success"] is True
        assert outcome["action"] == "screenshot"
        assert "output" in outcome

    @pytest.mark.asyncio
    async def test_click_action(self):
        """Click at the given coordinates."""
        outcome = await self._execute(action="click", x=100, y=200)
        assert outcome["success"] is True
        assert outcome["action"] == "click"
        assert "(100, 200)" in outcome["output"]

    @pytest.mark.asyncio
    async def test_type_action(self):
        """Type text into an input field."""
        outcome = await self._execute(action="type", text="hello world")
        assert outcome["success"] is True
        assert outcome["action"] == "type"
        assert "hello world" in outcome["output"]

    @pytest.mark.asyncio
    async def test_scroll_action(self):
        """Scroll action."""
        outcome = await self._execute(action="scroll", direction="down", amount=5)
        assert outcome["success"] is True
        assert "down" in outcome["output"]

    @pytest.mark.asyncio
    async def test_drag_action(self):
        """Drag action."""
        outcome = await self._execute(
            action="drag",
            start_x=10, start_y=20,
            end_x=100, end_y=200,
        )
        assert outcome["success"] is True
        assert outcome["action"] == "drag"

    @pytest.mark.asyncio
    async def test_key_action(self):
        """Key press action."""
        outcome = await self._execute(action="key", key_name="Enter")
        assert outcome["success"] is True
        assert "Enter" in outcome["output"]

    @pytest.mark.asyncio
    async def test_wait_action(self):
        """Wait action."""
        outcome = await self._execute(action="wait", duration=0.1)
        assert outcome["success"] is True

    @pytest.mark.asyncio
    async def test_missing_action(self):
        """Calling execute() without an action parameter fails."""
        outcome = await self._execute()
        assert outcome["success"] is False
        assert "action" in outcome["error"]

    @pytest.mark.asyncio
    async def test_invalid_action(self):
        """An invalid action type is rejected."""
        outcome = await self._execute(action="invalid_action")
        assert outcome["success"] is False
        assert "无效" in outcome["error"]

    @pytest.mark.asyncio
    async def test_click_missing_coordinates(self):
        """click without coordinates fails and the error mentions x."""
        outcome = await self._execute(action="click")
        assert outcome["success"] is False
        assert "x" in outcome["error"]

    @pytest.mark.asyncio
    async def test_type_missing_text(self):
        """type without text fails and the error mentions text."""
        outcome = await self._execute(action="type")
        assert outcome["success"] is False
        assert "text" in outcome["error"]

    @pytest.mark.asyncio
    async def test_key_missing_key_name(self):
        """key without a key name fails and the error mentions key_name."""
        outcome = await self._execute(action="key")
        assert outcome["success"] is False
        assert "key_name" in outcome["error"]

    @pytest.mark.asyncio
    async def test_drag_missing_params(self):
        """drag without end coordinates fails and the error mentions end_x."""
        outcome = await self._execute(action="drag", start_x=0, start_y=0)
        assert outcome["success"] is False
        assert "end_x" in outcome["error"]

    @pytest.mark.asyncio
    async def test_session_id(self):
        """An explicit session id is echoed back in the result."""
        outcome = await self._execute(action="screenshot", session_id="my-session")
        assert outcome["success"] is True
        assert outcome["session_id"] == "my-session"

    @pytest.mark.asyncio
    async def test_session_reuse(self):
        """Two calls with the same session id share one session."""
        shared_id = "reuse-test"
        cu_tool = ComputerUseTool()

        click_outcome = await cu_tool.execute(action="click", x=10, y=20, session_id=shared_id)
        type_outcome = await cu_tool.execute(action="type", text="hello", session_id=shared_id)

        assert click_outcome["session_id"] == "reuse-test"
        assert type_outcome["session_id"] == "reuse-test"
        assert cu_tool.session_manager.has_session("reuse-test")


# ============================================================
# ComputerUseTool 多步骤 UI 操作测试
# ============================================================


class TestComputerUseToolMultiStep:
    """Unit tests for multi-step UI operations."""

    @pytest.mark.asyncio
    async def test_multi_step_ui_operation(self):
        """Multi-step flow: screenshot -> click -> type -> key press."""
        cu_tool = ComputerUseTool()
        flow_id = "multi-step"

        # Executed strictly in order; every step must succeed before the next runs.
        steps = (
            {"action": "screenshot"},                    # Step 1: capture the screen
            {"action": "click", "x": 100, "y": 200},     # Step 2: click the input box
            {"action": "type", "text": "test input"},    # Step 3: type the text
            {"action": "key", "key_name": "Enter"},      # Step 4: submit with Enter
        )
        for step in steps:
            outcome = await cu_tool.execute(**step, session_id=flow_id)
            assert outcome["success"] is True

        # All four steps were recorded as successes.
        assert cu_tool.recorder.total_actions == 4
        assert cu_tool.recorder.success_count == 4

    @pytest.mark.asyncio
    async def test_each_step_informs_next(self):
        """The result of one step decides which step runs next."""
        cu_tool = ComputerUseTool()
        flow_id = "adaptive"

        # Step 1: screenshot.
        first = await cu_tool.execute(action="screenshot", session_id=flow_id)
        assert first["success"] is True

        # Simulated decision logic: click on success, otherwise wait.
        follow_up = (
            {"action": "click", "x": 50, "y": 50}
            if first["success"]
            else {"action": "wait", "duration": 1.0}
        )
        second = await cu_tool.execute(**follow_up, session_id=flow_id)

        assert second["success"] is True


# ============================================================
# ComputerUseTool 降级测试
# ============================================================


class TestComputerUseToolFallback:
    """Unit tests for the ComputerUseTool fallback chain."""

    @staticmethod
    def _inject_failing_session(tool, session_id, error):
        """Register a mock session whose screenshot succeeds but whose actions fail.

        The mock is placed directly into the tool's session manager under
        *session_id*, so the tool picks it up for that id.
        """
        failing = AsyncMock(spec=ComputerUseSession)
        failing.session_id = session_id
        failing.screen = ScreenInfo()
        failing.is_started = True
        failing.screenshot.return_value = ActionResult(
            success=True, action="screenshot", screenshot_base64=""
        )
        failing.execute_action.return_value = ActionResult(
            success=False, action="click", error=error
        )
        tool._session_manager._sessions[session_id] = failing
        return failing

    @pytest.mark.asyncio
    async def test_fallback_without_api_key(self):
        """Without an API key the action still succeeds via the local session."""
        cu_tool = ComputerUseTool()  # no api_key
        outcome = await cu_tool.execute(action="click", x=100, y=200)
        assert outcome["success"] is True  # the InMemory session can execute it

    @pytest.mark.asyncio
    async def test_api_failure_fallback_to_session(self):
        """When the API call raises, the action falls back to the local session."""
        cu_tool = ComputerUseTool(api_key="sk-test-key")
        api_down = patch.object(
            cu_tool, "_call_anthropic_api",
            new_callable=AsyncMock,
            side_effect=Exception("API connection failed"),
        )

        with api_down:
            outcome = await cu_tool.execute(action="click", x=100, y=200)
            assert outcome["success"] is True  # fell back to the InMemory session

    @pytest.mark.asyncio
    async def test_api_and_session_failure_fallback_suggestion(self):
        """When both API and session fail, the result carries a fallback suggestion."""
        cu_tool = ComputerUseTool(api_key="sk-test-key")
        api_error = patch.object(
            cu_tool, "_call_anthropic_api",
            new_callable=AsyncMock,
            return_value=ActionResult(success=False, action="click", error="API error"),
        )

        # The API reports a failure ...
        with api_error:
            # ... and the injected session fails as well.
            self._inject_failing_session(cu_tool, "fallback-test", "Session error")

            outcome = await cu_tool.execute(
                action="click", x=100, y=200, session_id="fallback-test"
            )
            assert outcome["success"] is False
            assert "fallback_suggestion" in outcome

    @pytest.mark.asyncio
    async def test_custom_fallback_callback(self):
        """A custom fallback callback supplies the final result."""
        async def fallback(action: str, params: dict) -> dict:
            return {"success": True, "output": f"Fallback executed: {action}"}

        cu_tool = ComputerUseTool(fallback_callback=fallback)
        api_down = patch.object(
            cu_tool, "_call_anthropic_api",
            new_callable=AsyncMock,
            side_effect=Exception("API failed"),
        )

        # API patched to raise and the session mocked to fail.
        with api_down:
            self._inject_failing_session(cu_tool, "cb-test", "Session error")

            outcome = await cu_tool.execute(
                action="click", x=100, y=200, session_id="cb-test"
            )
            assert outcome["success"] is True
            assert "Fallback" in outcome["output"]

    @pytest.mark.asyncio
    async def test_ae2_computer_use_fallback_to_oa_api(self):
        """AE2: Computer Use fails -> fall back to the OA system API.

        Scenario: Computer Use cannot operate the OA system UI, so the
        operation is completed through the OA system API instead.
        """
        invocation = {"called": False}

        async def oa_api_fallback(action: str, params: dict) -> dict:
            invocation["called"] = True
            return {
                "success": True,
                "output": f"OA API completed: {action} with {params}",
            }

        cu_tool = ComputerUseTool(
            api_key="sk-test-key",
            fallback_callback=oa_api_fallback,
        )
        api_down = patch.object(
            cu_tool, "_call_anthropic_api",
            new_callable=AsyncMock,
            side_effect=Exception("API unavailable"),
        )

        # Both the API and the session fail.
        with api_down:
            self._inject_failing_session(cu_tool, "oa-test", "UI not accessible")

            outcome = await cu_tool.execute(
                action="click", x=100, y=200, session_id="oa-test"
            )
            assert outcome["success"] is True
            assert invocation["called"] is True


# ============================================================
# ComputerUseTool 录制集成测试
# ============================================================


class TestComputerUseToolRecording:
    """Integration tests for ComputerUseTool together with its recorder."""

    @staticmethod
    async def _click_then_type(tool, **extra):
        """Run the click + type sequence shared by the tests in this class.

        ``extra`` is forwarded unchanged to both ``tool.execute`` calls
        (used to pass ``session_id`` where a test needs one).
        """
        await tool.execute(action="click", x=10, y=20, **extra)
        await tool.execute(action="type", text="hello", **extra)

    @pytest.mark.asyncio
    async def test_actions_recorded(self):
        """Executed actions are recorded automatically."""
        tool = ComputerUseTool()
        await self._click_then_type(tool)
        assert tool.recorder.total_actions == 2

    @pytest.mark.asyncio
    async def test_recording_save_and_replay(self):
        """A recording can be saved, loaded into a new recorder and replayed."""
        tool = ComputerUseTool()
        await self._click_then_type(tool, session_id="rec-1")

        # Reserve a file name on disk; the handle is closed on leaving the
        # ``with`` block and the file is removed manually in ``finally``.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tmp:
            recording_path = tmp.name

        try:
            tool.recorder.save_recording(recording_path)

            # Load into a fresh recorder and replay against a new session.
            reloaded = ComputerUseRecorder()
            reloaded.load_recording(recording_path)
            assert reloaded.total_actions == 2

            replay_session = InMemoryComputerUseSession(session_id="replay-1")
            outcomes = await reloaded.replay(replay_session)
            assert len(outcomes) == 2
            assert all(outcome.success for outcome in outcomes)
        finally:
            Path(recording_path).unlink(missing_ok=True)

    @pytest.mark.asyncio
    async def test_recording_summary(self):
        """The recorder summary reports total, success and failure counts."""
        tool = ComputerUseTool()
        await self._click_then_type(tool)

        summary = tool.recorder.summary()
        expected_counts = (
            ("total_actions", 2),
            ("success_count", 2),
            ("failure_count", 0),
        )
        for key, count in expected_counts:
            assert summary[key] == count


# ============================================================
# ComputerUseTool API 调用测试（Mock httpx）
# ============================================================


class TestComputerUseToolAPICall:
    """Tests for ComputerUseTool's Anthropic API call path (httpx mocked)."""

    @staticmethod
    def _fake_async_client(*, response=None, error=None):
        """Build an AsyncMock standing in for an ``httpx.AsyncClient`` instance.

        If ``error`` is given, ``post`` raises it; otherwise ``post`` returns
        ``response``. The mock also works as an async context manager that
        yields itself.
        """
        client = AsyncMock()
        if error is not None:
            client.post.side_effect = error
        else:
            client.post.return_value = response
        client.__aenter__ = AsyncMock(return_value=client)
        client.__aexit__ = AsyncMock(return_value=False)
        return client

    @pytest.mark.asyncio
    async def test_api_call_success(self):
        """The API call succeeds."""
        tool = ComputerUseTool(api_key="sk-test-key")

        ok_response = MagicMock()
        ok_response.status_code = 200
        ok_response.json.return_value = {
            "content": [
                {
                    "type": "tool_use",
                    "name": "computer",
                    "input": {"action": "click"},
                }
            ]
        }

        with patch("httpx.AsyncClient") as client_factory:
            client_factory.return_value = self._fake_async_client(
                response=ok_response
            )

            result = await tool.execute(action="click", x=100, y=200)

        assert result["success"] is True

    @pytest.mark.asyncio
    async def test_api_call_http_error(self):
        """The API call returns an HTTP error status."""
        tool = ComputerUseTool(api_key="sk-test-key")

        rate_limited = MagicMock()
        rate_limited.status_code = 429
        rate_limited.text = "Rate limited"

        with patch("httpx.AsyncClient") as client_factory:
            client_factory.return_value = self._fake_async_client(
                response=rate_limited
            )

            # The API answers with an error; the tool is expected to fall
            # back to local execution through the session.
            result = await tool.execute(action="click", x=100, y=200)

        assert result["success"] is True  # fallback succeeded

    @pytest.mark.asyncio
    async def test_api_call_network_error(self):
        """The API call fails with a network error."""
        tool = ComputerUseTool(api_key="sk-test-key")

        with patch("httpx.AsyncClient") as client_factory:
            client_factory.return_value = self._fake_async_client(
                error=Exception("Connection refused")
            )

            # The request itself raises; the tool is expected to fall back
            # to local execution through the session.
            result = await tool.execute(action="click", x=100, y=200)

        assert result["success"] is True  # fallback succeeded