"""ComputerUseTool / ComputerUseSession / ComputerUseRecorder 单元测试 测试场景: - 截屏并识别 UI 元素 → 返回可操作区域列表 - 点击指定坐标 → 操作成功 - 输入文本到输入框 → 操作成功 - 多步骤 UI 操作 → 每步根据结果决定下一步 - API 不可用时降级到 ShellTool → 正确降级 - Covers AE2: Computer Use 失败 → 降级到 OA 系统 API - 操作录制回放 → 可回放操作序列 """ from __future__ import annotations import json import tempfile from pathlib import Path from unittest.mock import AsyncMock, MagicMock, patch import pytest from agentkit.tools.computer_use import ComputerUseTool from agentkit.tools.computer_use_session import ( ComputerUseSession, InMemoryComputerUseSession, DockerComputerUseSession, ComputerUseSessionManager, ActionResult, ScreenInfo, ) from agentkit.tools.computer_use_recorder import ComputerUseRecorder, ActionRecord # ============================================================ # ActionResult 测试 # ============================================================ class TestActionResult: """测试 ActionResult 数据类""" def test_success_result(self): result = ActionResult(success=True, action="click", output="Clicked at (100, 200)") assert result.success is True assert result.action == "click" assert result.output == "Clicked at (100, 200)" assert result.error == "" assert result.screenshot_base64 == "" def test_failure_result(self): result = ActionResult(success=False, action="click", error="Session not started") assert result.success is False assert result.error == "Session not started" def test_result_with_metadata(self): result = ActionResult( success=True, action="screenshot", metadata={"screen_state": {"cursor": (0, 0)}}, ) assert result.metadata["screen_state"]["cursor"] == (0, 0) # ============================================================ # ScreenInfo 测试 # ============================================================ class TestScreenInfo: """测试 ScreenInfo 数据类""" def test_default_screen(self): screen = ScreenInfo() assert screen.width == 1280 assert screen.height == 720 def test_custom_screen(self): screen = ScreenInfo(width=1920, height=1080) assert screen.width 
== 1920 assert screen.height == 1080 # ============================================================ # InMemoryComputerUseSession 测试 # ============================================================ class TestInMemoryComputerUseSession: """测试 InMemoryComputerUseSession 内存模拟会话""" def test_construction_default(self): session = InMemoryComputerUseSession() assert session.session_id is not None assert session.screen.width == 1280 assert session.screen.height == 720 assert session.is_started is False def test_construction_custom(self): session = InMemoryComputerUseSession( session_id="test-123", screen_width=1920, screen_height=1080, ) assert session.session_id == "test-123" assert session.screen.width == 1920 @pytest.mark.asyncio async def test_start_stop(self): session = InMemoryComputerUseSession() assert session.is_started is False await session.start() assert session.is_started is True await session.stop() assert session.is_started is False @pytest.mark.asyncio async def test_screenshot_not_started(self): """未启动时截屏失败""" session = InMemoryComputerUseSession() result = await session.screenshot() assert result.success is False assert "not started" in result.error @pytest.mark.asyncio async def test_screenshot_started(self): """启动后截屏成功""" session = InMemoryComputerUseSession() await session.start() result = await session.screenshot() assert result.success is True assert result.action == "screenshot" assert "1280x720" in result.output @pytest.mark.asyncio async def test_click_action(self): """点击操作""" session = InMemoryComputerUseSession() await session.start() result = await session.execute_action("click", x=100, y=200) assert result.success is True assert "(100, 200)" in result.output @pytest.mark.asyncio async def test_type_action(self): """输入文本操作""" session = InMemoryComputerUseSession() await session.start() result = await session.execute_action("type", text="hello world") assert result.success is True assert "hello world" in result.output @pytest.mark.asyncio async def 
test_scroll_action(self): """滚动操作""" session = InMemoryComputerUseSession() await session.start() result = await session.execute_action("scroll", direction="down", amount=5) assert result.success is True assert "down" in result.output assert "5" in result.output @pytest.mark.asyncio async def test_drag_action(self): """拖拽操作""" session = InMemoryComputerUseSession() await session.start() result = await session.execute_action( "drag", start_x=10, start_y=20, end_x=100, end_y=200 ) assert result.success is True assert "(10,20)" in result.output assert "(100,200)" in result.output @pytest.mark.asyncio async def test_key_action(self): """按键操作""" session = InMemoryComputerUseSession() await session.start() result = await session.execute_action("key", key_name="Enter") assert result.success is True assert "Enter" in result.output @pytest.mark.asyncio async def test_wait_action(self): """等待操作""" session = InMemoryComputerUseSession() await session.start() result = await session.execute_action("wait", duration=2.0) assert result.success is True assert "2.0" in result.output @pytest.mark.asyncio async def test_unknown_action(self): """未知操作类型""" session = InMemoryComputerUseSession() await session.start() result = await session.execute_action("unknown_action") assert result.success is False assert "Unknown" in result.error @pytest.mark.asyncio async def test_action_not_started(self): """未启动时执行操作失败""" session = InMemoryComputerUseSession() result = await session.execute_action("click", x=0, y=0) assert result.success is False assert "not started" in result.error @pytest.mark.asyncio async def test_action_history(self): """操作历史记录""" session = InMemoryComputerUseSession() await session.start() await session.execute_action("click", x=10, y=20) await session.execute_action("type", text="test") assert len(session.action_history) == 2 assert session.action_history[0]["action"] == "click" assert session.action_history[1]["action"] == "type" @pytest.mark.asyncio async def 
test_action_history_is_copy(self): """操作历史返回副本""" session = InMemoryComputerUseSession() await session.start() await session.execute_action("click", x=0, y=0) history = session.action_history history.clear() assert len(session.action_history) == 1 def test_repr(self): session = InMemoryComputerUseSession(session_id="test-123") r = repr(session) assert "InMemory" in r assert "stopped" in r # ============================================================ # DockerComputerUseSession 测试 # ============================================================ class TestDockerComputerUseSession: """测试 DockerComputerUseSession(stub 实现)""" def test_construction(self): session = DockerComputerUseSession( session_id="docker-1", container_image="anthropic/computer-use-demo:latest", ) assert session.session_id == "docker-1" assert session.container_id is None @pytest.mark.asyncio async def test_start_stop(self): session = DockerComputerUseSession(session_id="docker-1") await session.start() assert session.is_started is True assert session.container_id is not None await session.stop() assert session.is_started is False assert session.container_id is None @pytest.mark.asyncio async def test_screenshot_not_started(self): session = DockerComputerUseSession(session_id="docker-1") result = await session.screenshot() assert result.success is False @pytest.mark.asyncio async def test_screenshot_started(self): session = DockerComputerUseSession(session_id="docker-1") await session.start() result = await session.screenshot() assert result.success is True @pytest.mark.asyncio async def test_execute_action_not_started(self): session = DockerComputerUseSession(session_id="docker-1") result = await session.execute_action("click", x=0, y=0) assert result.success is False @pytest.mark.asyncio async def test_execute_action_started(self): session = DockerComputerUseSession(session_id="docker-1") await session.start() result = await session.execute_action("click", x=100, y=200) assert result.success is True 
# ============================================================
# Shared test helpers
# ============================================================


def _make_failing_session(session_id: str, error: str) -> AsyncMock:
    """Build a mock session whose screenshots succeed but whose actions fail.

    Args:
        session_id: Value assigned to the mock's ``session_id`` attribute.
        error: Error text carried by the failing ``execute_action`` result.

    Returns:
        An ``AsyncMock`` spec'd on ``ComputerUseSession`` and marked as started.
    """
    session = AsyncMock(spec=ComputerUseSession)
    session.session_id = session_id
    session.screen = ScreenInfo()
    session.is_started = True
    session.screenshot.return_value = ActionResult(
        success=True, action="screenshot", screenshot_base64=""
    )
    session.execute_action.return_value = ActionResult(
        success=False, action="click", error=error
    )
    return session


def _install_mock_http_client(mock_client_cls, *, response=None, error=None):
    """Wire a patched ``httpx.AsyncClient`` class to hand out a mock client.

    Args:
        mock_client_cls: The mock produced by ``patch("httpx.AsyncClient")``.
        response: Object returned by ``client.post`` (used when ``error`` is None).
        error: Exception raised by ``client.post`` instead of returning.

    Returns:
        The mock client, which is also what ``async with`` yields.
    """
    client = AsyncMock()
    if error is not None:
        client.post.side_effect = error
    else:
        client.post.return_value = response
    # Make `async with httpx.AsyncClient(...) as c` yield this same mock.
    client.__aenter__ = AsyncMock(return_value=client)
    client.__aexit__ = AsyncMock(return_value=False)
    mock_client_cls.return_value = client
    return client


# ============================================================
# ComputerUseSessionManager tests
# ============================================================


class TestComputerUseSessionManager:
    """Tests for session management in ComputerUseSessionManager."""

    def test_get_or_create_new(self):
        """get_or_create() builds a session carrying the requested id."""
        manager = ComputerUseSessionManager()
        session = manager.get_or_create("s1")
        assert session.session_id == "s1"

    def test_get_or_create_existing(self):
        """get_or_create() returns the same object for a known id."""
        manager = ComputerUseSessionManager()
        s1 = manager.get_or_create("s1")
        s2 = manager.get_or_create("s1")
        assert s1 is s2

    def test_get_existing(self):
        """get() finds a previously created session."""
        manager = ComputerUseSessionManager()
        manager.get_or_create("s1")
        session = manager.get("s1")
        assert session is not None

    def test_get_nonexistent(self):
        """get() returns None for an unknown id."""
        manager = ComputerUseSessionManager()
        assert manager.get("nonexistent") is None

    def test_remove(self):
        """remove() makes the session unreachable through get()."""
        manager = ComputerUseSessionManager()
        manager.get_or_create("s1")
        manager.remove("s1")
        assert manager.get("s1") is None

    def test_list_sessions(self):
        """list_sessions() reports every created session id."""
        manager = ComputerUseSessionManager()
        manager.get_or_create("s1")
        manager.get_or_create("s2")
        assert sorted(manager.list_sessions()) == ["s1", "s2"]

    def test_has_session(self):
        """has_session() distinguishes known from unknown ids."""
        manager = ComputerUseSessionManager()
        manager.get_or_create("s1")
        assert manager.has_session("s1") is True
        assert manager.has_session("s2") is False

    def test_max_sessions_eviction(self):
        """With max_sessions=2, creating a third session drops s1 and keeps s2, s3."""
        manager = ComputerUseSessionManager(max_sessions=2)
        manager.get_or_create("s1")
        manager.get_or_create("s2")
        manager.get_or_create("s3")
        assert not manager.has_session("s1")
        assert manager.has_session("s2")
        assert manager.has_session("s3")

    @pytest.mark.asyncio
    async def test_close_all(self):
        """close_all() stops every session and empties the manager."""
        manager = ComputerUseSessionManager()
        s1 = manager.get_or_create("s1")
        s2 = manager.get_or_create("s2")
        await s1.start()
        await s2.start()

        await manager.close_all()

        assert manager.list_sessions() == []
        assert s1.is_started is False
        assert s2.is_started is False

    def test_custom_session_factory(self):
        """A custom session_factory decides the type of created sessions."""
        manager = ComputerUseSessionManager(
            session_factory=DockerComputerUseSession,
        )
        session = manager.get_or_create("docker-1")
        assert isinstance(session, DockerComputerUseSession)


# ============================================================
# ComputerUseRecorder tests
# ============================================================


class TestActionRecord:
    """Tests for the ActionRecord data class."""

    def test_to_dict(self):
        """to_dict() exposes action, params and success."""
        record = ActionRecord(
            timestamp=1000.0,
            action="click",
            params={"x": 100, "y": 200},
            success=True,
            output="Clicked at (100, 200)",
        )
        d = record.to_dict()
        assert d["action"] == "click"
        assert d["params"]["x"] == 100
        assert d["success"] is True

    def test_from_dict(self):
        """from_dict() rebuilds a record from a plain dict."""
        data = {
            "timestamp": 1000.0,
            "action": "type",
            "params": {"text": "hello"},
            "success": True,
            "output": "Typed: hello",
            "error": "",
            "screenshot_path": "",
        }
        record = ActionRecord.from_dict(data)
        assert record.action == "type"
        assert record.params["text"] == "hello"

    def test_roundtrip(self):
        """to_dict() followed by from_dict() preserves action, success and error."""
        record = ActionRecord(
            timestamp=1000.0,
            action="click",
            params={"x": 50, "y": 60},
            success=False,
            error="Timeout",
        )
        restored = ActionRecord.from_dict(record.to_dict())
        assert restored.action == record.action
        assert restored.success == record.success
        assert restored.error == record.error


class TestComputerUseRecorder:
    """Tests for the action recorder, ComputerUseRecorder."""

    def test_record_action(self):
        """record() returns a record reflecting the action and its result."""
        recorder = ComputerUseRecorder()
        result = ActionResult(success=True, action="click", output="Clicked")
        record = recorder.record("click", {"x": 100, "y": 200}, result)
        assert record.action == "click"
        assert record.success is True

    def test_get_records(self):
        """get_records() returns every recorded action."""
        recorder = ComputerUseRecorder()
        r1 = ActionResult(success=True, action="click", output="ok")
        r2 = ActionResult(success=True, action="type", output="ok")
        recorder.record("click", {"x": 1, "y": 2}, r1)
        recorder.record("type", {"text": "hi"}, r2)
        records = recorder.get_records()
        assert len(records) == 2

    def test_get_records_is_copy(self):
        """Clearing the list returned by get_records() leaves the recorder intact."""
        recorder = ComputerUseRecorder()
        result = ActionResult(success=True, action="click", output="ok")
        recorder.record("click", {"x": 1, "y": 2}, result)
        records = recorder.get_records()
        records.clear()
        assert len(recorder.get_records()) == 1

    def test_get_records_by_action(self):
        """get_records_by_action() filters by action type."""
        recorder = ComputerUseRecorder()
        r1 = ActionResult(success=True, action="click", output="ok")
        r2 = ActionResult(success=True, action="type", output="ok")
        r3 = ActionResult(success=True, action="click", output="ok")
        recorder.record("click", {"x": 1, "y": 2}, r1)
        recorder.record("type", {"text": "hi"}, r2)
        recorder.record("click", {"x": 3, "y": 4}, r3)
        click_records = recorder.get_records_by_action("click")
        assert len(click_records) == 2

    def test_get_failed_records(self):
        """get_failed_records() returns only the unsuccessful records."""
        recorder = ComputerUseRecorder()
        r1 = ActionResult(success=True, action="click", output="ok")
        r2 = ActionResult(success=False, action="type", error="failed")
        recorder.record("click", {"x": 1, "y": 2}, r1)
        recorder.record("type", {"text": "hi"}, r2)
        failed = recorder.get_failed_records()
        assert len(failed) == 1
        assert failed[0].action == "type"

    def test_total_actions(self):
        """total_actions starts at 0 and grows with each record()."""
        recorder = ComputerUseRecorder()
        assert recorder.total_actions == 0
        result = ActionResult(success=True, action="click", output="ok")
        recorder.record("click", {"x": 1, "y": 2}, result)
        assert recorder.total_actions == 1

    def test_success_failure_counts(self):
        """success_count and failure_count split the records by outcome."""
        recorder = ComputerUseRecorder()
        r1 = ActionResult(success=True, action="click", output="ok")
        r2 = ActionResult(success=False, action="type", error="fail")
        r3 = ActionResult(success=True, action="scroll", output="ok")
        recorder.record("click", {}, r1)
        recorder.record("type", {}, r2)
        recorder.record("scroll", {}, r3)
        assert recorder.success_count == 2
        assert recorder.failure_count == 1

    def test_summary(self):
        """summary() reports totals, outcome counts and the action types seen."""
        recorder = ComputerUseRecorder()
        r1 = ActionResult(success=True, action="click", output="ok")
        r2 = ActionResult(success=False, action="type", error="fail")
        recorder.record("click", {}, r1)
        recorder.record("type", {}, r2)
        s = recorder.summary()
        assert s["total_actions"] == 2
        assert s["success_count"] == 1
        assert s["failure_count"] == 1
        assert "click" in s["action_types"]
        assert "type" in s["action_types"]

    def test_clear(self):
        """clear() drops all records."""
        recorder = ComputerUseRecorder()
        result = ActionResult(success=True, action="click", output="ok")
        recorder.record("click", {}, result)
        recorder.clear()
        assert recorder.total_actions == 0

    def test_save_and_load_recording(self):
        """A saved recording can be loaded into a fresh recorder, order preserved."""
        recorder = ComputerUseRecorder()
        r1 = ActionResult(success=True, action="click", output="Clicked at (10, 20)")
        r2 = ActionResult(success=True, action="type", output="Typed: hello")
        recorder.record("click", {"x": 10, "y": 20}, r1)
        recorder.record("type", {"text": "hello"}, r2)

        # The temporary directory is removed on exit, even if an assertion fails.
        with tempfile.TemporaryDirectory() as tmp_dir:
            path = str(Path(tmp_dir) / "recording.json")
            recorder.save_recording(path)

            # Load into a new recorder.
            recorder2 = ComputerUseRecorder()
            recorder2.load_recording(path)

            assert recorder2.total_actions == 2
            records = recorder2.get_records()
            assert records[0].action == "click"
            assert records[1].action == "type"

    def test_load_nonexistent_file(self):
        """Loading a missing file raises FileNotFoundError."""
        recorder = ComputerUseRecorder()
        with pytest.raises(FileNotFoundError):
            recorder.load_recording("/nonexistent/path/recording.json")

    def test_load_invalid_format(self):
        """Loading JSON that is not a recording raises ValueError."""
        recorder = ComputerUseRecorder()
        with tempfile.TemporaryDirectory() as tmp_dir:
            path = Path(tmp_dir) / "invalid.json"
            path.write_text(json.dumps({"invalid": True}), encoding="utf-8")
            with pytest.raises(ValueError, match="Invalid recording format"):
                recorder.load_recording(str(path))

    @pytest.mark.asyncio
    async def test_replay(self):
        """Replaying an action sequence succeeds and leaves the session started."""
        recorder = ComputerUseRecorder()
        r1 = ActionResult(success=True, action="click", output="Clicked")
        r2 = ActionResult(success=True, action="type", output="Typed")
        recorder.record("click", {"x": 10, "y": 20}, r1)
        recorder.record("type", {"text": "hello"}, r2)

        session = InMemoryComputerUseSession(session_id="replay-test")
        results = await recorder.replay(session)

        assert len(results) == 2
        assert results[0].success is True
        assert results[1].success is True
        assert session.is_started is True

    @pytest.mark.asyncio
    async def test_replay_with_screenshot(self):
        """Replaying a sequence that contains a screenshot succeeds."""
        recorder = ComputerUseRecorder()
        r1 = ActionResult(success=True, action="screenshot", output="Screen captured")
        recorder.record("screenshot", {}, r1)

        session = InMemoryComputerUseSession(session_id="replay-screenshot")
        results = await recorder.replay(session)

        assert len(results) == 1
        assert results[0].success is True


# ============================================================
# ComputerUseTool construction tests
# ============================================================


class TestComputerUseToolConstruction:
    """Tests for constructing ComputerUseTool."""

    def test_default_construction(self):
        """Defaults: name is 'computer_use' and the schema requires only 'action'."""
        tool = ComputerUseTool()
        assert tool.name == "computer_use"
        assert tool.input_schema is not None
        assert "action" in tool.input_schema["properties"]
        assert tool.input_schema["required"] == ["action"]

    def test_custom_construction(self):
        """A custom name and version are stored."""
        tool = ComputerUseTool(name="my_cu", version="2.0.0")
        assert tool.name == "my_cu"
        assert tool.version == "2.0.0"

    def test_to_dict(self):
        """to_dict() includes the name and the input schema."""
        tool = ComputerUseTool()
        d = tool.to_dict()
        assert d["name"] == "computer_use"
        assert "input_schema" in d

    def test_repr(self):
        """repr() mentions the class name and the tool name."""
        tool = ComputerUseTool()
        r = repr(tool)
        assert "ComputerUseTool" in r
        assert "computer_use" in r

    def test_session_manager_accessible(self):
        """The tool exposes a session manager."""
        tool = ComputerUseTool()
        assert tool.session_manager is not None

    def test_recorder_accessible(self):
        """The tool exposes a recorder."""
        tool = ComputerUseTool()
        assert tool.recorder is not None

    def test_custom_recorder(self):
        """A recorder passed at construction is used as-is."""
        recorder = ComputerUseRecorder()
        tool = ComputerUseTool(recorder=recorder)
        assert tool.recorder is recorder


# ============================================================
# ComputerUseTool execution tests
# ============================================================


class TestComputerUseToolExecution:
    """Tests for executing actions through ComputerUseTool."""

    @pytest.mark.asyncio
    async def test_screenshot_action(self):
        """Take a screenshot and recognise UI elements."""
        tool = ComputerUseTool()
        result = await tool.execute(action="screenshot")
        assert result["success"] is True
        assert result["action"] == "screenshot"
        assert "output" in result

    @pytest.mark.asyncio
    async def test_click_action(self):
        """Click a given coordinate."""
        tool = ComputerUseTool()
        result = await tool.execute(action="click", x=100, y=200)
        assert result["success"] is True
        assert result["action"] == "click"
        assert "(100, 200)" in result["output"]

    @pytest.mark.asyncio
    async def test_type_action(self):
        """Type text into an input box."""
        tool = ComputerUseTool()
        result = await tool.execute(action="type", text="hello world")
        assert result["success"] is True
        assert result["action"] == "type"
        assert "hello world" in result["output"]

    @pytest.mark.asyncio
    async def test_scroll_action(self):
        """Scroll action."""
        tool = ComputerUseTool()
        result = await tool.execute(action="scroll", direction="down", amount=5)
        assert result["success"] is True
        assert "down" in result["output"]

    @pytest.mark.asyncio
    async def test_drag_action(self):
        """Drag action."""
        tool = ComputerUseTool()
        result = await tool.execute(
            action="drag",
            start_x=10,
            start_y=20,
            end_x=100,
            end_y=200,
        )
        assert result["success"] is True
        assert result["action"] == "drag"

    @pytest.mark.asyncio
    async def test_key_action(self):
        """Key-press action."""
        tool = ComputerUseTool()
        result = await tool.execute(action="key", key_name="Enter")
        assert result["success"] is True
        assert "Enter" in result["output"]

    @pytest.mark.asyncio
    async def test_wait_action(self):
        """Wait action."""
        tool = ComputerUseTool()
        result = await tool.execute(action="wait", duration=0.1)
        assert result["success"] is True

    @pytest.mark.asyncio
    async def test_missing_action(self):
        """The action parameter is missing."""
        tool = ComputerUseTool()
        result = await tool.execute()
        assert result["success"] is False
        assert "action" in result["error"]

    @pytest.mark.asyncio
    async def test_invalid_action(self):
        """The action type is invalid."""
        tool = ComputerUseTool()
        result = await tool.execute(action="invalid_action")
        assert result["success"] is False
        assert "无效" in result["error"]

    @pytest.mark.asyncio
    async def test_click_missing_coordinates(self):
        """click without coordinates is rejected."""
        tool = ComputerUseTool()
        result = await tool.execute(action="click")
        assert result["success"] is False
        assert "x" in result["error"]

    @pytest.mark.asyncio
    async def test_type_missing_text(self):
        """type without text is rejected."""
        tool = ComputerUseTool()
        result = await tool.execute(action="type")
        assert result["success"] is False
        assert "text" in result["error"]

    @pytest.mark.asyncio
    async def test_key_missing_key_name(self):
        """key without key_name is rejected."""
        tool = ComputerUseTool()
        result = await tool.execute(action="key")
        assert result["success"] is False
        assert "key_name" in result["error"]

    @pytest.mark.asyncio
    async def test_drag_missing_params(self):
        """drag without end coordinates is rejected."""
        tool = ComputerUseTool()
        result = await tool.execute(action="drag", start_x=0, start_y=0)
        assert result["success"] is False
        assert "end_x" in result["error"]

    @pytest.mark.asyncio
    async def test_session_id(self):
        """An explicit session id is echoed back in the result."""
        tool = ComputerUseTool()
        result = await tool.execute(action="screenshot", session_id="my-session")
        assert result["success"] is True
        assert result["session_id"] == "my-session"

    @pytest.mark.asyncio
    async def test_session_reuse(self):
        """Calls with the same session id reuse the session."""
        tool = ComputerUseTool()
        r1 = await tool.execute(action="click", x=10, y=20, session_id="reuse-test")
        r2 = await tool.execute(action="type", text="hello", session_id="reuse-test")
        assert r1["session_id"] == "reuse-test"
        assert r2["session_id"] == "reuse-test"
        assert tool.session_manager.has_session("reuse-test")


# ============================================================
# ComputerUseTool multi-step UI operation tests
# ============================================================


class TestComputerUseToolMultiStep:
    """Tests for multi-step UI operations."""

    @pytest.mark.asyncio
    async def test_multi_step_ui_operation(self):
        """Multi-step UI operation: screenshot -> click -> type -> key."""
        tool = ComputerUseTool()
        session_id = "multi-step"

        # Step 1: take a screenshot to inspect the UI.
        r1 = await tool.execute(action="screenshot", session_id=session_id)
        assert r1["success"] is True

        # Step 2: click the input box.
        r2 = await tool.execute(action="click", x=100, y=200, session_id=session_id)
        assert r2["success"] is True

        # Step 3: type the text.
        r3 = await tool.execute(action="type", text="test input", session_id=session_id)
        assert r3["success"] is True

        # Step 4: press Enter to submit.
        r4 = await tool.execute(action="key", key_name="Enter", session_id=session_id)
        assert r4["success"] is True

        # Every step must have been recorded.
        assert tool.recorder.total_actions == 4
        assert tool.recorder.success_count == 4

    @pytest.mark.asyncio
    async def test_each_step_informs_next(self):
        """Each step decides the next one based on the previous result."""
        tool = ComputerUseTool()
        session_id = "adaptive"

        # Step 1: screenshot.
        r1 = await tool.execute(action="screenshot", session_id=session_id)
        assert r1["success"] is True

        # Pick the next step from the screenshot result (simulated decision logic).
        if r1["success"]:
            r2 = await tool.execute(action="click", x=50, y=50, session_id=session_id)
        else:
            r2 = await tool.execute(action="wait", duration=1.0, session_id=session_id)
        assert r2["success"] is True


# ============================================================
# ComputerUseTool fallback tests
# ============================================================


class TestComputerUseToolFallback:
    """Tests for the ComputerUseTool fallback chain."""

    @pytest.mark.asyncio
    async def test_fallback_without_api_key(self):
        """Without an API key the tool falls back to local session execution."""
        tool = ComputerUseTool()  # no api_key
        result = await tool.execute(action="click", x=100, y=200)
        assert result["success"] is True  # the in-memory session can execute it

    @pytest.mark.asyncio
    async def test_api_failure_fallback_to_session(self):
        """When the API call raises, the tool falls back to local session execution."""
        tool = ComputerUseTool(api_key="sk-test-key")

        with patch.object(
            tool,
            "_call_anthropic_api",
            new_callable=AsyncMock,
            side_effect=Exception("API connection failed"),
        ):
            result = await tool.execute(action="click", x=100, y=200)
            assert result["success"] is True  # fell back to the in-memory session

    @pytest.mark.asyncio
    async def test_api_and_session_failure_fallback_suggestion(self):
        """When both the API and the session fail, a fallback suggestion is returned."""
        tool = ComputerUseTool(api_key="sk-test-key")

        # Mock an API failure.
        with patch.object(
            tool,
            "_call_anthropic_api",
            new_callable=AsyncMock,
            return_value=ActionResult(success=False, action="click", error="API error"),
        ):
            # Inject a mock session whose action also fails.
            tool._session_manager._sessions["fallback-test"] = _make_failing_session(
                "fallback-test", "Session error"
            )

            result = await tool.execute(
                action="click", x=100, y=200, session_id="fallback-test"
            )
            assert result["success"] is False
            assert "fallback_suggestion" in result

    @pytest.mark.asyncio
    async def test_custom_fallback_callback(self):
        """A custom fallback callback supplies the result when the session fails."""

        async def fallback(action: str, params: dict) -> dict:
            return {"success": True, "output": f"Fallback executed: {action}"}

        tool = ComputerUseTool(fallback_callback=fallback)

        # Mock both the API and the session failing.
        with patch.object(
            tool,
            "_call_anthropic_api",
            new_callable=AsyncMock,
            side_effect=Exception("API failed"),
        ):
            tool._session_manager._sessions["cb-test"] = _make_failing_session(
                "cb-test", "Session error"
            )

            result = await tool.execute(
                action="click", x=100, y=200, session_id="cb-test"
            )
            assert result["success"] is True
            assert "Fallback" in result["output"]

    @pytest.mark.asyncio
    async def test_ae2_computer_use_fallback_to_oa_api(self):
        """AE2: Computer Use fails -> fall back to the OA system API.

        Simulated scenario: Computer Use cannot operate the OA system UI, so the
        operation is completed through the OA system API instead.
        """
        oa_api_called = False

        async def oa_api_fallback(action: str, params: dict) -> dict:
            nonlocal oa_api_called
            oa_api_called = True
            return {
                "success": True,
                "output": f"OA API completed: {action} with {params}",
            }

        tool = ComputerUseTool(
            api_key="sk-test-key",
            fallback_callback=oa_api_fallback,
        )

        # Mock both the API and the session failing.
        with patch.object(
            tool,
            "_call_anthropic_api",
            new_callable=AsyncMock,
            side_effect=Exception("API unavailable"),
        ):
            tool._session_manager._sessions["oa-test"] = _make_failing_session(
                "oa-test", "UI not accessible"
            )

            result = await tool.execute(
                action="click", x=100, y=200, session_id="oa-test"
            )
            assert result["success"] is True
            assert oa_api_called is True


# ============================================================
# ComputerUseTool recording integration tests
# ============================================================


class TestComputerUseToolRecording:
    """Tests for the integration between ComputerUseTool and the recorder."""

    @pytest.mark.asyncio
    async def test_actions_recorded(self):
        """Executed actions are recorded automatically."""
        tool = ComputerUseTool()
        await tool.execute(action="click", x=10, y=20)
        await tool.execute(action="type", text="hello")

        assert tool.recorder.total_actions == 2

    @pytest.mark.asyncio
    async def test_recording_save_and_replay(self):
        """A recording can be saved, loaded elsewhere and replayed."""
        tool = ComputerUseTool()
        await tool.execute(action="click", x=10, y=20, session_id="rec-1")
        await tool.execute(action="type", text="hello", session_id="rec-1")

        # The temporary directory is removed on exit, even if an assertion fails.
        with tempfile.TemporaryDirectory() as tmp_dir:
            path = str(Path(tmp_dir) / "recording.json")
            tool.recorder.save_recording(path)

            # Load into a new recorder and replay.
            recorder2 = ComputerUseRecorder()
            recorder2.load_recording(path)
            assert recorder2.total_actions == 2

            session = InMemoryComputerUseSession(session_id="replay-1")
            results = await recorder2.replay(session)
            assert len(results) == 2
            assert all(r.success for r in results)

    @pytest.mark.asyncio
    async def test_recording_summary(self):
        """The recorder summary reflects the executed actions."""
        tool = ComputerUseTool()
        await tool.execute(action="click", x=10, y=20)
        await tool.execute(action="type", text="hello")

        summary = tool.recorder.summary()
        assert summary["total_actions"] == 2
        assert summary["success_count"] == 2
        assert summary["failure_count"] == 0


# ============================================================
# ComputerUseTool API call tests (mocked httpx)
# ============================================================


class TestComputerUseToolAPICall:
    """Tests for ComputerUseTool's Anthropic API calls (mocked)."""

    @pytest.mark.asyncio
    async def test_api_call_success(self):
        """The API call succeeds."""
        tool = ComputerUseTool(api_key="sk-test-key")

        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "content": [
                {
                    "type": "tool_use",
                    "name": "computer",
                    "input": {"action": "click"},
                }
            ]
        }

        with patch("httpx.AsyncClient") as mock_client_cls:
            _install_mock_http_client(mock_client_cls, response=mock_response)

            result = await tool.execute(action="click", x=100, y=200)
            assert result["success"] is True

    @pytest.mark.asyncio
    async def test_api_call_http_error(self):
        """The API call returns an HTTP error."""
        tool = ComputerUseTool(api_key="sk-test-key")

        mock_response = MagicMock()
        mock_response.status_code = 429
        mock_response.text = "Rate limited"

        with patch("httpx.AsyncClient") as mock_client_cls:
            _install_mock_http_client(mock_client_cls, response=mock_response)

            # The API returns an error; fall back to local session execution.
            result = await tool.execute(action="click", x=100, y=200)
            assert result["success"] is True  # fallback succeeded

    @pytest.mark.asyncio
    async def test_api_call_network_error(self):
        """The API call hits a network error."""
        tool = ComputerUseTool(api_key="sk-test-key")

        with patch("httpx.AsyncClient") as mock_client_cls:
            _install_mock_http_client(
                mock_client_cls, error=Exception("Connection refused")
            )

            # Network error; fall back to local session execution.
            result = await tool.execute(action="click", x=100, y=200)
            assert result["success"] is True  # fallback succeeded