# fischer-agentkit/tests/unit/tools/test_computer_use.py
"""Unit tests for ComputerUseTool / ComputerUseSession / ComputerUseRecorder.

Test scenarios:
- Take a screenshot and identify UI elements → returns a list of actionable regions
- Click at the given coordinates → the action succeeds
- Type text into an input box → the action succeeds
- Multi-step UI operation → each step decides the next one based on the result
- Fall back to ShellTool when the API is unavailable → falls back correctly
- Covers AE2: Computer Use fails → fall back to the OA system API
- Record and replay actions → the recorded action sequence can be replayed
"""
from __future__ import annotations
import json
import tempfile
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from agentkit.tools.computer_use import ComputerUseTool
from agentkit.tools.computer_use_session import (
ComputerUseSession,
InMemoryComputerUseSession,
DockerComputerUseSession,
ComputerUseSessionManager,
ActionResult,
ScreenInfo,
)
from agentkit.tools.computer_use_recorder import ComputerUseRecorder, ActionRecord
# ============================================================
# ActionResult tests
# ============================================================
class TestActionResult:
    """Tests for the ActionResult data class."""

    def test_success_result(self):
        """A successful result exposes its fields; error and screenshot are empty strings."""
        message = "Clicked at (100, 200)"
        outcome = ActionResult(success=True, action="click", output=message)

        assert outcome.success is True
        assert outcome.action == "click"
        assert outcome.output == message
        assert outcome.error == ""
        assert outcome.screenshot_base64 == ""

    def test_failure_result(self):
        """A failed result carries the error text it was built with."""
        reason = "Session not started"
        outcome = ActionResult(success=False, action="click", error=reason)

        assert outcome.success is False
        assert outcome.error == reason

    def test_result_with_metadata(self):
        """Nested metadata passed to the constructor can be read back."""
        screen_state = {"cursor": (0, 0)}
        outcome = ActionResult(
            success=True,
            action="screenshot",
            metadata={"screen_state": screen_state},
        )

        assert outcome.metadata["screen_state"]["cursor"] == (0, 0)
# ============================================================
# ScreenInfo tests
# ============================================================
class TestScreenInfo:
    """Tests for the ScreenInfo data class."""

    def test_default_screen(self):
        """A ScreenInfo built without arguments is 1280x720."""
        default_screen = ScreenInfo()
        assert (default_screen.width, default_screen.height) == (1280, 720)

    def test_custom_screen(self):
        """Explicit width and height are stored as given."""
        full_hd = ScreenInfo(width=1920, height=1080)
        assert (full_hd.width, full_hd.height) == (1920, 1080)
# ============================================================
# InMemoryComputerUseSession tests
# ============================================================
class TestInMemoryComputerUseSession:
    """Tests for InMemoryComputerUseSession, the in-memory simulated session."""

    @staticmethod
    async def _started_session():
        """Create a default session and await start() on it."""
        live = InMemoryComputerUseSession()
        await live.start()
        return live

    def test_construction_default(self):
        """Defaults: an id is assigned, the screen is 1280x720, not started."""
        fresh = InMemoryComputerUseSession()
        assert fresh.session_id is not None
        assert fresh.screen.width == 1280
        assert fresh.screen.height == 720
        assert fresh.is_started is False

    def test_construction_custom(self):
        """Explicit session id and screen width are kept."""
        custom = InMemoryComputerUseSession(
            session_id="test-123",
            screen_width=1920,
            screen_height=1080,
        )
        assert custom.session_id == "test-123"
        assert custom.screen.width == 1920

    @pytest.mark.asyncio
    async def test_start_stop(self):
        """is_started follows start() and stop()."""
        sess = InMemoryComputerUseSession()
        assert sess.is_started is False
        await sess.start()
        assert sess.is_started is True
        await sess.stop()
        assert sess.is_started is False

    @pytest.mark.asyncio
    async def test_screenshot_not_started(self):
        """A screenshot fails when the session has not been started."""
        idle = InMemoryComputerUseSession()
        shot = await idle.screenshot()
        assert shot.success is False
        assert "not started" in shot.error

    @pytest.mark.asyncio
    async def test_screenshot_started(self):
        """A screenshot succeeds once the session has been started."""
        live = await self._started_session()
        shot = await live.screenshot()
        assert shot.success is True
        assert shot.action == "screenshot"
        assert "1280x720" in shot.output

    @pytest.mark.asyncio
    async def test_click_action(self):
        """Click action."""
        live = await self._started_session()
        outcome = await live.execute_action("click", x=100, y=200)
        assert outcome.success is True
        assert "(100, 200)" in outcome.output

    @pytest.mark.asyncio
    async def test_type_action(self):
        """Type-text action."""
        live = await self._started_session()
        outcome = await live.execute_action("type", text="hello world")
        assert outcome.success is True
        assert "hello world" in outcome.output

    @pytest.mark.asyncio
    async def test_scroll_action(self):
        """Scroll action."""
        live = await self._started_session()
        outcome = await live.execute_action("scroll", direction="down", amount=5)
        assert outcome.success is True
        assert "down" in outcome.output
        assert "5" in outcome.output

    @pytest.mark.asyncio
    async def test_drag_action(self):
        """Drag action."""
        live = await self._started_session()
        outcome = await live.execute_action(
            "drag", start_x=10, start_y=20, end_x=100, end_y=200
        )
        assert outcome.success is True
        assert "(10,20)" in outcome.output
        assert "(100,200)" in outcome.output

    @pytest.mark.asyncio
    async def test_key_action(self):
        """Key-press action."""
        live = await self._started_session()
        outcome = await live.execute_action("key", key_name="Enter")
        assert outcome.success is True
        assert "Enter" in outcome.output

    @pytest.mark.asyncio
    async def test_wait_action(self):
        """Wait action."""
        live = await self._started_session()
        outcome = await live.execute_action("wait", duration=2.0)
        assert outcome.success is True
        assert "2.0" in outcome.output

    @pytest.mark.asyncio
    async def test_unknown_action(self):
        """An unknown action type is rejected."""
        live = await self._started_session()
        outcome = await live.execute_action("unknown_action")
        assert outcome.success is False
        assert "Unknown" in outcome.error

    @pytest.mark.asyncio
    async def test_action_not_started(self):
        """Executing an action fails when the session has not been started."""
        idle = InMemoryComputerUseSession()
        outcome = await idle.execute_action("click", x=0, y=0)
        assert outcome.success is False
        assert "not started" in outcome.error

    @pytest.mark.asyncio
    async def test_action_history(self):
        """Executed actions are appended to the history in order."""
        live = await self._started_session()
        await live.execute_action("click", x=10, y=20)
        await live.execute_action("type", text="test")
        recorded = [entry["action"] for entry in live.action_history]
        assert recorded == ["click", "type"]

    @pytest.mark.asyncio
    async def test_action_history_is_copy(self):
        """action_history hands out a copy, so clearing it leaves the session intact."""
        live = await self._started_session()
        await live.execute_action("click", x=0, y=0)
        snapshot = live.action_history
        snapshot.clear()
        assert len(live.action_history) == 1

    def test_repr(self):
        """repr() of a never-started session mentions the class kind and 'stopped'."""
        text = repr(InMemoryComputerUseSession(session_id="test-123"))
        assert "InMemory" in text
        assert "stopped" in text
# ============================================================
# DockerComputerUseSession tests
# ============================================================
class TestDockerComputerUseSession:
    """Tests for DockerComputerUseSession (stub implementation)."""

    @staticmethod
    def _new_session():
        """Return an unstarted Docker session with the id used throughout these tests."""
        return DockerComputerUseSession(session_id="docker-1")

    def test_construction(self):
        """A new session keeps its id and has no container yet."""
        docker = DockerComputerUseSession(
            session_id="docker-1",
            container_image="anthropic/computer-use-demo:latest",
        )
        assert docker.session_id == "docker-1"
        assert docker.container_id is None

    @pytest.mark.asyncio
    async def test_start_stop(self):
        """start() sets a container id; stop() clears it again."""
        docker = self._new_session()

        await docker.start()
        assert docker.is_started is True
        assert docker.container_id is not None

        await docker.stop()
        assert docker.is_started is False
        assert docker.container_id is None

    @pytest.mark.asyncio
    async def test_screenshot_not_started(self):
        """A screenshot fails before start()."""
        shot = await self._new_session().screenshot()
        assert shot.success is False

    @pytest.mark.asyncio
    async def test_screenshot_started(self):
        """A screenshot succeeds after start()."""
        docker = self._new_session()
        await docker.start()
        shot = await docker.screenshot()
        assert shot.success is True

    @pytest.mark.asyncio
    async def test_execute_action_not_started(self):
        """An action fails before start()."""
        outcome = await self._new_session().execute_action("click", x=0, y=0)
        assert outcome.success is False

    @pytest.mark.asyncio
    async def test_execute_action_started(self):
        """An action succeeds after start()."""
        docker = self._new_session()
        await docker.start()
        outcome = await docker.execute_action("click", x=100, y=200)
        assert outcome.success is True
# ============================================================
# ComputerUseSessionManager tests
# ============================================================
class TestComputerUseSessionManager:
    """Tests for ComputerUseSessionManager session bookkeeping."""

    def test_get_or_create_new(self):
        """An unknown id yields a new session carrying that id."""
        created = ComputerUseSessionManager().get_or_create("s1")
        assert created.session_id == "s1"

    def test_get_or_create_existing(self):
        """Asking twice for the same id returns the same object."""
        registry = ComputerUseSessionManager()
        first = registry.get_or_create("s1")
        second = registry.get_or_create("s1")
        assert first is second

    def test_get_existing(self):
        """get() finds a session that was created earlier."""
        registry = ComputerUseSessionManager()
        registry.get_or_create("s1")
        found = registry.get("s1")
        assert found is not None

    def test_get_nonexistent(self):
        """get() returns None for an unknown id."""
        registry = ComputerUseSessionManager()
        assert registry.get("nonexistent") is None

    def test_remove(self):
        """A removed session can no longer be looked up."""
        registry = ComputerUseSessionManager()
        registry.get_or_create("s1")
        registry.remove("s1")
        assert registry.get("s1") is None

    def test_list_sessions(self):
        """list_sessions() reports every created id."""
        registry = ComputerUseSessionManager()
        for session_id in ("s1", "s2"):
            registry.get_or_create(session_id)
        assert sorted(registry.list_sessions()) == ["s1", "s2"]

    def test_has_session(self):
        """has_session() is True only for ids that were created."""
        registry = ComputerUseSessionManager()
        registry.get_or_create("s1")
        assert registry.has_session("s1") is True
        assert registry.has_session("s2") is False

    def test_max_sessions_eviction(self):
        """With max_sessions=2, creating a third session drops the first one."""
        registry = ComputerUseSessionManager(max_sessions=2)
        for session_id in ("s1", "s2", "s3"):
            registry.get_or_create(session_id)
        assert not registry.has_session("s1")
        assert registry.has_session("s2")
        assert registry.has_session("s3")

    @pytest.mark.asyncio
    async def test_close_all(self):
        """close_all() empties the manager and leaves the sessions stopped."""
        registry = ComputerUseSessionManager()
        first = registry.get_or_create("s1")
        second = registry.get_or_create("s2")
        await first.start()
        await second.start()

        await registry.close_all()

        assert registry.list_sessions() == []
        assert first.is_started is False
        assert second.is_started is False

    def test_custom_session_factory(self):
        """A custom session_factory decides the type of created sessions."""
        registry = ComputerUseSessionManager(
            session_factory=DockerComputerUseSession,
        )
        created = registry.get_or_create("docker-1")
        assert isinstance(created, DockerComputerUseSession)
# ============================================================
# ComputerUseRecorder tests
# ============================================================
class TestActionRecord:
    """Tests for the ActionRecord data class."""

    def test_to_dict(self):
        """to_dict() exposes action, params and success."""
        entry = ActionRecord(
            timestamp=1000.0,
            action="click",
            params={"x": 100, "y": 200},
            success=True,
            output="Clicked at (100, 200)",
        )
        as_dict = entry.to_dict()
        assert as_dict["action"] == "click"
        assert as_dict["params"]["x"] == 100
        assert as_dict["success"] is True

    def test_from_dict(self):
        """from_dict() rebuilds a record from a plain dictionary."""
        payload = dict(
            timestamp=1000.0,
            action="type",
            params={"text": "hello"},
            success=True,
            output="Typed: hello",
            error="",
            screenshot_path="",
        )
        entry = ActionRecord.from_dict(payload)
        assert entry.action == "type"
        assert entry.params["text"] == "hello"

    def test_roundtrip(self):
        """to_dict() followed by from_dict() preserves action, success and error."""
        source = ActionRecord(
            timestamp=1000.0,
            action="click",
            params={"x": 50, "y": 60},
            success=False,
            error="Timeout",
        )
        rebuilt = ActionRecord.from_dict(source.to_dict())
        assert rebuilt.action == source.action
        assert rebuilt.success == source.success
        assert rebuilt.error == source.error
class TestComputerUseRecorder:
    """Tests for ComputerUseRecorder, the action recorder.

    File-based tests use pytest's ``tmp_path`` fixture instead of
    ``NamedTemporaryFile(delete=False)`` plus manual unlinking, so each test
    gets its own directory and cleanup is handled by pytest.
    """

    @staticmethod
    def _ok(action, output="ok"):
        """Build a successful ActionResult for *action*."""
        return ActionResult(success=True, action=action, output=output)

    @staticmethod
    def _failed(action, error):
        """Build a failed ActionResult for *action*."""
        return ActionResult(success=False, action=action, error=error)

    def test_record_action(self):
        """record() returns a record mirroring the action and its outcome."""
        recorder = ComputerUseRecorder()
        entry = recorder.record(
            "click", {"x": 100, "y": 200}, self._ok("click", "Clicked")
        )
        assert entry.action == "click"
        assert entry.success is True

    def test_get_records(self):
        """get_records() returns every recorded action."""
        recorder = ComputerUseRecorder()
        recorder.record("click", {"x": 1, "y": 2}, self._ok("click"))
        recorder.record("type", {"text": "hi"}, self._ok("type"))
        assert len(recorder.get_records()) == 2

    def test_get_records_is_copy(self):
        """Clearing the returned list does not affect the recorder."""
        recorder = ComputerUseRecorder()
        recorder.record("click", {"x": 1, "y": 2}, self._ok("click"))
        snapshot = recorder.get_records()
        snapshot.clear()
        assert len(recorder.get_records()) == 1

    def test_get_records_by_action(self):
        """Records can be filtered by action type."""
        recorder = ComputerUseRecorder()
        recorder.record("click", {"x": 1, "y": 2}, self._ok("click"))
        recorder.record("type", {"text": "hi"}, self._ok("type"))
        recorder.record("click", {"x": 3, "y": 4}, self._ok("click"))
        assert len(recorder.get_records_by_action("click")) == 2

    def test_get_failed_records(self):
        """Only unsuccessful actions are returned by get_failed_records()."""
        recorder = ComputerUseRecorder()
        recorder.record("click", {"x": 1, "y": 2}, self._ok("click"))
        recorder.record("type", {"text": "hi"}, self._failed("type", "failed"))
        failed = recorder.get_failed_records()
        assert len(failed) == 1
        assert failed[0].action == "type"

    def test_total_actions(self):
        """total_actions grows with every recorded action."""
        recorder = ComputerUseRecorder()
        assert recorder.total_actions == 0
        recorder.record("click", {"x": 1, "y": 2}, self._ok("click"))
        assert recorder.total_actions == 1

    def test_success_failure_counts(self):
        """success_count and failure_count split the records by outcome."""
        recorder = ComputerUseRecorder()
        recorder.record("click", {}, self._ok("click"))
        recorder.record("type", {}, self._failed("type", "fail"))
        recorder.record("scroll", {}, self._ok("scroll"))
        assert recorder.success_count == 2
        assert recorder.failure_count == 1

    def test_summary(self):
        """summary() reports totals and the action types seen."""
        recorder = ComputerUseRecorder()
        recorder.record("click", {}, self._ok("click"))
        recorder.record("type", {}, self._failed("type", "fail"))
        overview = recorder.summary()
        assert overview["total_actions"] == 2
        assert overview["success_count"] == 1
        assert overview["failure_count"] == 1
        assert "click" in overview["action_types"]
        assert "type" in overview["action_types"]

    def test_clear(self):
        """clear() removes every record."""
        recorder = ComputerUseRecorder()
        recorder.record("click", {}, self._ok("click"))
        recorder.clear()
        assert recorder.total_actions == 0

    def test_save_and_load_recording(self, tmp_path):
        """A saved recording can be loaded into a new recorder."""
        recorder = ComputerUseRecorder()
        recorder.record(
            "click", {"x": 10, "y": 20}, self._ok("click", "Clicked at (10, 20)")
        )
        recorder.record(
            "type", {"text": "hello"}, self._ok("type", "Typed: hello")
        )

        # Pass a plain string path, as the original test did.
        path = str(tmp_path / "recording.json")
        recorder.save_recording(path)

        # Load into a fresh recorder.
        restored = ComputerUseRecorder()
        restored.load_recording(path)
        assert restored.total_actions == 2
        loaded = restored.get_records()
        assert loaded[0].action == "click"
        assert loaded[1].action == "type"

    def test_load_nonexistent_file(self):
        """Loading a missing file raises FileNotFoundError."""
        recorder = ComputerUseRecorder()
        with pytest.raises(FileNotFoundError):
            recorder.load_recording("/nonexistent/path/recording.json")

    def test_load_invalid_format(self, tmp_path):
        """A JSON file without the expected structure is rejected with ValueError."""
        recorder = ComputerUseRecorder()
        bad_file = tmp_path / "invalid.json"
        bad_file.write_text(json.dumps({"invalid": True}), encoding="utf-8")

        with pytest.raises(ValueError, match="Invalid recording format"):
            recorder.load_recording(str(bad_file))

    @pytest.mark.asyncio
    async def test_replay(self):
        """Replaying a recorded sequence runs every action on the session."""
        recorder = ComputerUseRecorder()
        recorder.record("click", {"x": 10, "y": 20}, self._ok("click", "Clicked"))
        recorder.record("type", {"text": "hello"}, self._ok("type", "Typed"))

        target = InMemoryComputerUseSession(session_id="replay-test")
        replayed = await recorder.replay(target)

        assert len(replayed) == 2
        assert replayed[0].success is True
        assert replayed[1].success is True
        # The session was never started explicitly, yet it is started afterwards.
        assert target.is_started is True

    @pytest.mark.asyncio
    async def test_replay_with_screenshot(self):
        """A recorded sequence containing a screenshot can be replayed."""
        recorder = ComputerUseRecorder()
        recorder.record(
            "screenshot", {}, self._ok("screenshot", "Screen captured")
        )

        target = InMemoryComputerUseSession(session_id="replay-screenshot")
        replayed = await recorder.replay(target)

        assert len(replayed) == 1
        assert replayed[0].success is True
# ============================================================
# ComputerUseTool construction tests
# ============================================================
class TestComputerUseToolConstruction:
    """Tests for constructing ComputerUseTool."""

    def test_default_construction(self):
        """The default tool is named 'computer_use' and requires only 'action'."""
        default_tool = ComputerUseTool()
        schema = default_tool.input_schema
        assert default_tool.name == "computer_use"
        assert schema is not None
        assert "action" in schema["properties"]
        assert schema["required"] == ["action"]

    def test_custom_construction(self):
        """Name and version can be overridden."""
        custom_tool = ComputerUseTool(name="my_cu", version="2.0.0")
        assert custom_tool.name == "my_cu"
        assert custom_tool.version == "2.0.0"

    def test_to_dict(self):
        """to_dict() includes the tool name and its input schema."""
        exported = ComputerUseTool().to_dict()
        assert exported["name"] == "computer_use"
        assert "input_schema" in exported

    def test_repr(self):
        """repr() mentions both the class name and the tool name."""
        text = repr(ComputerUseTool())
        assert "ComputerUseTool" in text
        assert "computer_use" in text

    def test_session_manager_accessible(self):
        """The tool exposes a session manager."""
        assert ComputerUseTool().session_manager is not None

    def test_recorder_accessible(self):
        """The tool exposes a recorder."""
        assert ComputerUseTool().recorder is not None

    def test_custom_recorder(self):
        """A recorder passed to the constructor is used as-is."""
        supplied = ComputerUseRecorder()
        tool = ComputerUseTool(recorder=supplied)
        assert tool.recorder is supplied
# ============================================================
# ComputerUseTool execution tests
# ============================================================
class TestComputerUseToolExecution:
    """Tests for executing actions through ComputerUseTool."""

    @staticmethod
    async def _run(**params):
        """Execute one action on a brand-new ComputerUseTool and return its result."""
        return await ComputerUseTool().execute(**params)

    @pytest.mark.asyncio
    async def test_screenshot_action(self):
        """The screenshot action succeeds and returns an output field."""
        response = await self._run(action="screenshot")
        assert response["success"] is True
        assert response["action"] == "screenshot"
        assert "output" in response

    @pytest.mark.asyncio
    async def test_click_action(self):
        """Click at the given coordinates."""
        response = await self._run(action="click", x=100, y=200)
        assert response["success"] is True
        assert response["action"] == "click"
        assert "(100, 200)" in response["output"]

    @pytest.mark.asyncio
    async def test_type_action(self):
        """Type text into an input box."""
        response = await self._run(action="type", text="hello world")
        assert response["success"] is True
        assert response["action"] == "type"
        assert "hello world" in response["output"]

    @pytest.mark.asyncio
    async def test_scroll_action(self):
        """Scroll action."""
        response = await self._run(action="scroll", direction="down", amount=5)
        assert response["success"] is True
        assert "down" in response["output"]

    @pytest.mark.asyncio
    async def test_drag_action(self):
        """Drag action."""
        response = await self._run(
            action="drag",
            start_x=10, start_y=20,
            end_x=100, end_y=200,
        )
        assert response["success"] is True
        assert response["action"] == "drag"

    @pytest.mark.asyncio
    async def test_key_action(self):
        """Key-press action."""
        response = await self._run(action="key", key_name="Enter")
        assert response["success"] is True
        assert "Enter" in response["output"]

    @pytest.mark.asyncio
    async def test_wait_action(self):
        """Wait action."""
        response = await self._run(action="wait", duration=0.1)
        assert response["success"] is True

    @pytest.mark.asyncio
    async def test_missing_action(self):
        """Calling execute() without an action is reported as an error."""
        response = await self._run()
        assert response["success"] is False
        assert "action" in response["error"]

    @pytest.mark.asyncio
    async def test_invalid_action(self):
        """An invalid action type is rejected."""
        response = await self._run(action="invalid_action")
        assert response["success"] is False
        # The expected error text is a runtime string and stays in Chinese.
        assert "无效" in response["error"]

    @pytest.mark.asyncio
    async def test_click_missing_coordinates(self):
        """click without coordinates is rejected."""
        response = await self._run(action="click")
        assert response["success"] is False
        assert "x" in response["error"]

    @pytest.mark.asyncio
    async def test_type_missing_text(self):
        """type without text is rejected."""
        response = await self._run(action="type")
        assert response["success"] is False
        assert "text" in response["error"]

    @pytest.mark.asyncio
    async def test_key_missing_key_name(self):
        """key without a key name is rejected."""
        response = await self._run(action="key")
        assert response["success"] is False
        assert "key_name" in response["error"]

    @pytest.mark.asyncio
    async def test_drag_missing_params(self):
        """drag without end coordinates is rejected."""
        response = await self._run(action="drag", start_x=0, start_y=0)
        assert response["success"] is False
        assert "end_x" in response["error"]

    @pytest.mark.asyncio
    async def test_session_id(self):
        """An explicit session id is echoed back in the result."""
        response = await self._run(action="screenshot", session_id="my-session")
        assert response["success"] is True
        assert response["session_id"] == "my-session"

    @pytest.mark.asyncio
    async def test_session_reuse(self):
        """Two calls with the same session id share one session."""
        shared_id = "reuse-test"
        tool = ComputerUseTool()
        clicked = await tool.execute(action="click", x=10, y=20, session_id=shared_id)
        typed = await tool.execute(action="type", text="hello", session_id=shared_id)
        assert clicked["session_id"] == "reuse-test"
        assert typed["session_id"] == "reuse-test"
        assert tool.session_manager.has_session("reuse-test")
# ============================================================
# ComputerUseTool multi-step UI operation tests
# ============================================================
class TestComputerUseToolMultiStep:
    """Tests for multi-step UI operations."""

    @pytest.mark.asyncio
    async def test_multi_step_ui_operation(self):
        """Multi-step flow: screenshot -> click -> type -> key press."""
        tool = ComputerUseTool()
        session_id = "multi-step"
        steps = [
            {"action": "screenshot"},                 # Step 1: capture the screen
            {"action": "click", "x": 100, "y": 200},  # Step 2: click the input box
            {"action": "type", "text": "test input"},  # Step 3: type the text
            {"action": "key", "key_name": "Enter"},   # Step 4: press Enter to submit
        ]

        for step in steps:
            outcome = await tool.execute(**step, session_id=session_id)
            assert outcome["success"] is True

        # Every step must have been recorded as a success.
        assert tool.recorder.total_actions == 4
        assert tool.recorder.success_count == 4

    @pytest.mark.asyncio
    async def test_each_step_informs_next(self):
        """The follow-up action is chosen from the previous step's result.

        The follow-up is selected before anything is asserted. Previously the
        screenshot result was asserted first, which made the ``wait`` branch
        unreachable dead code.
        """
        tool = ComputerUseTool()
        session_id = "adaptive"

        # Step 1: screenshot.
        first = await tool.execute(action="screenshot", session_id=session_id)

        # Step 2: choose the next action from the screenshot outcome
        # (simulated decision logic).
        if first["success"]:
            follow_up = {"action": "click", "x": 50, "y": 50}
        else:
            follow_up = {"action": "wait", "duration": 1.0}
        second = await tool.execute(**follow_up, session_id=session_id)

        assert first["success"] is True
        assert second["success"] is True
        # A successful screenshot must have led to the click branch.
        assert second["action"] == "click"
# ============================================================
# ComputerUseTool fallback tests
# ============================================================
class TestComputerUseToolFallback:
    """Tests for the ComputerUseTool fallback chain."""

    @staticmethod
    def _patched_api(tool, **mock_kwargs):
        """Return a patcher replacing tool._call_anthropic_api with an AsyncMock."""
        return patch.object(
            tool, "_call_anthropic_api",
            new_callable=AsyncMock,
            **mock_kwargs,
        )

    @staticmethod
    def _inject_failing_session(tool, session_id, error):
        """Register a mock session whose screenshot succeeds but whose click fails.

        The mock is written straight into the manager's private ``_sessions``
        mapping, so this helper depends on that internal attribute.
        """
        broken = AsyncMock(spec=ComputerUseSession)
        broken.session_id = session_id
        broken.screen = ScreenInfo()
        broken.is_started = True
        broken.screenshot.return_value = ActionResult(
            success=True, action="screenshot", screenshot_base64=""
        )
        broken.execute_action.return_value = ActionResult(
            success=False, action="click", error=error
        )
        tool._session_manager._sessions[session_id] = broken
        return broken

    @pytest.mark.asyncio
    async def test_fallback_without_api_key(self):
        """Without an API key the action is executed locally by the session."""
        keyless_tool = ComputerUseTool()  # no api_key
        response = await keyless_tool.execute(action="click", x=100, y=200)
        assert response["success"] is True  # the in-memory session can execute it

    @pytest.mark.asyncio
    async def test_api_failure_fallback_to_session(self):
        """When the API call raises, the action falls back to the local session."""
        tool = ComputerUseTool(api_key="sk-test-key")
        api_down = Exception("API connection failed")
        with self._patched_api(tool, side_effect=api_down):
            response = await tool.execute(action="click", x=100, y=200)
        assert response["success"] is True  # fell back to the in-memory session

    @pytest.mark.asyncio
    async def test_api_and_session_failure_fallback_suggestion(self):
        """When both API and session fail, the result carries a fallback suggestion."""
        tool = ComputerUseTool(api_key="sk-test-key")
        api_failure = ActionResult(success=False, action="click", error="API error")

        # The API reports a failure ...
        with self._patched_api(tool, return_value=api_failure):
            # ... and the injected session fails as well.
            self._inject_failing_session(tool, "fallback-test", "Session error")
            response = await tool.execute(
                action="click", x=100, y=200, session_id="fallback-test"
            )

        assert response["success"] is False
        assert "fallback_suggestion" in response

    @pytest.mark.asyncio
    async def test_custom_fallback_callback(self):
        """A custom fallback callback produces the final result."""

        async def fallback(action: str, params: dict) -> dict:
            return {"success": True, "output": f"Fallback executed: {action}"}

        tool = ComputerUseTool(fallback_callback=fallback)

        # NOTE(review): this tool has no api_key; if the API path is skipped in
        # that case, the patch below never takes effect — confirm.
        with self._patched_api(tool, side_effect=Exception("API failed")):
            self._inject_failing_session(tool, "cb-test", "Session error")
            response = await tool.execute(
                action="click", x=100, y=200, session_id="cb-test"
            )

        assert response["success"] is True
        assert "Fallback" in response["output"]

    @pytest.mark.asyncio
    async def test_ae2_computer_use_fallback_to_oa_api(self):
        """AE2: Computer Use fails -> fall back to the OA system API.

        Simulated scenario: Computer Use cannot operate the OA system UI, so
        the operation is completed through the OA system API instead.
        """
        oa_api_called = False

        async def oa_api_fallback(action: str, params: dict) -> dict:
            nonlocal oa_api_called
            oa_api_called = True
            return {
                "success": True,
                "output": f"OA API completed: {action} with {params}",
            }

        tool = ComputerUseTool(
            api_key="sk-test-key",
            fallback_callback=oa_api_fallback,
        )

        # Both the API and the session fail.
        with self._patched_api(tool, side_effect=Exception("API unavailable")):
            self._inject_failing_session(tool, "oa-test", "UI not accessible")
            response = await tool.execute(
                action="click", x=100, y=200, session_id="oa-test"
            )

        assert response["success"] is True
        assert oa_api_called is True
# ============================================================
# ComputerUseTool recording integration tests
# ============================================================
class TestComputerUseToolRecording:
    """Integration tests between ComputerUseTool and its recorder.

    The save/replay test uses pytest's ``tmp_path`` fixture instead of
    ``NamedTemporaryFile(delete=False)`` plus manual unlinking, so cleanup is
    handled by pytest.
    """

    @staticmethod
    async def _click_then_type(tool, **extra):
        """Run a click followed by a type action on *tool*."""
        await tool.execute(action="click", x=10, y=20, **extra)
        await tool.execute(action="type", text="hello", **extra)

    @pytest.mark.asyncio
    async def test_actions_recorded(self):
        """Executed actions are recorded automatically."""
        tool = ComputerUseTool()
        await self._click_then_type(tool)
        assert tool.recorder.total_actions == 2

    @pytest.mark.asyncio
    async def test_recording_save_and_replay(self, tmp_path):
        """A recording can be saved, loaded elsewhere and replayed."""
        tool = ComputerUseTool()
        await self._click_then_type(tool, session_id="rec-1")

        # Pass a plain string path, as the original test did.
        path = str(tmp_path / "recording.json")
        tool.recorder.save_recording(path)

        # Load into a fresh recorder and replay on a new session.
        restored = ComputerUseRecorder()
        restored.load_recording(path)
        assert restored.total_actions == 2

        target = InMemoryComputerUseSession(session_id="replay-1")
        replayed = await restored.replay(target)
        assert len(replayed) == 2
        assert all(step.success for step in replayed)

    @pytest.mark.asyncio
    async def test_recording_summary(self):
        """The recorder summary reflects the executed actions."""
        tool = ComputerUseTool()
        await self._click_then_type(tool)

        overview = tool.recorder.summary()
        assert overview["total_actions"] == 2
        assert overview["success_count"] == 2
        assert overview["failure_count"] == 0
# ============================================================
# ComputerUseTool API call tests (mocked httpx)
# ============================================================
class TestComputerUseToolAPICall:
    """Tests for ComputerUseTool's Anthropic API calls (mocked)."""

    @staticmethod
    def _async_client():
        """Build an AsyncMock client that returns itself from ``async with``."""
        client = AsyncMock()
        client.__aenter__ = AsyncMock(return_value=client)
        client.__aexit__ = AsyncMock(return_value=False)
        return client

    @pytest.mark.asyncio
    async def test_api_call_success(self):
        """The API call succeeds."""
        tool = ComputerUseTool(api_key="sk-test-key")

        api_reply = MagicMock()
        api_reply.status_code = 200
        api_reply.json.return_value = {
            "content": [
                {
                    "type": "tool_use",
                    "name": "computer",
                    "input": {"action": "click"},
                }
            ]
        }

        with patch("httpx.AsyncClient") as client_cls:
            client = self._async_client()
            client.post.return_value = api_reply
            client_cls.return_value = client
            response = await tool.execute(action="click", x=100, y=200)

        # NOTE(review): the two failure tests in this class also end with
        # success True via the session fallback, so this assertion alone does
        # not show the API path was taken — consider also checking that
        # client.post was awaited.
        assert response["success"] is True

    @pytest.mark.asyncio
    async def test_api_call_http_error(self):
        """The API answers with an HTTP error status."""
        tool = ComputerUseTool(api_key="sk-test-key")

        api_reply = MagicMock()
        api_reply.status_code = 429
        api_reply.text = "Rate limited"

        with patch("httpx.AsyncClient") as client_cls:
            client = self._async_client()
            client.post.return_value = api_reply
            client_cls.return_value = client
            # The API returned an error; fall back to local session execution.
            response = await tool.execute(action="click", x=100, y=200)

        assert response["success"] is True  # fallback succeeded

    @pytest.mark.asyncio
    async def test_api_call_network_error(self):
        """The API call fails with a network error."""
        tool = ComputerUseTool(api_key="sk-test-key")

        with patch("httpx.AsyncClient") as client_cls:
            client = self._async_client()
            client.post.side_effect = Exception("Connection refused")
            client_cls.return_value = client
            # Network error; fall back to local session execution.
            response = await tool.execute(action="click", x=100, y=200)

        assert response["success"] is True  # fallback succeeded