1071 lines
38 KiB
Python
1071 lines
38 KiB
Python
"""ComputerUseTool / ComputerUseSession / ComputerUseRecorder 单元测试
|
||
|
||
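Test scenarios:
- Take a screenshot and identify UI elements -> returns a list of actionable regions
- Click at the given coordinates -> the action succeeds
- Type text into an input box -> the action succeeds
- Multi-step UI operation -> each step decides the next one based on the result
- Fall back to ShellTool when the API is unavailable -> falls back correctly
- Covers AE2: Computer Use fails -> fall back to the OA system API
- Record and replay actions -> the action sequence can be replayed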
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
import tempfile
|
||
from pathlib import Path
|
||
from unittest.mock import AsyncMock, MagicMock, patch
|
||
|
||
import pytest
|
||
|
||
from agentkit.tools.computer_use import ComputerUseTool
|
||
from agentkit.tools.computer_use_session import (
|
||
ComputerUseSession,
|
||
InMemoryComputerUseSession,
|
||
DockerComputerUseSession,
|
||
ComputerUseSessionManager,
|
||
ActionResult,
|
||
ScreenInfo,
|
||
)
|
||
from agentkit.tools.computer_use_recorder import ComputerUseRecorder, ActionRecord
|
||
|
||
|
||
# ============================================================
|
||
# ActionResult 测试
|
||
# ============================================================
|
||
|
||
|
||
class TestActionResult:
    """Tests for the ``ActionResult`` data class."""

    def test_success_result(self):
        """A successful result keeps its fields; error and screenshot are empty strings."""
        outcome = ActionResult(
            success=True,
            action="click",
            output="Clicked at (100, 200)",
        )
        assert outcome.success is True
        assert outcome.action == "click"
        assert outcome.output == "Clicked at (100, 200)"
        assert outcome.error == ""
        assert outcome.screenshot_base64 == ""

    def test_failure_result(self):
        """A failed result exposes the error message it was built with."""
        outcome = ActionResult(success=False, action="click", error="Session not started")
        assert outcome.success is False
        assert outcome.error == "Session not started"

    def test_result_with_metadata(self):
        """Metadata passed to the constructor is readable from the result."""
        extra = {"screen_state": {"cursor": (0, 0)}}
        outcome = ActionResult(success=True, action="screenshot", metadata=extra)
        assert outcome.metadata["screen_state"]["cursor"] == (0, 0)
|
||
|
||
|
||
# ============================================================
|
||
# ScreenInfo 测试
|
||
# ============================================================
|
||
|
||
|
||
class TestScreenInfo:
    """Tests for the ``ScreenInfo`` data class."""

    def test_default_screen(self):
        """Without arguments the screen is 1280x720."""
        info = ScreenInfo()
        assert info.width == 1280
        assert info.height == 720

    def test_custom_screen(self):
        """Explicit width and height are stored as given."""
        info = ScreenInfo(width=1920, height=1080)
        assert info.width == 1920
        assert info.height == 1080
|
||
|
||
|
||
# ============================================================
|
||
# InMemoryComputerUseSession 测试
|
||
# ============================================================
|
||
|
||
|
||
class TestInMemoryComputerUseSession:
    """Tests for the in-memory simulated session ``InMemoryComputerUseSession``."""

    @staticmethod
    async def _started_session():
        """Build a default in-memory session and start it."""
        live = InMemoryComputerUseSession()
        await live.start()
        return live

    def test_construction_default(self):
        """Default construction: has an id, a 1280x720 screen, and is not started."""
        sess = InMemoryComputerUseSession()
        assert sess.session_id is not None
        assert sess.screen.width == 1280
        assert sess.screen.height == 720
        assert sess.is_started is False

    def test_construction_custom(self):
        """Custom id and screen size are honoured."""
        sess = InMemoryComputerUseSession(
            session_id="test-123",
            screen_width=1920,
            screen_height=1080,
        )
        assert sess.session_id == "test-123"
        assert sess.screen.width == 1920

    @pytest.mark.asyncio
    async def test_start_stop(self):
        """``start`` and ``stop`` toggle ``is_started``."""
        sess = InMemoryComputerUseSession()
        assert sess.is_started is False
        await sess.start()
        assert sess.is_started is True
        await sess.stop()
        assert sess.is_started is False

    @pytest.mark.asyncio
    async def test_screenshot_not_started(self):
        """Taking a screenshot before start fails."""
        sess = InMemoryComputerUseSession()
        shot = await sess.screenshot()
        assert shot.success is False
        assert "not started" in shot.error

    @pytest.mark.asyncio
    async def test_screenshot_started(self):
        """Taking a screenshot after start succeeds."""
        sess = await self._started_session()
        shot = await sess.screenshot()
        assert shot.success is True
        assert shot.action == "screenshot"
        assert "1280x720" in shot.output

    @pytest.mark.asyncio
    async def test_click_action(self):
        """Click action."""
        sess = await self._started_session()
        outcome = await sess.execute_action("click", x=100, y=200)
        assert outcome.success is True
        assert "(100, 200)" in outcome.output

    @pytest.mark.asyncio
    async def test_type_action(self):
        """Type-text action."""
        sess = await self._started_session()
        outcome = await sess.execute_action("type", text="hello world")
        assert outcome.success is True
        assert "hello world" in outcome.output

    @pytest.mark.asyncio
    async def test_scroll_action(self):
        """Scroll action."""
        sess = await self._started_session()
        outcome = await sess.execute_action("scroll", direction="down", amount=5)
        assert outcome.success is True
        assert "down" in outcome.output
        assert "5" in outcome.output

    @pytest.mark.asyncio
    async def test_drag_action(self):
        """Drag action."""
        sess = await self._started_session()
        outcome = await sess.execute_action(
            "drag", start_x=10, start_y=20, end_x=100, end_y=200
        )
        assert outcome.success is True
        assert "(10,20)" in outcome.output
        assert "(100,200)" in outcome.output

    @pytest.mark.asyncio
    async def test_key_action(self):
        """Key-press action."""
        sess = await self._started_session()
        outcome = await sess.execute_action("key", key_name="Enter")
        assert outcome.success is True
        assert "Enter" in outcome.output

    @pytest.mark.asyncio
    async def test_wait_action(self):
        """Wait action."""
        sess = await self._started_session()
        outcome = await sess.execute_action("wait", duration=2.0)
        assert outcome.success is True
        assert "2.0" in outcome.output

    @pytest.mark.asyncio
    async def test_unknown_action(self):
        """Unknown action type."""
        sess = await self._started_session()
        outcome = await sess.execute_action("unknown_action")
        assert outcome.success is False
        assert "Unknown" in outcome.error

    @pytest.mark.asyncio
    async def test_action_not_started(self):
        """Executing an action before start fails."""
        sess = InMemoryComputerUseSession()
        outcome = await sess.execute_action("click", x=0, y=0)
        assert outcome.success is False
        assert "not started" in outcome.error

    @pytest.mark.asyncio
    async def test_action_history(self):
        """Executed actions are appended to the history in order."""
        sess = await self._started_session()
        await sess.execute_action("click", x=10, y=20)
        await sess.execute_action("type", text="test")
        assert len(sess.action_history) == 2
        first, second = sess.action_history[0], sess.action_history[1]
        assert first["action"] == "click"
        assert second["action"] == "type"

    @pytest.mark.asyncio
    async def test_action_history_is_copy(self):
        """``action_history`` returns a copy; clearing it leaves the session untouched."""
        sess = await self._started_session()
        await sess.execute_action("click", x=0, y=0)
        snapshot = sess.action_history
        snapshot.clear()
        assert len(sess.action_history) == 1

    def test_repr(self):
        """``repr`` mentions the implementation and the stopped state."""
        text = repr(InMemoryComputerUseSession(session_id="test-123"))
        assert "InMemory" in text
        assert "stopped" in text
|
||
|
||
|
||
# ============================================================
|
||
# DockerComputerUseSession 测试
|
||
# ============================================================
|
||
|
||
|
||
class TestDockerComputerUseSession:
    """Tests for ``DockerComputerUseSession`` (stub implementation)."""

    def test_construction(self):
        """A new session keeps its id and has no container yet."""
        docker_sess = DockerComputerUseSession(
            session_id="docker-1",
            container_image="anthropic/computer-use-demo:latest",
        )
        assert docker_sess.session_id == "docker-1"
        assert docker_sess.container_id is None

    @pytest.mark.asyncio
    async def test_start_stop(self):
        """Starting sets a container id; stopping clears it again."""
        docker_sess = DockerComputerUseSession(session_id="docker-1")

        await docker_sess.start()
        assert docker_sess.is_started is True
        assert docker_sess.container_id is not None

        await docker_sess.stop()
        assert docker_sess.is_started is False
        assert docker_sess.container_id is None

    @pytest.mark.asyncio
    async def test_screenshot_not_started(self):
        """Screenshot before start fails."""
        docker_sess = DockerComputerUseSession(session_id="docker-1")
        shot = await docker_sess.screenshot()
        assert shot.success is False

    @pytest.mark.asyncio
    async def test_screenshot_started(self):
        """Screenshot after start succeeds."""
        docker_sess = DockerComputerUseSession(session_id="docker-1")
        await docker_sess.start()
        shot = await docker_sess.screenshot()
        assert shot.success is True

    @pytest.mark.asyncio
    async def test_execute_action_not_started(self):
        """Executing an action before start fails."""
        docker_sess = DockerComputerUseSession(session_id="docker-1")
        outcome = await docker_sess.execute_action("click", x=0, y=0)
        assert outcome.success is False

    @pytest.mark.asyncio
    async def test_execute_action_started(self):
        """Executing an action after start succeeds."""
        docker_sess = DockerComputerUseSession(session_id="docker-1")
        await docker_sess.start()
        outcome = await docker_sess.execute_action("click", x=100, y=200)
        assert outcome.success is True
|
||
|
||
|
||
# ============================================================
|
||
# ComputerUseSessionManager 测试
|
||
# ============================================================
|
||
|
||
|
||
class TestComputerUseSessionManager:
    """Tests for session management in ``ComputerUseSessionManager``."""

    def test_get_or_create_new(self):
        """An unknown id yields a new session carrying that id."""
        mgr = ComputerUseSessionManager()
        created = mgr.get_or_create("s1")
        assert created.session_id == "s1"

    def test_get_or_create_existing(self):
        """Asking twice for the same id returns the same object."""
        mgr = ComputerUseSessionManager()
        first = mgr.get_or_create("s1")
        second = mgr.get_or_create("s1")
        assert first is second

    def test_get_existing(self):
        """``get`` returns a previously created session."""
        mgr = ComputerUseSessionManager()
        mgr.get_or_create("s1")
        found = mgr.get("s1")
        assert found is not None

    def test_get_nonexistent(self):
        """``get`` returns ``None`` for an unknown id."""
        mgr = ComputerUseSessionManager()
        assert mgr.get("nonexistent") is None

    def test_remove(self):
        """A removed session can no longer be fetched."""
        mgr = ComputerUseSessionManager()
        mgr.get_or_create("s1")
        mgr.remove("s1")
        assert mgr.get("s1") is None

    def test_list_sessions(self):
        """``list_sessions`` reports every created id."""
        mgr = ComputerUseSessionManager()
        mgr.get_or_create("s1")
        mgr.get_or_create("s2")
        assert sorted(mgr.list_sessions()) == ["s1", "s2"]

    def test_has_session(self):
        """``has_session`` is true only for created ids."""
        mgr = ComputerUseSessionManager()
        mgr.get_or_create("s1")
        assert mgr.has_session("s1") is True
        assert mgr.has_session("s2") is False

    def test_max_sessions_eviction(self):
        """With ``max_sessions=2`` a third session evicts the first one created."""
        mgr = ComputerUseSessionManager(max_sessions=2)
        mgr.get_or_create("s1")
        mgr.get_or_create("s2")
        mgr.get_or_create("s3")
        assert not mgr.has_session("s1")
        assert mgr.has_session("s2")
        assert mgr.has_session("s3")

    @pytest.mark.asyncio
    async def test_close_all(self):
        """``close_all`` stops every session and empties the manager."""
        mgr = ComputerUseSessionManager()
        first = mgr.get_or_create("s1")
        second = mgr.get_or_create("s2")
        await first.start()
        await second.start()

        await mgr.close_all()

        assert mgr.list_sessions() == []
        assert first.is_started is False
        assert second.is_started is False

    def test_custom_session_factory(self):
        """A custom factory determines the type of created sessions."""
        mgr = ComputerUseSessionManager(session_factory=DockerComputerUseSession)
        created = mgr.get_or_create("docker-1")
        assert isinstance(created, DockerComputerUseSession)
|
||
|
||
|
||
# ============================================================
|
||
# ComputerUseRecorder 测试
|
||
# ============================================================
|
||
|
||
|
||
class TestActionRecord:
    """Tests for the ``ActionRecord`` data class."""

    def test_to_dict(self):
        """``to_dict`` exposes action, params and success."""
        entry = ActionRecord(
            timestamp=1000.0,
            action="click",
            params={"x": 100, "y": 200},
            success=True,
            output="Clicked at (100, 200)",
        )
        as_dict = entry.to_dict()
        assert as_dict["action"] == "click"
        assert as_dict["params"]["x"] == 100
        assert as_dict["success"] is True

    def test_from_dict(self):
        """``from_dict`` rebuilds a record from a plain mapping."""
        payload = {
            "timestamp": 1000.0,
            "action": "type",
            "params": {"text": "hello"},
            "success": True,
            "output": "Typed: hello",
            "error": "",
            "screenshot_path": "",
        }
        entry = ActionRecord.from_dict(payload)
        assert entry.action == "type"
        assert entry.params["text"] == "hello"

    def test_roundtrip(self):
        """``to_dict`` followed by ``from_dict`` preserves action, success and error."""
        source = ActionRecord(
            timestamp=1000.0,
            action="click",
            params={"x": 50, "y": 60},
            success=False,
            error="Timeout",
        )
        clone = ActionRecord.from_dict(source.to_dict())
        assert clone.action == source.action
        assert clone.success == source.success
        assert clone.error == source.error
|
||
|
||
|
||
class TestComputerUseRecorder:
    """Tests for the action recorder ``ComputerUseRecorder``."""

    def test_record_action(self):
        """``record`` returns a record mirroring the action and its outcome."""
        rec = ComputerUseRecorder()
        outcome = ActionResult(success=True, action="click", output="Clicked")
        entry = rec.record("click", {"x": 100, "y": 200}, outcome)
        assert entry.action == "click"
        assert entry.success is True

    def test_get_records(self):
        """``get_records`` returns every recorded action."""
        rec = ComputerUseRecorder()
        click_ok = ActionResult(success=True, action="click", output="ok")
        type_ok = ActionResult(success=True, action="type", output="ok")
        rec.record("click", {"x": 1, "y": 2}, click_ok)
        rec.record("type", {"text": "hi"}, type_ok)
        assert len(rec.get_records()) == 2

    def test_get_records_is_copy(self):
        """Clearing the returned list does not affect the recorder."""
        rec = ComputerUseRecorder()
        outcome = ActionResult(success=True, action="click", output="ok")
        rec.record("click", {"x": 1, "y": 2}, outcome)
        snapshot = rec.get_records()
        snapshot.clear()
        assert len(rec.get_records()) == 1

    def test_get_records_by_action(self):
        """Records can be filtered by action name."""
        rec = ComputerUseRecorder()
        first_click = ActionResult(success=True, action="click", output="ok")
        typed = ActionResult(success=True, action="type", output="ok")
        second_click = ActionResult(success=True, action="click", output="ok")
        rec.record("click", {"x": 1, "y": 2}, first_click)
        rec.record("type", {"text": "hi"}, typed)
        rec.record("click", {"x": 3, "y": 4}, second_click)
        clicks = rec.get_records_by_action("click")
        assert len(clicks) == 2

    def test_get_failed_records(self):
        """Only unsuccessful actions are returned by ``get_failed_records``."""
        rec = ComputerUseRecorder()
        good = ActionResult(success=True, action="click", output="ok")
        bad = ActionResult(success=False, action="type", error="failed")
        rec.record("click", {"x": 1, "y": 2}, good)
        rec.record("type", {"text": "hi"}, bad)
        failures = rec.get_failed_records()
        assert len(failures) == 1
        assert failures[0].action == "type"

    def test_total_actions(self):
        """``total_actions`` starts at zero and grows with each record."""
        rec = ComputerUseRecorder()
        assert rec.total_actions == 0
        outcome = ActionResult(success=True, action="click", output="ok")
        rec.record("click", {"x": 1, "y": 2}, outcome)
        assert rec.total_actions == 1

    def test_success_failure_counts(self):
        """Success and failure counters follow the recorded outcomes."""
        rec = ComputerUseRecorder()
        click_ok = ActionResult(success=True, action="click", output="ok")
        type_bad = ActionResult(success=False, action="type", error="fail")
        scroll_ok = ActionResult(success=True, action="scroll", output="ok")
        rec.record("click", {}, click_ok)
        rec.record("type", {}, type_bad)
        rec.record("scroll", {}, scroll_ok)
        assert rec.success_count == 2
        assert rec.failure_count == 1

    def test_summary(self):
        """``summary`` reports totals and the action types seen."""
        rec = ComputerUseRecorder()
        click_ok = ActionResult(success=True, action="click", output="ok")
        type_bad = ActionResult(success=False, action="type", error="fail")
        rec.record("click", {}, click_ok)
        rec.record("type", {}, type_bad)
        report = rec.summary()
        assert report["total_actions"] == 2
        assert report["success_count"] == 1
        assert report["failure_count"] == 1
        assert "click" in report["action_types"]
        assert "type" in report["action_types"]

    def test_clear(self):
        """``clear`` drops every recorded action."""
        rec = ComputerUseRecorder()
        outcome = ActionResult(success=True, action="click", output="ok")
        rec.record("click", {}, outcome)
        rec.clear()
        assert rec.total_actions == 0

    def test_save_and_load_recording(self):
        """A saved recording can be loaded into a fresh recorder."""
        writer = ComputerUseRecorder()
        click_ok = ActionResult(success=True, action="click", output="Clicked at (10, 20)")
        type_ok = ActionResult(success=True, action="type", output="Typed: hello")
        writer.record("click", {"x": 10, "y": 20}, click_ok)
        writer.record("type", {"text": "hello"}, type_ok)

        # Only the file name is needed; the file is removed in ``finally``.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as handle:
            target = handle.name

        try:
            writer.save_recording(target)

            # Load into a new recorder.
            reader = ComputerUseRecorder()
            reader.load_recording(target)
            assert reader.total_actions == 2
            loaded = reader.get_records()
            assert loaded[0].action == "click"
            assert loaded[1].action == "type"
        finally:
            Path(target).unlink(missing_ok=True)

    def test_load_nonexistent_file(self):
        """Loading a missing file raises ``FileNotFoundError``."""
        rec = ComputerUseRecorder()
        with pytest.raises(FileNotFoundError):
            rec.load_recording("/nonexistent/path/recording.json")

    def test_load_invalid_format(self):
        """Loading JSON with an unexpected shape raises ``ValueError``."""
        rec = ComputerUseRecorder()
        with tempfile.NamedTemporaryFile(
            suffix=".json", delete=False, mode="w"
        ) as handle:
            json.dump({"invalid": True}, handle)
            target = handle.name

        try:
            with pytest.raises(ValueError, match="Invalid recording format"):
                rec.load_recording(target)
        finally:
            Path(target).unlink(missing_ok=True)

    @pytest.mark.asyncio
    async def test_replay(self):
        """Replay a recorded action sequence."""
        rec = ComputerUseRecorder()
        click_ok = ActionResult(success=True, action="click", output="Clicked")
        type_ok = ActionResult(success=True, action="type", output="Typed")
        rec.record("click", {"x": 10, "y": 20}, click_ok)
        rec.record("type", {"text": "hello"}, type_ok)

        target_session = InMemoryComputerUseSession(session_id="replay-test")
        replayed = await rec.replay(target_session)
        assert len(replayed) == 2
        assert replayed[0].success is True
        assert replayed[1].success is True
        # The session was never started explicitly in this test.
        assert target_session.is_started is True

    @pytest.mark.asyncio
    async def test_replay_with_screenshot(self):
        """Replay a sequence that contains a screenshot action."""
        rec = ComputerUseRecorder()
        shot_ok = ActionResult(success=True, action="screenshot", output="Screen captured")
        rec.record("screenshot", {}, shot_ok)

        target_session = InMemoryComputerUseSession(session_id="replay-screenshot")
        replayed = await rec.replay(target_session)
        assert len(replayed) == 1
        assert replayed[0].success is True
|
||
|
||
|
||
# ============================================================
|
||
# ComputerUseTool 构造测试
|
||
# ============================================================
|
||
|
||
|
||
class TestComputerUseToolConstruction:
    """Tests for constructing ``ComputerUseTool``."""

    def test_default_construction(self):
        """Default tool is named ``computer_use`` and requires only ``action``."""
        cu_tool = ComputerUseTool()
        schema = cu_tool.input_schema
        assert cu_tool.name == "computer_use"
        assert schema is not None
        assert "action" in cu_tool.input_schema["properties"]
        assert cu_tool.input_schema["required"] == ["action"]

    def test_custom_construction(self):
        """Custom name and version are stored."""
        cu_tool = ComputerUseTool(name="my_cu", version="2.0.0")
        assert cu_tool.name == "my_cu"
        assert cu_tool.version == "2.0.0"

    def test_to_dict(self):
        """``to_dict`` includes the name and the input schema."""
        as_dict = ComputerUseTool().to_dict()
        assert as_dict["name"] == "computer_use"
        assert "input_schema" in as_dict

    def test_repr(self):
        """``repr`` mentions both the class name and the tool name."""
        text = repr(ComputerUseTool())
        assert "ComputerUseTool" in text
        assert "computer_use" in text

    def test_session_manager_accessible(self):
        """The tool exposes a session manager."""
        cu_tool = ComputerUseTool()
        assert cu_tool.session_manager is not None

    def test_recorder_accessible(self):
        """The tool exposes a recorder."""
        cu_tool = ComputerUseTool()
        assert cu_tool.recorder is not None

    def test_custom_recorder(self):
        """A recorder passed to the constructor is used as-is."""
        own_recorder = ComputerUseRecorder()
        cu_tool = ComputerUseTool(recorder=own_recorder)
        assert cu_tool.recorder is own_recorder
|
||
|
||
|
||
# ============================================================
|
||
# ComputerUseTool 执行测试
|
||
# ============================================================
|
||
|
||
|
||
class TestComputerUseToolExecution:
    """Tests for executing actions through ``ComputerUseTool``."""

    @pytest.mark.asyncio
    async def test_screenshot_action(self):
        """Take a screenshot and identify UI elements."""
        cu_tool = ComputerUseTool()
        reply = await cu_tool.execute(action="screenshot")
        assert reply["success"] is True
        assert reply["action"] == "screenshot"
        assert "output" in reply

    @pytest.mark.asyncio
    async def test_click_action(self):
        """Click at the given coordinates."""
        cu_tool = ComputerUseTool()
        reply = await cu_tool.execute(action="click", x=100, y=200)
        assert reply["success"] is True
        assert reply["action"] == "click"
        assert "(100, 200)" in reply["output"]

    @pytest.mark.asyncio
    async def test_type_action(self):
        """Type text into an input box."""
        cu_tool = ComputerUseTool()
        reply = await cu_tool.execute(action="type", text="hello world")
        assert reply["success"] is True
        assert reply["action"] == "type"
        assert "hello world" in reply["output"]

    @pytest.mark.asyncio
    async def test_scroll_action(self):
        """Scroll action."""
        cu_tool = ComputerUseTool()
        reply = await cu_tool.execute(action="scroll", direction="down", amount=5)
        assert reply["success"] is True
        assert "down" in reply["output"]

    @pytest.mark.asyncio
    async def test_drag_action(self):
        """Drag action."""
        cu_tool = ComputerUseTool()
        reply = await cu_tool.execute(
            action="drag",
            start_x=10,
            start_y=20,
            end_x=100,
            end_y=200,
        )
        assert reply["success"] is True
        assert reply["action"] == "drag"

    @pytest.mark.asyncio
    async def test_key_action(self):
        """Key-press action."""
        cu_tool = ComputerUseTool()
        reply = await cu_tool.execute(action="key", key_name="Enter")
        assert reply["success"] is True
        assert "Enter" in reply["output"]

    @pytest.mark.asyncio
    async def test_wait_action(self):
        """Wait action."""
        cu_tool = ComputerUseTool()
        reply = await cu_tool.execute(action="wait", duration=0.1)
        assert reply["success"] is True

    @pytest.mark.asyncio
    async def test_missing_action(self):
        """Missing ``action`` argument."""
        cu_tool = ComputerUseTool()
        reply = await cu_tool.execute()
        assert reply["success"] is False
        assert "action" in reply["error"]

    @pytest.mark.asyncio
    async def test_invalid_action(self):
        """Invalid action type."""
        cu_tool = ComputerUseTool()
        reply = await cu_tool.execute(action="invalid_action")
        assert reply["success"] is False
        # The error message is expected to contain this Chinese word ("invalid").
        assert "无效" in reply["error"]

    @pytest.mark.asyncio
    async def test_click_missing_coordinates(self):
        """``click`` without coordinates."""
        cu_tool = ComputerUseTool()
        reply = await cu_tool.execute(action="click")
        assert reply["success"] is False
        assert "x" in reply["error"]

    @pytest.mark.asyncio
    async def test_type_missing_text(self):
        """``type`` without text."""
        cu_tool = ComputerUseTool()
        reply = await cu_tool.execute(action="type")
        assert reply["success"] is False
        assert "text" in reply["error"]

    @pytest.mark.asyncio
    async def test_key_missing_key_name(self):
        """``key`` without a key name."""
        cu_tool = ComputerUseTool()
        reply = await cu_tool.execute(action="key")
        assert reply["success"] is False
        assert "key_name" in reply["error"]

    @pytest.mark.asyncio
    async def test_drag_missing_params(self):
        """``drag`` with only the start point."""
        cu_tool = ComputerUseTool()
        reply = await cu_tool.execute(action="drag", start_x=0, start_y=0)
        assert reply["success"] is False
        assert "end_x" in reply["error"]

    @pytest.mark.asyncio
    async def test_session_id(self):
        """An explicit session id is echoed back in the result."""
        cu_tool = ComputerUseTool()
        reply = await cu_tool.execute(action="screenshot", session_id="my-session")
        assert reply["success"] is True
        assert reply["session_id"] == "my-session"

    @pytest.mark.asyncio
    async def test_session_reuse(self):
        """The same session id reuses the session."""
        cu_tool = ComputerUseTool()
        clicked = await cu_tool.execute(action="click", x=10, y=20, session_id="reuse-test")
        typed = await cu_tool.execute(action="type", text="hello", session_id="reuse-test")
        assert clicked["session_id"] == "reuse-test"
        assert typed["session_id"] == "reuse-test"
        assert cu_tool.session_manager.has_session("reuse-test")
|
||
|
||
|
||
# ============================================================
|
||
# ComputerUseTool 多步骤 UI 操作测试
|
||
# ============================================================
|
||
|
||
|
||
class TestComputerUseToolMultiStep:
    """Tests for multi-step UI operations."""

    @pytest.mark.asyncio
    async def test_multi_step_ui_operation(self):
        """Multi-step UI operation: screenshot -> click -> type -> key."""
        cu_tool = ComputerUseTool()
        session_id = "multi-step"

        # Same four steps, in the same order, each bound to the shared session.
        plan = (
            {"action": "screenshot"},  # Step 1: screenshot to identify the UI
            {"action": "click", "x": 100, "y": 200},  # Step 2: click the input box
            {"action": "type", "text": "test input"},  # Step 3: type text
            {"action": "key", "key_name": "Enter"},  # Step 4: press Enter to submit
        )
        for step in plan:
            reply = await cu_tool.execute(**step, session_id=session_id)
            assert reply["success"] is True

        # Verify the recording.
        assert cu_tool.recorder.total_actions == 4
        assert cu_tool.recorder.success_count == 4

    @pytest.mark.asyncio
    async def test_each_step_informs_next(self):
        """Each step decides the next one based on the previous result."""
        cu_tool = ComputerUseTool()
        session_id = "adaptive"

        # Step 1: screenshot.
        first = await cu_tool.execute(action="screenshot", session_id=session_id)
        assert first["success"] is True

        # Choose the follow-up from the screenshot outcome (simulated decision
        # logic). Because of the assertion above, only the click branch can
        # actually run here.
        if not first["success"]:
            follow_up = await cu_tool.execute(
                action="wait", duration=1.0, session_id=session_id
            )
        else:
            follow_up = await cu_tool.execute(
                action="click", x=50, y=50, session_id=session_id
            )

        assert follow_up["success"] is True
|
||
|
||
|
||
# ============================================================
|
||
# ComputerUseTool 降级测试
|
||
# ============================================================
|
||
|
||
|
||
class TestComputerUseToolFallback:
    """Tests for the fallback chain of ``ComputerUseTool``."""

    @staticmethod
    def _inject_failing_session(tool, session_id, error):
        """Register a mock session whose ``execute_action`` always fails.

        The mock reports itself as started, returns a successful screenshot
        with an empty image, and fails every ``execute_action`` call with
        ``error``. It is placed directly into the tool's session table under
        ``session_id`` so that ``tool.execute(..., session_id=...)`` picks it up.

        Returns the mock session.
        """
        session = AsyncMock(spec=ComputerUseSession)
        session.session_id = session_id
        session.screen = ScreenInfo()
        session.is_started = True
        session.screenshot.return_value = ActionResult(
            success=True, action="screenshot", screenshot_base64=""
        )
        session.execute_action.return_value = ActionResult(
            success=False, action="click", error=error
        )
        # Inject the mock directly; this reaches into private state on purpose.
        tool._session_manager._sessions[session_id] = session
        return session

    @pytest.mark.asyncio
    async def test_fallback_without_api_key(self):
        """Without an API key the action is executed by the local session."""
        tool = ComputerUseTool()  # no api_key
        result = await tool.execute(action="click", x=100, y=200)
        assert result["success"] is True  # the in-memory session can execute it

    @pytest.mark.asyncio
    async def test_api_failure_fallback_to_session(self):
        """When the API call raises, execution falls back to the local session."""
        tool = ComputerUseTool(api_key="sk-test-key")

        with patch.object(
            tool,
            "_call_anthropic_api",
            new_callable=AsyncMock,
            side_effect=Exception("API connection failed"),
        ):
            result = await tool.execute(action="click", x=100, y=200)
            assert result["success"] is True  # fell back to the in-memory session

    @pytest.mark.asyncio
    async def test_api_and_session_failure_fallback_suggestion(self):
        """When both API and session fail, a fallback suggestion is returned."""
        tool = ComputerUseTool(api_key="sk-test-key")

        # Mock the API to report failure.
        with patch.object(
            tool,
            "_call_anthropic_api",
            new_callable=AsyncMock,
            return_value=ActionResult(success=False, action="click", error="API error"),
        ):
            # Make the session fail as well.
            self._inject_failing_session(tool, "fallback-test", "Session error")

            result = await tool.execute(
                action="click", x=100, y=200, session_id="fallback-test"
            )
            assert result["success"] is False
            assert "fallback_suggestion" in result

    @pytest.mark.asyncio
    async def test_custom_fallback_callback(self):
        """A custom fallback callback is used when execution fails."""

        async def fallback(action: str, params: dict) -> dict:
            return {"success": True, "output": f"Fallback executed: {action}"}

        # NOTE(review): this tool is built without an api_key, so the patched
        # API call below may never be reached — confirm against the tool.
        tool = ComputerUseTool(fallback_callback=fallback)

        # Mock both the API and the session to fail.
        with patch.object(
            tool,
            "_call_anthropic_api",
            new_callable=AsyncMock,
            side_effect=Exception("API failed"),
        ):
            self._inject_failing_session(tool, "cb-test", "Session error")

            result = await tool.execute(
                action="click", x=100, y=200, session_id="cb-test"
            )
            assert result["success"] is True
            assert "Fallback" in result["output"]

    @pytest.mark.asyncio
    async def test_ae2_computer_use_fallback_to_oa_api(self):
        """AE2: Computer Use fails -> fall back to the OA system API.

        Scenario: Computer Use cannot operate the OA system UI, so the
        operation is completed through the OA system API instead.
        """
        oa_api_called = False

        async def oa_api_fallback(action: str, params: dict) -> dict:
            nonlocal oa_api_called
            oa_api_called = True
            return {
                "success": True,
                "output": f"OA API completed: {action} with {params}",
            }

        tool = ComputerUseTool(
            api_key="sk-test-key",
            fallback_callback=oa_api_fallback,
        )

        # Mock both the API and the session to fail.
        with patch.object(
            tool,
            "_call_anthropic_api",
            new_callable=AsyncMock,
            side_effect=Exception("API unavailable"),
        ):
            self._inject_failing_session(tool, "oa-test", "UI not accessible")

            result = await tool.execute(
                action="click", x=100, y=200, session_id="oa-test"
            )
            assert result["success"] is True
            assert oa_api_called is True
|
||
|
||
|
||
# ============================================================
|
||
# ComputerUseTool 录制集成测试
|
||
# ============================================================
|
||
|
||
|
||
class TestComputerUseToolRecording:
    """Tests for the integration of ``ComputerUseTool`` with its recorder."""

    @pytest.mark.asyncio
    async def test_actions_recorded(self):
        """Executed actions are recorded automatically."""
        cu_tool = ComputerUseTool()
        await cu_tool.execute(action="click", x=10, y=20)
        await cu_tool.execute(action="type", text="hello")
        assert cu_tool.recorder.total_actions == 2

    @pytest.mark.asyncio
    async def test_recording_save_and_replay(self):
        """A recording can be saved, loaded elsewhere, and replayed."""
        cu_tool = ComputerUseTool()
        await cu_tool.execute(action="click", x=10, y=20, session_id="rec-1")
        await cu_tool.execute(action="type", text="hello", session_id="rec-1")

        # Only the file name is needed; the file is removed in ``finally``.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as handle:
            target = handle.name

        try:
            cu_tool.recorder.save_recording(target)

            # Load into a new recorder and replay.
            reader = ComputerUseRecorder()
            reader.load_recording(target)
            assert reader.total_actions == 2

            replay_session = InMemoryComputerUseSession(session_id="replay-1")
            replayed = await reader.replay(replay_session)
            assert len(replayed) == 2
            assert all(item.success for item in replayed)
        finally:
            Path(target).unlink(missing_ok=True)

    @pytest.mark.asyncio
    async def test_recording_summary(self):
        """The recorder summary reflects the executed actions."""
        cu_tool = ComputerUseTool()
        await cu_tool.execute(action="click", x=10, y=20)
        await cu_tool.execute(action="type", text="hello")
        report = cu_tool.recorder.summary()
        assert report["total_actions"] == 2
        assert report["success_count"] == 2
        assert report["failure_count"] == 0
|
||
|
||
|
||
# ============================================================
|
||
# ComputerUseTool API 调用测试(Mock httpx)
|
||
# ============================================================
|
||
|
||
|
||
class TestComputerUseToolAPICall:
    """Tests for the Anthropic API call of ``ComputerUseTool`` (mocked httpx)."""

    @staticmethod
    def _wire_client(mock_client_cls, *, response=None, error=None):
        """Configure a patched ``httpx.AsyncClient`` class to yield a mock client.

        The mock client works as an async context manager that returns itself.
        When ``error`` is given, ``post`` raises it; otherwise ``post`` returns
        ``response``.

        Returns the mock client.
        """
        client = AsyncMock()
        if error is not None:
            client.post.side_effect = error
        else:
            client.post.return_value = response
        client.__aenter__ = AsyncMock(return_value=client)
        client.__aexit__ = AsyncMock(return_value=False)
        mock_client_cls.return_value = client
        return client

    @pytest.mark.asyncio
    async def test_api_call_success(self):
        """The API call succeeds."""
        tool = ComputerUseTool(api_key="sk-test-key")

        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "content": [
                {
                    "type": "tool_use",
                    "name": "computer",
                    "input": {"action": "click"},
                }
            ]
        }

        with patch("httpx.AsyncClient") as mock_client_cls:
            self._wire_client(mock_client_cls, response=mock_response)

            result = await tool.execute(action="click", x=100, y=200)
            assert result["success"] is True

    @pytest.mark.asyncio
    async def test_api_call_http_error(self):
        """The API call returns an HTTP error."""
        tool = ComputerUseTool(api_key="sk-test-key")

        mock_response = MagicMock()
        mock_response.status_code = 429
        mock_response.text = "Rate limited"

        with patch("httpx.AsyncClient") as mock_client_cls:
            self._wire_client(mock_client_cls, response=mock_response)

            # The API returns an error; execution falls back to the local session.
            result = await tool.execute(action="click", x=100, y=200)
            assert result["success"] is True  # fallback succeeded

    @pytest.mark.asyncio
    async def test_api_call_network_error(self):
        """The API call hits a network error."""
        tool = ComputerUseTool(api_key="sk-test-key")

        with patch("httpx.AsyncClient") as mock_client_cls:
            self._wire_client(mock_client_cls, error=Exception("Connection refused"))

            # Network error; execution falls back to the local session.
            result = await tool.execute(action="click", x=100, y=200)
            assert result["success"] is True  # fallback succeeded
|