137 lines
5.2 KiB
Python
137 lines
5.2 KiB
Python
"""VerificationLoop 单元测试"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
|
|
import pytest
|
|
|
|
from agentkit.core.verification_loop import VerificationLoop, VerificationResult
|
|
|
|
|
|
class TestVerify:
    """Tests for ``VerificationLoop.verify()``.

    The commands used here (``echo``, ``false``, ``sleep``) are executed as
    real subprocesses by the loop under test.
    """

    @pytest.mark.asyncio
    async def test_verify_success(self) -> None:
        """A succeeding command produces ``passed=True`` with no errors."""
        verifier = VerificationLoop(commands=["echo ok"], timeout=10.0)

        outcome = await verifier.verify()

        assert isinstance(outcome, VerificationResult)
        assert outcome.passed is True
        assert outcome.attempts == 1
        assert "ok" in outcome.test_output
        assert outcome.errors == []

    @pytest.mark.asyncio
    async def test_verify_failure(self) -> None:
        """A failing command produces ``passed=False`` and records an error."""
        verifier = VerificationLoop(commands=["false"], timeout=10.0)

        outcome = await verifier.verify()

        assert outcome.passed is False
        assert outcome.attempts == 1
        assert len(outcome.errors) > 0

    @pytest.mark.asyncio
    async def test_verify_timeout(self) -> None:
        """A command that outlives the timeout produces ``passed=False``."""
        verifier = VerificationLoop(commands=["sleep 10"], timeout=0.5)

        outcome = await verifier.verify()

        assert outcome.passed is False
        # At least one recorded error must mention the timeout.
        timeout_errors = [err for err in outcome.errors if "timed out" in err]
        assert timeout_errors

    @pytest.mark.asyncio
    async def test_verify_command_not_found(self) -> None:
        """A nonexistent command produces ``passed=False`` and an error."""
        verifier = VerificationLoop(
            commands=["nonexistent_command_xyz"],
            timeout=5.0,
        )

        outcome = await verifier.verify()

        assert outcome.passed is False
        assert len(outcome.errors) > 0

    @pytest.mark.asyncio
    async def test_verify_multiple_commands_partial_failure(self) -> None:
        """If only some commands fail, the overall result is still a failure."""
        mixed_commands = ["echo ok", "false"]
        verifier = VerificationLoop(commands=mixed_commands, timeout=10.0)

        outcome = await verifier.verify()

        assert outcome.passed is False
        # Only the failing command contributes an error entry.
        assert len(outcome.errors) == 1

    @pytest.mark.asyncio
    async def test_verify_default_commands(self) -> None:
        """Without explicit commands the loop defaults to pytest and ruff check."""
        expected_defaults = ["pytest -x -q", "ruff check src/"]

        verifier = VerificationLoop()

        assert verifier._commands == expected_defaults
|
|
|
|
|
|
class TestVerifyAndRetry:
    """Tests for ``VerificationLoop.verify_and_retry()``.

    The commands used here (``echo``, ``false``, ``test``) are executed as
    real subprocesses by the loop under test.
    """

    @pytest.mark.asyncio
    async def test_retry_no_fix_callback(self) -> None:
        """Without a fix_callback the loop still retries ``max_retries`` times."""
        loop = VerificationLoop(commands=["false"], max_retries=2, timeout=5.0)

        result = await loop.verify_and_retry()

        assert result.passed is False
        assert result.attempts == 3  # 1 initial + 2 retries

    @pytest.mark.asyncio
    async def test_max_retries_respected(self) -> None:
        """``max_retries=0`` means a single attempt and no retry."""
        loop = VerificationLoop(commands=["false"], max_retries=0, timeout=5.0)

        result = await loop.verify_and_retry()

        assert result.passed is False
        assert result.attempts == 1

    @pytest.mark.asyncio
    async def test_retry_with_fix_callback(self) -> None:
        """fix_callback is invoked once and receives errors and test_output."""
        call_count = 0
        received_args: list[tuple[list[str], str]] = []

        async def fix_cb(errors: list[str], test_output: str) -> None:
            nonlocal call_count
            call_count += 1
            received_args.append((errors, test_output))

        loop = VerificationLoop(commands=["false"], max_retries=1, timeout=5.0)

        result = await loop.verify_and_retry(fix_callback=fix_cb)

        # ``false`` can never be fixed, so the retry fails as well.
        assert result.passed is False
        assert result.attempts == 2  # 1 initial + 1 retry
        assert call_count == 1
        assert len(received_args) == 1
        assert len(received_args[0][0]) > 0  # errors
        assert isinstance(received_args[0][1], str)  # test_output

    @pytest.mark.asyncio
    async def test_retry_succeeds_after_fix(self) -> None:
        """Verification passes on the retry once fix_callback repairs the failure.

        The verification command checks for a marker file that does not exist
        at first; the fix callback creates it, so the initial attempt fails
        and the retry succeeds.
        """
        # Local imports keep this test self-contained without touching the
        # module-level import block.
        import shlex
        import tempfile
        from pathlib import Path

        with tempfile.TemporaryDirectory() as tmp_dir:
            marker = Path(tmp_dir) / "fixed.marker"
            fix_calls = 0

            async def fix_cb(errors: list[str], test_output: str) -> None:
                nonlocal fix_calls
                fix_calls += 1
                marker.touch()  # Simulate the fix being applied.

            # NOTE(review): assumes the command string is run through a shell
            # or a shlex-style split, as the other string commands in this
            # module suggest; the path is quoted so both interpretations work.
            loop = VerificationLoop(
                commands=[f"test -f {shlex.quote(str(marker))}"],
                max_retries=1,
                timeout=5.0,
            )

            result = await loop.verify_and_retry(fix_callback=fix_cb)

        assert fix_calls == 1
        assert result.passed is True
        assert result.attempts == 2  # failed once, passed on the retry

    @pytest.mark.asyncio
    async def test_fix_callback_exception_handled(self) -> None:
        """An exception raised by fix_callback does not prevent the retry."""

        async def bad_fix_cb(errors: list[str], test_output: str) -> None:
            raise RuntimeError("fix failed!")

        loop = VerificationLoop(commands=["false"], max_retries=1, timeout=5.0)

        result = await loop.verify_and_retry(fix_callback=bad_fix_cb)

        assert result.passed is False
        assert result.attempts == 2

    @pytest.mark.asyncio
    async def test_verify_and_retry_success_first_try(self) -> None:
        """No retry happens when the first verification already passes."""
        loop = VerificationLoop(commands=["echo ok"], max_retries=3, timeout=10.0)

        result = await loop.verify_and_retry()

        assert result.passed is True
        assert result.attempts == 1
|