# Extraction metadata: 513 lines, 23 KiB, Python
"""Tests for PathOptimizer - 执行路径优化器"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from datetime import datetime, timezone
|
||
|
||
import pytest
|
||
|
||
from agentkit.evolution.path_optimizer import ExecutionPath, PathOptimizer, PathUpdateResult
|
||
|
||
|
||
# ── Fixtures ──────────────────────────────────────────────
|
||
|
||
|
||
@pytest.fixture
def optimizer():
    """Provide the PathOptimizer instance shared by most tests.

    Configured with ``min_sample_count=3``, ``success_rate_threshold=0.05``
    and ``duration_improvement_threshold=0.2``.
    """
    return PathOptimizer(
        min_sample_count=3,
        success_rate_threshold=0.05,
        duration_improvement_threshold=0.2,
    )
|
||
|
||
|
||
@pytest.fixture
def optimizer_custom_thresholds():
    """Provide a PathOptimizer built with custom thresholds.

    Configured with ``min_sample_count=5``, ``success_rate_threshold=0.1``
    and ``duration_improvement_threshold=0.3``.
    """
    return PathOptimizer(
        min_sample_count=5,
        success_rate_threshold=0.1,
        duration_improvement_threshold=0.3,
    )
|
||
|
||
|
||
def _make_path(
    task_type: str = "code_review",
    steps: list[str] | None = None,
    total_duration: float = 10.0,
    success_rate: float = 0.8,
    sample_count: int = 5,
    is_recommended: bool = False,
    path_id: str = "",
    created_at: datetime | None = None,
) -> ExecutionPath:
    """Build an ExecutionPath for tests, filling in defaults for omitted fields.

    Args:
        task_type: Task type stored on the path.
        steps: Step names; ``None`` selects ``["step1", "step2", "step3"]``.
            An explicitly passed empty list is kept as-is.
        total_duration: Value for the path's ``total_duration`` field.
        success_rate: Value for the path's ``success_rate`` field.
        sample_count: Value for the path's ``sample_count`` field.
        is_recommended: Value for the path's ``is_recommended`` field.
        path_id: Value for the path's ``path_id`` field.
        created_at: Creation timestamp; ``None`` selects the current UTC time.

    Returns:
        The constructed ExecutionPath.
    """
    # Compare against None rather than relying on truthiness so that a caller
    # can deliberately build a path with an empty step list.
    if steps is None:
        steps = ["step1", "step2", "step3"]
    if created_at is None:
        created_at = datetime.now(timezone.utc)

    return ExecutionPath(
        path_id=path_id,
        task_type=task_type,
        steps=steps,
        total_duration=total_duration,
        success_rate=success_rate,
        sample_count=sample_count,
        is_recommended=is_recommended,
        created_at=created_at,
    )
|
||
|
||
|
||
# ── ExecutionPath 数据模型测试 ────────────────────────────
|
||
|
||
|
||
class TestExecutionPath:
    """Tests for the ExecutionPath data model."""

    def test_default_values(self):
        """An ExecutionPath built without arguments exposes the expected defaults."""
        path = ExecutionPath()
        assert path.path_id == ""
        assert path.task_type == ""
        assert path.steps == []
        assert path.total_duration == 0.0
        assert path.success_rate == 0.0
        assert path.sample_count == 0
        assert path.is_recommended is False
        assert isinstance(path.created_at, datetime)

    def test_custom_values(self):
        """Every explicitly supplied field is stored unchanged."""
        now = datetime.now(timezone.utc)
        path = ExecutionPath(
            path_id="p1",
            task_type="code_review",
            steps=["analyze", "review", "report"],
            total_duration=15.5,
            success_rate=0.9,
            sample_count=10,
            is_recommended=True,
            created_at=now,
        )
        assert path.path_id == "p1"
        assert path.task_type == "code_review"
        assert path.steps == ["analyze", "review", "report"]
        assert path.total_duration == 15.5
        assert path.success_rate == 0.9
        assert path.sample_count == 10
        assert path.is_recommended is True
        assert path.created_at == now
|
||
|
||
|
||
# ── PathUpdateResult 数据模型测试 ─────────────────────────
|
||
|
||
|
||
class TestPathUpdateResult:
    """Tests for the PathUpdateResult data model."""

    def test_default_values(self):
        """A PathUpdateResult built without arguments reports 'not updated'."""
        result = PathUpdateResult()
        assert result.updated is False
        assert result.old_path is None
        assert result.new_path is None
        assert result.reason == ""

    def test_updated_result(self):
        """Supplied paths and reason are stored on the result unchanged."""
        old = _make_path(success_rate=0.7)
        new = _make_path(success_rate=0.9)
        result = PathUpdateResult(
            updated=True,
            old_path=old,
            new_path=new,
            reason="成功率显著提升",
        )
        assert result.updated is True
        assert result.old_path.success_rate == 0.7
        assert result.new_path.success_rate == 0.9
        assert "成功率" in result.reason
|
||
|
||
|
||
# ── get_recommended_path 测试 ─────────────────────────────
|
||
|
||
|
||
class TestGetRecommendedPath:
    """Tests for PathOptimizer.get_recommended_path."""

    async def test_no_recommended_path(self, optimizer):
        """A task type with no submitted paths has no recommendation."""
        result = optimizer.get_recommended_path("code_review")
        assert result is None

    async def test_returns_recommended_path(self, optimizer):
        """A path accepted by evaluate_and_update is returned as recommended."""
        path = _make_path(task_type="code_review", success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", path)

        result = optimizer.get_recommended_path("code_review")
        assert result is not None
        assert result.success_rate == 0.8
        assert result.is_recommended is True

    async def test_different_task_types_independent(self, optimizer):
        """Each task type keeps its own recommended path."""
        path_a = _make_path(task_type="code_review", success_rate=0.8, sample_count=5)
        path_b = _make_path(task_type="data_analysis", success_rate=0.9, sample_count=5)
        await optimizer.evaluate_and_update("code_review", path_a)
        await optimizer.evaluate_and_update("data_analysis", path_b)

        result_a = optimizer.get_recommended_path("code_review")
        result_b = optimizer.get_recommended_path("data_analysis")
        assert result_a is not None
        assert result_b is not None
        assert result_a.success_rate == 0.8
        assert result_b.success_rate == 0.9
|
||
|
||
|
||
# ── 样本量不足测试 ────────────────────────────────────────
|
||
|
||
|
||
class TestInsufficientSamples:
    """Tests for paths whose sample count is below the optimizer's minimum."""

    async def test_insufficient_samples_no_update(self, optimizer):
        """Too few samples -> no update, and no recommendation is created."""
        path = _make_path(sample_count=2, success_rate=0.9)
        result = await optimizer.evaluate_and_update("code_review", path)
        assert result.updated is False
        assert "样本量不足" in result.reason
        assert optimizer.get_recommended_path("code_review") is None

    async def test_insufficient_samples_recorded_as_pending(self, optimizer):
        """A path with too few samples is recorded in the pending list."""
        path = _make_path(sample_count=2, success_rate=0.9)
        await optimizer.evaluate_and_update("code_review", path)

        pending = optimizer.get_pending_paths("code_review")
        assert len(pending) == 1
        assert pending[0].success_rate == 0.9

    async def test_exact_min_samples_updates(self, optimizer):
        """A sample count equal to the fixture's minimum (3) is accepted."""
        path = _make_path(sample_count=3, success_rate=0.8)
        result = await optimizer.evaluate_and_update("code_review", path)
        assert result.updated is True
        assert result.reason == "无现有推荐路径,直接设为推荐"

    async def test_custom_min_sample_count(self, optimizer_custom_thresholds):
        """With the custom minimum of 5, a path with 4 samples is rejected."""
        path = _make_path(sample_count=4, success_rate=0.9)
        result = await optimizer_custom_thresholds.evaluate_and_update("code_review", path)
        assert result.updated is False
        assert "样本量不足" in result.reason
|
||
|
||
|
||
# ── 首次设置推荐路径测试 ──────────────────────────────────
|
||
|
||
|
||
class TestFirstRecommendation:
    """Tests for the first path submitted for a task type."""

    async def test_first_path_becomes_recommended(self, optimizer):
        """With no existing recommendation, the new path is recommended directly."""
        path = _make_path(success_rate=0.7, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", path)
        assert result.updated is True
        assert result.old_path is None
        assert result.new_path is not None
        assert result.new_path.is_recommended is True
        assert "无现有推荐路径" in result.reason

    async def test_auto_generates_path_id(self, optimizer):
        """An empty path_id is replaced by a non-empty one on acceptance."""
        path = _make_path(path_id="", sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", path)
        assert result.updated is True
        assert result.new_path is not None
        assert len(result.new_path.path_id) > 0
|
||
|
||
|
||
# ── 成功率显著提升测试 ────────────────────────────────────
|
||
|
||
|
||
class TestSuccessRateImprovement:
    """Tests for replacing the recommendation based on success rate."""

    async def test_higher_success_rate_updates(self, optimizer):
        """A clearly higher success rate replaces the recommended path."""
        old_path = _make_path(success_rate=0.7, sample_count=5)
        await optimizer.evaluate_and_update("code_review", old_path)

        new_path = _make_path(success_rate=0.85, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", new_path)
        assert result.updated is True
        assert result.old_path.success_rate == 0.7
        assert result.new_path.success_rate == 0.85
        assert "成功率显著提升" in result.reason

    async def test_marginal_success_rate_no_update(self, optimizer):
        """A success-rate gain below the threshold does not trigger an update."""
        old_path = _make_path(success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", old_path)

        # Gain of only 0.03, below the fixture's 0.05 threshold.
        new_path = _make_path(success_rate=0.83, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", new_path)
        assert result.updated is False
        assert "无明显优势" in result.reason

    async def test_custom_success_rate_threshold(self, optimizer_custom_thresholds):
        """The custom success-rate threshold (0.1) is honoured."""
        old_path = _make_path(success_rate=0.7, sample_count=10)
        await optimizer_custom_thresholds.evaluate_and_update("code_review", old_path)

        # Gain of 0.08, below the custom 0.1 threshold.
        new_path = _make_path(success_rate=0.78, sample_count=10)
        result = await optimizer_custom_thresholds.evaluate_and_update("code_review", new_path)
        assert result.updated is False

    async def test_lower_success_rate_no_update(self, optimizer):
        """A lower success rate never replaces the recommended path."""
        old_path = _make_path(success_rate=0.9, sample_count=5)
        await optimizer.evaluate_and_update("code_review", old_path)

        new_path = _make_path(success_rate=0.6, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", new_path)
        assert result.updated is False
|
||
|
||
|
||
# ── 耗时显著更短测试 ──────────────────────────────────────
|
||
|
||
|
||
class TestDurationImprovement:
    """Tests for replacing the recommendation based on shorter duration."""

    async def test_shorter_duration_with_similar_success_rate_updates(self, optimizer):
        """Similar success rate but clearly shorter duration -> update."""
        old_path = _make_path(total_duration=100.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", old_path)

        # Duration drops by 30% (> 20% threshold); success rate stays close.
        new_path = _make_path(total_duration=70.0, success_rate=0.82, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", new_path)
        assert result.updated is True
        assert "耗时显著更短" in result.reason

    async def test_marginal_duration_improvement_no_update(self, optimizer):
        """A duration gain below the threshold does not trigger an update."""
        old_path = _make_path(total_duration=100.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", old_path)

        # Duration drops by only 10% (< 20% threshold).
        new_path = _make_path(total_duration=90.0, success_rate=0.82, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", new_path)
        assert result.updated is False
        assert "无明显优势" in result.reason

    async def test_longer_duration_no_update(self, optimizer):
        """A longer duration does not trigger an update."""
        old_path = _make_path(total_duration=50.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", old_path)

        new_path = _make_path(total_duration=80.0, success_rate=0.82, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", new_path)
        assert result.updated is False

    async def test_custom_duration_improvement_threshold(self, optimizer_custom_thresholds):
        """The custom duration-improvement threshold (0.3) is honoured."""
        old_path = _make_path(total_duration=100.0, success_rate=0.8, sample_count=10)
        await optimizer_custom_thresholds.evaluate_and_update("code_review", old_path)

        # Duration drops by 25% (< 30% custom threshold).
        new_path = _make_path(total_duration=75.0, success_rate=0.82, sample_count=10)
        result = await optimizer_custom_thresholds.evaluate_and_update("code_review", new_path)
        assert result.updated is False

    async def test_zero_duration_current_path(self, optimizer):
        """A current path with zero duration is not replaced on duration grounds."""
        old_path = _make_path(total_duration=0.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", old_path)

        new_path = _make_path(total_duration=10.0, success_rate=0.82, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", new_path)
        assert result.updated is False

    async def test_both_zero_duration(self, optimizer):
        """When both durations are zero, no duration-based update happens."""
        old_path = _make_path(total_duration=0.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", old_path)

        new_path = _make_path(total_duration=0.0, success_rate=0.82, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", new_path)
        assert result.updated is False
|
||
|
||
|
||
# ── 保留现有推荐路径测试 ──────────────────────────────────
|
||
|
||
|
||
class TestKeepCurrentPath:
    """Tests that the current recommendation survives a non-superior candidate."""

    async def test_no_advantage_keeps_current(self, optimizer):
        """A candidate without a clear advantage leaves the recommendation as is."""
        old_path = _make_path(total_duration=50.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", old_path)

        new_path = _make_path(total_duration=48.0, success_rate=0.79, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", new_path)
        assert result.updated is False
        assert result.old_path.success_rate == 0.8

        # The recommended path is unchanged.
        recommended = optimizer.get_recommended_path("code_review")
        assert recommended is not None
        assert recommended.success_rate == 0.8

    async def test_is_recommended_flag_preserved(self, optimizer):
        """Without an update, the current path keeps is_recommended == True."""
        old_path = _make_path(success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", old_path)

        new_path = _make_path(success_rate=0.79, sample_count=5)
        await optimizer.evaluate_and_update("code_review", new_path)

        recommended = optimizer.get_recommended_path("code_review")
        assert recommended is not None
        assert recommended.is_recommended is True
|
||
|
||
|
||
# ── is_recommended 标志管理测试 ────────────────────────────
|
||
|
||
|
||
class TestIsRecommendedFlag:
    """Tests for how the is_recommended flag moves between paths."""

    async def test_old_path_loses_recommended_flag(self, optimizer):
        """After an update, the replaced path has is_recommended == False."""
        old_path = _make_path(success_rate=0.7, sample_count=5)
        await optimizer.evaluate_and_update("code_review", old_path)
        assert old_path.is_recommended is True  # set on first acceptance

        new_path = _make_path(success_rate=0.9, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", new_path)
        assert result.updated is True
        assert result.old_path.is_recommended is False  # flag cleared on replacement
        assert result.new_path.is_recommended is True
|
||
|
||
|
||
# ── 多次迭代优化测试 ──────────────────────────────────────
|
||
|
||
|
||
class TestIterativeOptimization:
    """Tests for repeated submissions against the same optimizer."""

    async def test_multiple_updates_converge_to_best(self, optimizer):
        """Successive submissions leave the best path as the recommendation."""
        # Round 1: initial path.
        path1 = _make_path(success_rate=0.6, total_duration=100.0, sample_count=5)
        await optimizer.evaluate_and_update("code_review", path1)
        assert optimizer.get_recommended_path("code_review").success_rate == 0.6

        # Round 2: clearly higher success rate.
        path2 = _make_path(success_rate=0.8, total_duration=90.0, sample_count=5)
        await optimizer.evaluate_and_update("code_review", path2)
        assert optimizer.get_recommended_path("code_review").success_rate == 0.8

        # Round 3: similar success rate but shorter duration.
        path3 = _make_path(success_rate=0.82, total_duration=50.0, sample_count=5)
        await optimizer.evaluate_and_update("code_review", path3)
        assert optimizer.get_recommended_path("code_review").total_duration == 50.0

        # Round 4: no clear advantage, so the recommendation stays.
        path4 = _make_path(success_rate=0.81, total_duration=48.0, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", path4)
        assert result.updated is False
        assert optimizer.get_recommended_path("code_review").total_duration == 50.0

    async def test_different_task_types_evolve_independently(self, optimizer):
        """Updating one task type's recommendation leaves other task types alone."""
        path_a1 = _make_path(task_type="code_review", success_rate=0.7, sample_count=5)
        path_b1 = _make_path(task_type="data_analysis", success_rate=0.6, sample_count=5)
        await optimizer.evaluate_and_update("code_review", path_a1)
        await optimizer.evaluate_and_update("data_analysis", path_b1)

        path_a2 = _make_path(task_type="code_review", success_rate=0.9, sample_count=5)
        await optimizer.evaluate_and_update("code_review", path_a2)

        # code_review was updated; data_analysis is unaffected.
        assert optimizer.get_recommended_path("code_review").success_rate == 0.9
        assert optimizer.get_recommended_path("data_analysis").success_rate == 0.6
|
||
|
||
|
||
# ── 待观察路径管理测试 ────────────────────────────────────
|
||
|
||
|
||
class TestPendingPaths:
    """Tests for PathOptimizer.get_pending_paths."""

    async def test_pending_paths_empty_initially(self, optimizer):
        """A fresh optimizer has no pending paths."""
        assert optimizer.get_pending_paths("code_review") == []

    async def test_pending_paths_accumulate(self, optimizer):
        """Each under-sampled submission adds another pending entry."""
        path1 = _make_path(sample_count=1, success_rate=0.9)
        path2 = _make_path(sample_count=2, success_rate=0.85)
        await optimizer.evaluate_and_update("code_review", path1)
        await optimizer.evaluate_and_update("code_review", path2)

        pending = optimizer.get_pending_paths("code_review")
        assert len(pending) == 2

    async def test_pending_paths_isolated_by_task_type(self, optimizer):
        """Pending paths are tracked separately per task type."""
        path_a = _make_path(task_type="code_review", sample_count=1, success_rate=0.9)
        path_b = _make_path(task_type="data_analysis", sample_count=1, success_rate=0.8)
        await optimizer.evaluate_and_update("code_review", path_a)
        await optimizer.evaluate_and_update("data_analysis", path_b)

        assert len(optimizer.get_pending_paths("code_review")) == 1
        assert len(optimizer.get_pending_paths("data_analysis")) == 1

    async def test_sufficient_samples_not_pending(self, optimizer):
        """A path with enough samples is not added to the pending list."""
        path = _make_path(sample_count=5, success_rate=0.8)
        await optimizer.evaluate_and_update("code_review", path)
        assert optimizer.get_pending_paths("code_review") == []
|
||
|
||
|
||
# ── ExperienceStore 集成测试 ──────────────────────────────
|
||
|
||
|
||
class TestExperienceStoreIntegration:
    """Tests for constructing PathOptimizer with and without an experience store."""

    async def test_with_experience_store(self):
        """PathOptimizer accepts an experience store instance."""
        from agentkit.evolution.experience_store import InMemoryExperienceStore

        store = InMemoryExperienceStore()
        optimizer = PathOptimizer(experience_store=store, min_sample_count=3)

        path = _make_path(success_rate=0.8, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", path)
        assert result.updated is True

    async def test_without_experience_store(self, optimizer):
        """PathOptimizer works when no experience store is supplied."""
        path = _make_path(success_rate=0.8, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", path)
        assert result.updated is True
|
||
|
||
|
||
# ── 边界条件测试 ──────────────────────────────────────────
|
||
|
||
|
||
class TestEdgeCases:
    """Boundary-condition tests for PathOptimizer.evaluate_and_update."""

    async def test_same_path_twice(self, optimizer):
        """Submitting an equivalent path a second time does not trigger an update."""
        path = _make_path(success_rate=0.8, sample_count=5)
        result1 = await optimizer.evaluate_and_update("code_review", path)
        assert result1.updated is True

        # Second submission: same parameters, but a distinct instance.
        path2 = _make_path(success_rate=0.8, sample_count=5)
        result2 = await optimizer.evaluate_and_update("code_review", path2)
        # Same success rate and same duration -> no clear advantage.
        assert result2.updated is False

    async def test_success_rate_at_boundary(self, optimizer):
        """A success-rate gain nominally equal to the threshold does not update."""
        old_path = _make_path(success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", old_path)

        # Nominal gain equals the 0.05 threshold, which must not satisfy
        # "> threshold".
        # NOTE(review): in binary floating point 0.85 - 0.8 evaluates slightly
        # below 0.05, so this may not sit exactly on the boundary - confirm
        # against how the optimizer computes the difference.
        new_path = _make_path(success_rate=0.85, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", new_path)
        assert result.updated is False

    async def test_duration_improvement_at_boundary(self, optimizer):
        """A duration gain nominally equal to the threshold does not update."""
        old_path = _make_path(total_duration=100.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", old_path)

        # Improvement equals the 20% threshold, which must not satisfy
        # "> threshold".
        new_path = _make_path(total_duration=80.0, success_rate=0.82, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", new_path)
        assert result.updated is False

    async def test_zero_sample_count(self, optimizer):
        """A path with zero samples is rejected as under-sampled."""
        path = _make_path(sample_count=0, success_rate=0.9)
        result = await optimizer.evaluate_and_update("code_review", path)
        assert result.updated is False
        assert "样本量不足" in result.reason

    async def test_path_task_type_override(self, optimizer):
        """evaluate_and_update overwrites the path's task_type with its argument."""
        path = _make_path(task_type="wrong_type", success_rate=0.8, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", path)
        assert result.updated is True
        assert path.task_type == "code_review"

        recommended = optimizer.get_recommended_path("code_review")
        assert recommended is not None
|