fischer-agentkit/tests/unit/evolution/test_path_optimizer.py

513 lines
23 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Tests for PathOptimizer - 执行路径优化器"""
from __future__ import annotations
from datetime import datetime, timezone
import pytest
from agentkit.evolution.path_optimizer import ExecutionPath, PathOptimizer, PathUpdateResult
# ── Fixtures ──────────────────────────────────────────────
@pytest.fixture
def optimizer():
    """Provide the baseline PathOptimizer instance shared by most tests."""
    baseline_settings = {
        "min_sample_count": 3,
        "success_rate_threshold": 0.05,
        "duration_improvement_threshold": 0.2,
    }
    return PathOptimizer(**baseline_settings)
@pytest.fixture
def optimizer_custom_thresholds():
    """Provide a PathOptimizer built with non-default threshold values."""
    custom_settings = dict(
        min_sample_count=5,
        success_rate_threshold=0.1,
        duration_improvement_threshold=0.3,
    )
    return PathOptimizer(**custom_settings)
def _make_path(
    task_type: str = "code_review",
    steps: list[str] | None = None,
    total_duration: float = 10.0,
    success_rate: float = 0.8,
    sample_count: int = 5,
    is_recommended: bool = False,
    path_id: str = "",
    created_at: datetime | None = None,
) -> ExecutionPath:
    """Build an ExecutionPath for use in tests.

    Every argument is forwarded to the ExecutionPath constructor under the
    same keyword name.

    Args:
        task_type: Task type label stored on the path.
        steps: Step names. ``None`` selects the three-step default; any other
            value, including an empty list, is passed through unchanged.
        total_duration: Total duration stored on the path.
        success_rate: Success rate stored on the path.
        sample_count: Sample count stored on the path.
        is_recommended: Initial value of the recommended flag.
        path_id: Path identifier; defaults to the empty string.
        created_at: Creation timestamp. ``None`` selects the current UTC time.

    Returns:
        The constructed ExecutionPath.
    """
    # Compare against None explicitly: a plain ``steps or default`` would
    # replace a deliberately empty step list with the default steps.
    resolved_steps = ["step1", "step2", "step3"] if steps is None else steps
    resolved_created_at = datetime.now(timezone.utc) if created_at is None else created_at
    return ExecutionPath(
        path_id=path_id,
        task_type=task_type,
        steps=resolved_steps,
        total_duration=total_duration,
        success_rate=success_rate,
        sample_count=sample_count,
        is_recommended=is_recommended,
        created_at=resolved_created_at,
    )
# ── ExecutionPath 数据模型测试 ────────────────────────────
class TestExecutionPath:
    """Checks for the ExecutionPath data model."""

    def test_default_values(self):
        """A path constructed without arguments exposes empty/zero defaults."""
        blank = ExecutionPath()
        assert blank.path_id == ""
        assert blank.task_type == ""
        assert blank.steps == []
        assert blank.total_duration == 0.0
        assert blank.success_rate == 0.0
        assert blank.sample_count == 0
        assert blank.is_recommended is False
        assert isinstance(blank.created_at, datetime)

    def test_custom_values(self):
        """Values given to the constructor are readable back unchanged."""
        stamp = datetime.now(timezone.utc)
        review_steps = ["analyze", "review", "report"]
        populated = ExecutionPath(
            path_id="p1",
            task_type="code_review",
            steps=review_steps,
            total_duration=15.5,
            success_rate=0.9,
            sample_count=10,
            is_recommended=True,
            created_at=stamp,
        )
        assert populated.path_id == "p1"
        assert populated.task_type == "code_review"
        assert populated.steps == ["analyze", "review", "report"]
        assert populated.total_duration == 15.5
        assert populated.success_rate == 0.9
        assert populated.sample_count == 10
        assert populated.is_recommended is True
        assert populated.created_at == stamp
# ── PathUpdateResult 数据模型测试 ─────────────────────────
class TestPathUpdateResult:
    """Checks for the PathUpdateResult data model."""

    def test_default_values(self):
        """A result constructed without arguments reports no update."""
        empty = PathUpdateResult()
        assert empty.updated is False
        assert empty.old_path is None
        assert empty.new_path is None
        assert empty.reason == ""

    def test_updated_result(self):
        """A populated result exposes both paths and the reason text."""
        previous = _make_path(success_rate=0.7)
        replacement = _make_path(success_rate=0.9)
        outcome = PathUpdateResult(
            updated=True,
            old_path=previous,
            new_path=replacement,
            reason="成功率显著提升",
        )
        assert outcome.updated is True
        assert outcome.old_path.success_rate == 0.7
        assert outcome.new_path.success_rate == 0.9
        assert "成功率" in outcome.reason
# ── get_recommended_path 测试 ─────────────────────────────
class TestGetRecommendedPath:
    """Checks for PathOptimizer.get_recommended_path."""

    async def test_no_recommended_path(self, optimizer):
        """A fresh optimizer has no recommendation for a task type."""
        assert optimizer.get_recommended_path("code_review") is None

    async def test_returns_recommended_path(self, optimizer):
        """After one evaluated path, that path is returned as recommended."""
        candidate = _make_path(task_type="code_review", success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", candidate)

        recommended = optimizer.get_recommended_path("code_review")
        assert recommended is not None
        assert recommended.success_rate == 0.8
        assert recommended.is_recommended is True

    async def test_different_task_types_independent(self, optimizer):
        """Each task type keeps its own recommended path."""
        review_path = _make_path(task_type="code_review", success_rate=0.8, sample_count=5)
        analysis_path = _make_path(task_type="data_analysis", success_rate=0.9, sample_count=5)
        await optimizer.evaluate_and_update("code_review", review_path)
        await optimizer.evaluate_and_update("data_analysis", analysis_path)

        review_pick = optimizer.get_recommended_path("code_review")
        analysis_pick = optimizer.get_recommended_path("data_analysis")
        assert review_pick is not None
        assert analysis_pick is not None
        assert review_pick.success_rate == 0.8
        assert analysis_pick.success_rate == 0.9
# ── 样本量不足测试 ────────────────────────────────────────
class TestInsufficientSamples:
    """Checks for paths whose sample count is below the configured minimum."""

    async def test_insufficient_samples_no_update(self, optimizer):
        """Too few samples: no update is made and nothing is recommended."""
        candidate = _make_path(sample_count=2, success_rate=0.9)
        outcome = await optimizer.evaluate_and_update("code_review", candidate)
        assert outcome.updated is False
        assert "样本量不足" in outcome.reason
        assert optimizer.get_recommended_path("code_review") is None

    async def test_insufficient_samples_recorded_as_pending(self, optimizer):
        """A path with too few samples shows up in the pending list."""
        candidate = _make_path(sample_count=2, success_rate=0.9)
        await optimizer.evaluate_and_update("code_review", candidate)

        waiting = optimizer.get_pending_paths("code_review")
        assert len(waiting) == 1
        assert waiting[0].success_rate == 0.9

    async def test_exact_min_samples_updates(self, optimizer):
        """A sample count equal to the minimum is enough to update."""
        candidate = _make_path(sample_count=3, success_rate=0.8)
        outcome = await optimizer.evaluate_and_update("code_review", candidate)
        assert outcome.updated is True
        assert outcome.reason == "无现有推荐路径,直接设为推荐"

    async def test_custom_min_sample_count(self, optimizer_custom_thresholds):
        """With a minimum of 5, a path with 4 samples is rejected."""
        candidate = _make_path(sample_count=4, success_rate=0.9)
        outcome = await optimizer_custom_thresholds.evaluate_and_update("code_review", candidate)
        assert outcome.updated is False
        assert "样本量不足" in outcome.reason
# ── 首次设置推荐路径测试 ──────────────────────────────────
class TestFirstRecommendation:
    """Checks for the first path evaluated for a task type."""

    async def test_first_path_becomes_recommended(self, optimizer):
        """With no existing recommendation, the new path is recommended."""
        candidate = _make_path(success_rate=0.7, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", candidate)
        assert outcome.updated is True
        assert outcome.old_path is None
        assert outcome.new_path is not None
        assert outcome.new_path.is_recommended is True
        assert "无现有推荐路径" in outcome.reason

    async def test_auto_generates_path_id(self, optimizer):
        """A path submitted with an empty path_id ends up with a non-empty one."""
        unnamed = _make_path(path_id="", sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", unnamed)
        assert outcome.updated is True
        assert outcome.new_path is not None
        assert len(outcome.new_path.path_id) > 0
# ── 成功率显著提升测试 ────────────────────────────────────
class TestSuccessRateImprovement:
    """Checks for updates driven by a change in success rate."""

    async def test_higher_success_rate_updates(self, optimizer):
        """A clearly higher success rate replaces the recommended path."""
        baseline = _make_path(success_rate=0.7, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        challenger = _make_path(success_rate=0.85, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", challenger)
        assert outcome.updated is True
        assert outcome.old_path.success_rate == 0.7
        assert outcome.new_path.success_rate == 0.85
        assert "成功率显著提升" in outcome.reason

    async def test_marginal_success_rate_no_update(self, optimizer):
        """A gain smaller than the threshold leaves the recommendation alone."""
        baseline = _make_path(success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        # The gain is only 0.03, below the fixture's 0.05 threshold.
        challenger = _make_path(success_rate=0.83, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", challenger)
        assert outcome.updated is False
        assert "无明显优势" in outcome.reason

    async def test_custom_success_rate_threshold(self, optimizer_custom_thresholds):
        """The custom success-rate threshold is the one that is applied."""
        baseline = _make_path(success_rate=0.7, sample_count=10)
        await optimizer_custom_thresholds.evaluate_and_update("code_review", baseline)

        # The gain is 0.08, below the custom 0.1 threshold.
        challenger = _make_path(success_rate=0.78, sample_count=10)
        outcome = await optimizer_custom_thresholds.evaluate_and_update("code_review", challenger)
        assert outcome.updated is False

    async def test_lower_success_rate_no_update(self, optimizer):
        """A lower success rate never replaces the recommended path."""
        baseline = _make_path(success_rate=0.9, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        challenger = _make_path(success_rate=0.6, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", challenger)
        assert outcome.updated is False
# ── 耗时显著更短测试 ──────────────────────────────────────
class TestDurationImprovement:
    """Checks for updates driven by a change in total duration."""

    async def test_shorter_duration_with_similar_success_rate_updates(self, optimizer):
        """Similar success rate but a much shorter duration triggers an update."""
        baseline = _make_path(total_duration=100.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        # Duration drops by 30% (above the 20% threshold); success rate is close.
        challenger = _make_path(total_duration=70.0, success_rate=0.82, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", challenger)
        assert outcome.updated is True
        assert "耗时显著更短" in outcome.reason

    async def test_marginal_duration_improvement_no_update(self, optimizer):
        """A duration gain below the threshold does not trigger an update."""
        baseline = _make_path(total_duration=100.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        # Duration drops by only 10% (below the 20% threshold).
        challenger = _make_path(total_duration=90.0, success_rate=0.82, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", challenger)
        assert outcome.updated is False
        assert "无明显优势" in outcome.reason

    async def test_longer_duration_no_update(self, optimizer):
        """A longer duration does not trigger an update."""
        baseline = _make_path(total_duration=50.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        challenger = _make_path(total_duration=80.0, success_rate=0.82, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", challenger)
        assert outcome.updated is False

    async def test_custom_duration_improvement_threshold(self, optimizer_custom_thresholds):
        """The custom duration threshold is the one that is applied."""
        baseline = _make_path(total_duration=100.0, success_rate=0.8, sample_count=10)
        await optimizer_custom_thresholds.evaluate_and_update("code_review", baseline)

        # Duration drops by 25% (below the custom 30% threshold).
        challenger = _make_path(total_duration=75.0, success_rate=0.82, sample_count=10)
        outcome = await optimizer_custom_thresholds.evaluate_and_update("code_review", challenger)
        assert outcome.updated is False

    async def test_zero_duration_current_path(self, optimizer):
        """A current path with zero duration is not replaced on duration grounds."""
        baseline = _make_path(total_duration=0.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        challenger = _make_path(total_duration=10.0, success_rate=0.82, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", challenger)
        assert outcome.updated is False

    async def test_both_zero_duration(self, optimizer):
        """Two zero-duration paths do not trigger a duration-based update."""
        baseline = _make_path(total_duration=0.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        challenger = _make_path(total_duration=0.0, success_rate=0.82, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", challenger)
        assert outcome.updated is False
# ── 保留现有推荐路径测试 ──────────────────────────────────
class TestKeepCurrentPath:
    """Checks that the current recommendation survives a weak challenger."""

    async def test_no_advantage_keeps_current(self, optimizer):
        """A challenger with no clear advantage leaves the recommendation as is."""
        baseline = _make_path(total_duration=50.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        challenger = _make_path(total_duration=48.0, success_rate=0.79, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", challenger)
        assert outcome.updated is False
        assert outcome.old_path.success_rate == 0.8

        # The recommended path must be unchanged.
        still_recommended = optimizer.get_recommended_path("code_review")
        assert still_recommended is not None
        assert still_recommended.success_rate == 0.8

    async def test_is_recommended_flag_preserved(self, optimizer):
        """When no update happens, the current path keeps is_recommended=True."""
        baseline = _make_path(success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        challenger = _make_path(success_rate=0.79, sample_count=5)
        await optimizer.evaluate_and_update("code_review", challenger)

        still_recommended = optimizer.get_recommended_path("code_review")
        assert still_recommended is not None
        assert still_recommended.is_recommended is True
# ── is_recommended 标志管理测试 ────────────────────────────
class TestIsRecommendedFlag:
    """Checks for how the is_recommended flag moves between paths."""

    async def test_old_path_loses_recommended_flag(self, optimizer):
        """After a replacement, the old path's is_recommended becomes False."""
        baseline = _make_path(success_rate=0.7, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)
        # The first accepted path is expected to carry the flag.
        assert baseline.is_recommended is True

        challenger = _make_path(success_rate=0.9, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", challenger)
        assert outcome.updated is True
        # Once replaced, the old path no longer carries the flag.
        assert outcome.old_path.is_recommended is False
        assert outcome.new_path.is_recommended is True
# ── 多次迭代优化测试 ──────────────────────────────────────
class TestIterativeOptimization:
    """Checks for repeated evaluations against the same optimizer."""

    async def test_multiple_updates_converge_to_best(self, optimizer):
        """Successive submissions leave the best path as the recommendation."""
        task = "code_review"

        # Round 1: the initial path.
        first = _make_path(success_rate=0.6, total_duration=100.0, sample_count=5)
        await optimizer.evaluate_and_update(task, first)
        assert optimizer.get_recommended_path(task).success_rate == 0.6

        # Round 2: a clearly higher success rate.
        second = _make_path(success_rate=0.8, total_duration=90.0, sample_count=5)
        await optimizer.evaluate_and_update(task, second)
        assert optimizer.get_recommended_path(task).success_rate == 0.8

        # Round 3: similar success rate, much shorter duration.
        third = _make_path(success_rate=0.82, total_duration=50.0, sample_count=5)
        await optimizer.evaluate_and_update(task, third)
        assert optimizer.get_recommended_path(task).total_duration == 50.0

        # Round 4: no clear advantage, so round 3's path stays recommended.
        fourth = _make_path(success_rate=0.81, total_duration=48.0, sample_count=5)
        outcome = await optimizer.evaluate_and_update(task, fourth)
        assert outcome.updated is False
        assert optimizer.get_recommended_path(task).total_duration == 50.0

    async def test_different_task_types_evolve_independently(self, optimizer):
        """Updating one task type leaves another task type's recommendation alone."""
        review_initial = _make_path(task_type="code_review", success_rate=0.7, sample_count=5)
        analysis_initial = _make_path(task_type="data_analysis", success_rate=0.6, sample_count=5)
        await optimizer.evaluate_and_update("code_review", review_initial)
        await optimizer.evaluate_and_update("data_analysis", analysis_initial)

        review_better = _make_path(task_type="code_review", success_rate=0.9, sample_count=5)
        await optimizer.evaluate_and_update("code_review", review_better)

        # code_review was updated; data_analysis must be unaffected.
        assert optimizer.get_recommended_path("code_review").success_rate == 0.9
        assert optimizer.get_recommended_path("data_analysis").success_rate == 0.6
# ── 待观察路径管理测试 ────────────────────────────────────
class TestPendingPaths:
    """Checks for the list of pending (under-sampled) paths."""

    async def test_pending_paths_empty_initially(self, optimizer):
        """A fresh optimizer reports an empty pending list."""
        assert optimizer.get_pending_paths("code_review") == []

    async def test_pending_paths_accumulate(self, optimizer):
        """Each under-sampled submission adds one entry to the pending list."""
        sparse_one = _make_path(sample_count=1, success_rate=0.9)
        sparse_two = _make_path(sample_count=2, success_rate=0.85)
        await optimizer.evaluate_and_update("code_review", sparse_one)
        await optimizer.evaluate_and_update("code_review", sparse_two)

        waiting = optimizer.get_pending_paths("code_review")
        assert len(waiting) == 2

    async def test_pending_paths_isolated_by_task_type(self, optimizer):
        """Pending paths are tracked separately for each task type."""
        review_sparse = _make_path(task_type="code_review", sample_count=1, success_rate=0.9)
        analysis_sparse = _make_path(task_type="data_analysis", sample_count=1, success_rate=0.8)
        await optimizer.evaluate_and_update("code_review", review_sparse)
        await optimizer.evaluate_and_update("data_analysis", analysis_sparse)

        assert len(optimizer.get_pending_paths("code_review")) == 1
        assert len(optimizer.get_pending_paths("data_analysis")) == 1

    async def test_sufficient_samples_not_pending(self, optimizer):
        """A sufficiently sampled path is not added to the pending list."""
        candidate = _make_path(sample_count=5, success_rate=0.8)
        await optimizer.evaluate_and_update("code_review", candidate)
        assert optimizer.get_pending_paths("code_review") == []
# ── ExperienceStore 集成测试 ──────────────────────────────
class TestExperienceStoreIntegration:
    """Checks that PathOptimizer works with and without an experience store."""

    async def test_with_experience_store(self):
        """PathOptimizer accepts an experience store instance."""
        from agentkit.evolution.experience_store import InMemoryExperienceStore

        backing_store = InMemoryExperienceStore()
        store_backed = PathOptimizer(experience_store=backing_store, min_sample_count=3)

        candidate = _make_path(success_rate=0.8, sample_count=5)
        outcome = await store_backed.evaluate_and_update("code_review", candidate)
        assert outcome.updated is True

    async def test_without_experience_store(self, optimizer):
        """PathOptimizer runs when no experience store is supplied."""
        candidate = _make_path(success_rate=0.8, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", candidate)
        assert outcome.updated is True
# ── 边界条件测试 ──────────────────────────────────────────
class TestEdgeCases:
    """Checks for boundary and unusual inputs."""

    async def test_same_path_twice(self, optimizer):
        """Submitting an equivalent path a second time does not update."""
        original = _make_path(success_rate=0.8, sample_count=5)
        first_outcome = await optimizer.evaluate_and_update("code_review", original)
        assert first_outcome.updated is True

        # Second submission: same parameters, but a distinct instance.
        duplicate = _make_path(success_rate=0.8, sample_count=5)
        second_outcome = await optimizer.evaluate_and_update("code_review", duplicate)
        # Same success rate and same duration: no clear advantage expected.
        assert second_outcome.updated is False

    async def test_success_rate_at_boundary(self, optimizer):
        """A success-rate gain sitting on the threshold is expected not to update."""
        baseline = _make_path(success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        # The nominal gain equals the 0.05 threshold; the test expects that an
        # equal gain is not enough to trigger an update.
        # NOTE(review): in binary floating point 0.85 - 0.8 is slightly below
        # 0.05, so this may not exercise exact equality — confirm against how
        # PathOptimizer performs the comparison.
        challenger = _make_path(success_rate=0.85, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", challenger)
        assert outcome.updated is False

    async def test_duration_improvement_at_boundary(self, optimizer):
        """A duration gain sitting on the threshold is expected not to update."""
        baseline = _make_path(total_duration=100.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        # The gain equals the 20% threshold; the test expects that an equal
        # gain is not enough to trigger an update.
        challenger = _make_path(total_duration=80.0, success_rate=0.82, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", challenger)
        assert outcome.updated is False

    async def test_zero_sample_count(self, optimizer):
        """A path with zero samples is rejected as under-sampled."""
        unsampled = _make_path(sample_count=0, success_rate=0.9)
        outcome = await optimizer.evaluate_and_update("code_review", unsampled)
        assert outcome.updated is False
        assert "样本量不足" in outcome.reason

    async def test_path_task_type_override(self, optimizer):
        """evaluate_and_update overwrites the path's task_type with its argument."""
        mislabeled = _make_path(task_type="wrong_type", success_rate=0.8, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", mislabeled)
        assert outcome.updated is True
        assert mislabeled.task_type == "code_review"

        recommended = optimizer.get_recommended_path("code_review")
        assert recommended is not None