"""Tests for PathOptimizer, the execution-path optimizer."""

from __future__ import annotations

from datetime import datetime, timezone

import pytest

from agentkit.evolution.path_optimizer import (
    ExecutionPath,
    PathOptimizer,
    PathUpdateResult,
)


# ── Fixtures ──────────────────────────────────────────────


@pytest.fixture
def optimizer():
    """Optimizer built with min 3 samples, 0.05 success-rate and 0.2 duration thresholds."""
    return PathOptimizer(
        min_sample_count=3,
        success_rate_threshold=0.05,
        duration_improvement_threshold=0.2,
    )


@pytest.fixture
def optimizer_custom_thresholds():
    """Optimizer built with stricter thresholds: 5 samples, 0.1 success rate, 0.3 duration."""
    return PathOptimizer(
        min_sample_count=5,
        success_rate_threshold=0.1,
        duration_improvement_threshold=0.3,
    )


def _make_path(
    task_type: str = "code_review",
    steps: list[str] | None = None,
    total_duration: float = 10.0,
    success_rate: float = 0.8,
    sample_count: int = 5,
    is_recommended: bool = False,
    path_id: str = "",
    created_at: datetime | None = None,
) -> ExecutionPath:
    """Build an ExecutionPath for tests.

    A falsy ``steps`` is replaced by three placeholder steps and a falsy
    ``created_at`` by the current UTC time; every other argument is passed
    through unchanged.
    """
    if not steps:
        steps = ["step1", "step2", "step3"]
    if not created_at:
        created_at = datetime.now(timezone.utc)
    return ExecutionPath(
        path_id=path_id,
        task_type=task_type,
        steps=steps,
        total_duration=total_duration,
        success_rate=success_rate,
        sample_count=sample_count,
        is_recommended=is_recommended,
        created_at=created_at,
    )


# ── ExecutionPath data model ──────────────────────────────


class TestExecutionPath:
    def test_default_values(self):
        blank = ExecutionPath()

        assert blank.path_id == ""
        assert blank.task_type == ""
        assert blank.steps == []
        assert blank.total_duration == 0.0
        assert blank.success_rate == 0.0
        assert blank.sample_count == 0
        assert blank.is_recommended is False
        assert isinstance(blank.created_at, datetime)

    def test_custom_values(self):
        stamp = datetime.now(timezone.utc)
        built = ExecutionPath(
            path_id="p1",
            task_type="code_review",
            steps=["analyze", "review", "report"],
            total_duration=15.5,
            success_rate=0.9,
            sample_count=10,
            is_recommended=True,
            created_at=stamp,
        )

        assert built.path_id == "p1"
        assert built.task_type == "code_review"
        assert built.steps == ["analyze", "review", "report"]
        assert built.total_duration == 15.5
        assert built.success_rate == 0.9
        assert built.sample_count == 10
        assert built.is_recommended is True
        assert built.created_at == stamp


# ── PathUpdateResult data model ───────────────────────────


class TestPathUpdateResult:
    def test_default_values(self):
        empty = PathUpdateResult()

        assert empty.updated is False
        assert empty.old_path is None
        assert empty.new_path is None
        assert empty.reason == ""

    def test_updated_result(self):
        previous = _make_path(success_rate=0.7)
        replacement = _make_path(success_rate=0.9)

        outcome = PathUpdateResult(
            updated=True,
            old_path=previous,
            new_path=replacement,
            reason="成功率显著提升",
        )

        assert outcome.updated is True
        assert outcome.old_path.success_rate == 0.7
        assert outcome.new_path.success_rate == 0.9
        assert "成功率" in outcome.reason


# ── get_recommended_path ──────────────────────────────────


class TestGetRecommendedPath:
    async def test_no_recommended_path(self, optimizer):
        assert optimizer.get_recommended_path("code_review") is None

    async def test_returns_recommended_path(self, optimizer):
        submitted = _make_path(task_type="code_review", success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", submitted)

        recommended = optimizer.get_recommended_path("code_review")

        assert recommended is not None
        assert recommended.success_rate == 0.8
        assert recommended.is_recommended is True

    async def test_different_task_types_independent(self, optimizer):
        rates_by_task = {"code_review": 0.8, "data_analysis": 0.9}

        # Register one path per task type before reading anything back.
        for task, rate in rates_by_task.items():
            submitted = _make_path(task_type=task, success_rate=rate, sample_count=5)
            await optimizer.evaluate_and_update(task, submitted)

        recommended_a = optimizer.get_recommended_path("code_review")
        recommended_b = optimizer.get_recommended_path("data_analysis")

        assert recommended_a is not None
        assert recommended_b is not None
        assert recommended_a.success_rate == rates_by_task["code_review"]
        assert recommended_b.success_rate == rates_by_task["data_analysis"]


# ── Insufficient sample count ─────────────────────────────


class TestInsufficientSamples:
    async def test_insufficient_samples_no_update(self, optimizer):
        """Too few samples: no update, and no recommendation is set."""
        candidate = _make_path(sample_count=2, success_rate=0.9)

        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is False
        assert "样本量不足" in outcome.reason
        assert optimizer.get_recommended_path("code_review") is None

    async def test_insufficient_samples_recorded_as_pending(self, optimizer):
        """A path with too few samples shows up in the pending list."""
        candidate = _make_path(sample_count=2, success_rate=0.9)
        await optimizer.evaluate_and_update("code_review", candidate)

        waiting = optimizer.get_pending_paths("code_review")

        assert len(waiting) == 1
        assert waiting[0].success_rate == 0.9

    async def test_exact_min_samples_updates(self, optimizer):
        """A sample count exactly at the minimum is enough to update."""
        candidate = _make_path(sample_count=3, success_rate=0.8)

        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is True
        assert outcome.reason == "无现有推荐路径,直接设为推荐"

    async def test_custom_min_sample_count(self, optimizer_custom_thresholds):
        """A custom minimum of 5 rejects a path that has 4 samples."""
        candidate = _make_path(sample_count=4, success_rate=0.9)

        outcome = await optimizer_custom_thresholds.evaluate_and_update(
            "code_review", candidate
        )

        assert outcome.updated is False
        assert "样本量不足" in outcome.reason


# ── First recommendation ──────────────────────────────────


class TestFirstRecommendation:
    async def test_first_path_becomes_recommended(self, optimizer):
        """With no current recommendation, the new path is adopted directly."""
        candidate = _make_path(success_rate=0.7, sample_count=5)

        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is True
        assert outcome.old_path is None
        assert outcome.new_path is not None
        assert outcome.new_path.is_recommended is True
        assert "无现有推荐路径" in outcome.reason

    async def test_auto_generates_path_id(self, optimizer):
        """An empty path_id is replaced by a generated, non-empty one."""
        candidate = _make_path(path_id="", sample_count=5)

        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is True
        assert outcome.new_path is not None
        assert len(outcome.new_path.path_id) > 0


# ── Significant success-rate improvement ──────────────────


class TestSuccessRateImprovement:
    async def test_higher_success_rate_updates(self, optimizer):
        """A clearly higher success rate replaces the recommendation."""
        baseline = _make_path(success_rate=0.7, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        candidate = _make_path(success_rate=0.85, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is True
        assert outcome.old_path.success_rate == 0.7
        assert outcome.new_path.success_rate == 0.85
        assert "成功率显著提升" in outcome.reason

    async def test_marginal_success_rate_no_update(self, optimizer):
        """A gain below the threshold does not replace the recommendation."""
        baseline = _make_path(success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        # Gain is only 0.03, below the fixture's 0.05 threshold.
        candidate = _make_path(success_rate=0.83, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is False
        assert "无明显优势" in outcome.reason

    async def test_custom_success_rate_threshold(self, optimizer_custom_thresholds):
        """The custom success-rate threshold is honoured."""
        baseline = _make_path(success_rate=0.7, sample_count=10)
        await optimizer_custom_thresholds.evaluate_and_update("code_review", baseline)

        # Gain is 0.08, below the custom 0.1 threshold.
        candidate = _make_path(success_rate=0.78, sample_count=10)
        outcome = await optimizer_custom_thresholds.evaluate_and_update(
            "code_review", candidate
        )

        assert outcome.updated is False

    async def test_lower_success_rate_no_update(self, optimizer):
        """A lower success rate never replaces the recommendation."""
        baseline = _make_path(success_rate=0.9, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        candidate = _make_path(success_rate=0.6, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is False


# ── Significant duration improvement ──────────────────────


class TestDurationImprovement:
    async def test_shorter_duration_with_similar_success_rate_updates(self, optimizer):
        """Similar success rate but a much shorter duration triggers an update."""
        baseline = _make_path(total_duration=100.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        # 30% faster (above the 20% threshold) with a similar success rate.
        candidate = _make_path(total_duration=70.0, success_rate=0.82, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is True
        assert "耗时显著更短" in outcome.reason

    async def test_marginal_duration_improvement_no_update(self, optimizer):
        """A duration gain below the threshold does not trigger an update."""
        baseline = _make_path(total_duration=100.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        # Only 10% faster (below the 20% threshold).
        candidate = _make_path(total_duration=90.0, success_rate=0.82, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is False
        assert "无明显优势" in outcome.reason

    async def test_longer_duration_no_update(self, optimizer):
        """A slower path does not trigger an update."""
        baseline = _make_path(total_duration=50.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        candidate = _make_path(total_duration=80.0, success_rate=0.82, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is False

    async def test_custom_duration_improvement_threshold(self, optimizer_custom_thresholds):
        """The custom duration-improvement threshold is honoured."""
        baseline = _make_path(total_duration=100.0, success_rate=0.8, sample_count=10)
        await optimizer_custom_thresholds.evaluate_and_update("code_review", baseline)

        # 25% faster, below the custom 30% threshold.
        candidate = _make_path(total_duration=75.0, success_rate=0.82, sample_count=10)
        outcome = await optimizer_custom_thresholds.evaluate_and_update(
            "code_review", candidate
        )

        assert outcome.updated is False

    async def test_zero_duration_current_path(self, optimizer):
        """A current path with zero duration is not replaced on duration grounds."""
        baseline = _make_path(total_duration=0.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        candidate = _make_path(total_duration=10.0, success_rate=0.82, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is False

    async def test_both_zero_duration(self, optimizer):
        """Two zero-duration paths do not trigger a duration-based update."""
        baseline = _make_path(total_duration=0.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        candidate = _make_path(total_duration=0.0, success_rate=0.82, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is False


# ── Keeping the current recommendation ────────────────────


class TestKeepCurrentPath:
    async def test_no_advantage_keeps_current(self, optimizer):
        """A path with no clear advantage leaves the recommendation in place."""
        baseline = _make_path(total_duration=50.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        candidate = _make_path(total_duration=48.0, success_rate=0.79, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is False
        assert outcome.old_path.success_rate == 0.8

        # The stored recommendation is unchanged.
        current = optimizer.get_recommended_path("code_review")
        assert current is not None
        assert current.success_rate == 0.8

    async def test_is_recommended_flag_preserved(self, optimizer):
        """When nothing is updated, the current path keeps is_recommended=True."""
        baseline = _make_path(success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        candidate = _make_path(success_rate=0.79, sample_count=5)
        await optimizer.evaluate_and_update("code_review", candidate)

        current = optimizer.get_recommended_path("code_review")
        assert current is not None
        assert current.is_recommended is True


# ── is_recommended flag management ────────────────────────


class TestIsRecommendedFlag:
    async def test_old_path_loses_recommended_flag(self, optimizer):
        """After an update the replaced path has is_recommended=False."""
        baseline = _make_path(success_rate=0.7, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)
        # First submission: the submitted object itself is flagged.
        assert baseline.is_recommended is True

        candidate = _make_path(success_rate=0.9, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is True
        # The replaced path loses the flag; the new one gains it.
        assert outcome.old_path.is_recommended is False
        assert outcome.new_path.is_recommended is True


# ── Iterative optimisation ────────────────────────────────


class TestIterativeOptimization:
    async def test_multiple_updates_converge_to_best(self, optimizer):
        """Successive submissions converge on the best path."""
        task = "code_review"

        # Round 1: initial path.
        first = _make_path(success_rate=0.6, total_duration=100.0, sample_count=5)
        await optimizer.evaluate_and_update(task, first)
        assert optimizer.get_recommended_path(task).success_rate == 0.6

        # Round 2: significantly higher success rate.
        second = _make_path(success_rate=0.8, total_duration=90.0, sample_count=5)
        await optimizer.evaluate_and_update(task, second)
        assert optimizer.get_recommended_path(task).success_rate == 0.8

        # Round 3: similar success rate, much shorter duration.
        third = _make_path(success_rate=0.82, total_duration=50.0, sample_count=5)
        await optimizer.evaluate_and_update(task, third)
        assert optimizer.get_recommended_path(task).total_duration == 50.0

        # Round 4: no clear advantage, so the recommendation stays.
        fourth = _make_path(success_rate=0.81, total_duration=48.0, sample_count=5)
        outcome = await optimizer.evaluate_and_update(task, fourth)
        assert outcome.updated is False
        assert optimizer.get_recommended_path(task).total_duration == 50.0

    async def test_different_task_types_evolve_independently(self, optimizer):
        """Recommendations for different task types evolve separately."""
        review_v1 = _make_path(task_type="code_review", success_rate=0.7, sample_count=5)
        analysis_v1 = _make_path(task_type="data_analysis", success_rate=0.6, sample_count=5)
        await optimizer.evaluate_and_update("code_review", review_v1)
        await optimizer.evaluate_and_update("data_analysis", analysis_v1)

        review_v2 = _make_path(task_type="code_review", success_rate=0.9, sample_count=5)
        await optimizer.evaluate_and_update("code_review", review_v2)

        # code_review was updated; data_analysis is untouched.
        assert optimizer.get_recommended_path("code_review").success_rate == 0.9
        assert optimizer.get_recommended_path("data_analysis").success_rate == 0.6


# ── Pending-path management ───────────────────────────────


class TestPendingPaths:
    async def test_pending_paths_empty_initially(self, optimizer):
        assert optimizer.get_pending_paths("code_review") == []

    async def test_pending_paths_accumulate(self, optimizer):
        """Each under-sampled submission adds to the pending list."""
        under_sampled = [
            _make_path(sample_count=1, success_rate=0.9),
            _make_path(sample_count=2, success_rate=0.85),
        ]
        for candidate in under_sampled:
            await optimizer.evaluate_and_update("code_review", candidate)

        waiting = optimizer.get_pending_paths("code_review")
        assert len(waiting) == 2

    async def test_pending_paths_isolated_by_task_type(self, optimizer):
        """Pending paths are kept separately per task type."""
        review_path = _make_path(task_type="code_review", sample_count=1, success_rate=0.9)
        analysis_path = _make_path(task_type="data_analysis", sample_count=1, success_rate=0.8)
        await optimizer.evaluate_and_update("code_review", review_path)
        await optimizer.evaluate_and_update("data_analysis", analysis_path)

        assert len(optimizer.get_pending_paths("code_review")) == 1
        assert len(optimizer.get_pending_paths("data_analysis")) == 1

    async def test_sufficient_samples_not_pending(self, optimizer):
        """A sufficiently sampled path does not enter the pending list."""
        candidate = _make_path(sample_count=5, success_rate=0.8)
        await optimizer.evaluate_and_update("code_review", candidate)

        assert optimizer.get_pending_paths("code_review") == []


# ── ExperienceStore integration ───────────────────────────


class TestExperienceStoreIntegration:
    async def test_with_experience_store(self):
        """PathOptimizer accepts an ExperienceStore instance."""
        from agentkit.evolution.experience_store import InMemoryExperienceStore

        backing_store = InMemoryExperienceStore()
        store_backed = PathOptimizer(experience_store=backing_store, min_sample_count=3)

        candidate = _make_path(success_rate=0.8, sample_count=5)
        outcome = await store_backed.evaluate_and_update("code_review", candidate)

        assert outcome.updated is True

    async def test_without_experience_store(self, optimizer):
        """PathOptimizer also works without an ExperienceStore."""
        candidate = _make_path(success_rate=0.8, sample_count=5)

        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is True


# ── Edge cases ────────────────────────────────────────────


class TestEdgeCases:
    async def test_same_path_twice(self, optimizer):
        """Submitting an equivalent path a second time changes nothing."""
        original = _make_path(success_rate=0.8, sample_count=5)
        first_outcome = await optimizer.evaluate_and_update("code_review", original)
        assert first_outcome.updated is True

        # Same parameters, different instance.
        duplicate = _make_path(success_rate=0.8, sample_count=5)
        second_outcome = await optimizer.evaluate_and_update("code_review", duplicate)

        # Same success rate and duration, so there is no clear advantage.
        assert second_outcome.updated is False

    async def test_success_rate_at_boundary(self, optimizer):
        """A success-rate gain right at the threshold does not update."""
        baseline = _make_path(success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        # Nominal gain equals the 0.05 threshold, which is not "> threshold".
        candidate = _make_path(success_rate=0.85, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is False

    async def test_duration_improvement_at_boundary(self, optimizer):
        """A duration gain right at the threshold does not update."""
        baseline = _make_path(total_duration=100.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        # Nominal gain equals the 20% threshold, which is not "> threshold".
        candidate = _make_path(total_duration=80.0, success_rate=0.82, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is False

    async def test_zero_sample_count(self, optimizer):
        """A sample count of zero is reported as insufficient."""
        candidate = _make_path(sample_count=0, success_rate=0.9)

        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is False
        assert "样本量不足" in outcome.reason

    async def test_path_task_type_override(self, optimizer):
        """evaluate_and_update overwrites the path's task_type with its argument."""
        candidate = _make_path(task_type="wrong_type", success_rate=0.8, sample_count=5)

        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is True
        assert candidate.task_type == "code_review"
        current = optimizer.get_recommended_path("code_review")
        assert current is not None