# fischer-agentkit/tests/unit/evolution/test_path_optimizer.py

"""Tests for PathOptimizer - 执行路径优化器"""

from __future__ import annotations

from datetime import datetime, timezone

import pytest

from agentkit.evolution.path_optimizer import ExecutionPath, PathOptimizer, PathUpdateResult


# ── Fixtures ──────────────────────────────────────────────


@pytest.fixture
def optimizer():
    """Provide the PathOptimizer instance used by most tests in this module."""
    settings = {
        "min_sample_count": 3,
        "success_rate_threshold": 0.05,
        "duration_improvement_threshold": 0.2,
    }
    return PathOptimizer(**settings)


@pytest.fixture
def optimizer_custom_thresholds():
    """Provide a PathOptimizer built with custom thresholds (5 samples, 0.1 success rate, 0.3 duration)."""
    custom = PathOptimizer(min_sample_count=5, success_rate_threshold=0.1, duration_improvement_threshold=0.3)
    return custom


def _make_path(
    task_type: str = "code_review",
    steps: list[str] | None = None,
    total_duration: float = 10.0,
    success_rate: float = 0.8,
    sample_count: int = 5,
    is_recommended: bool = False,
    path_id: str = "",
    created_at: datetime | None = None,
) -> ExecutionPath:
    """Build an ExecutionPath for use in tests.

    Every argument is forwarded to the ``ExecutionPath`` field of the same name.

    Args:
        task_type: Task type stored on the path.
        steps: Step names; ``None`` selects ``["step1", "step2", "step3"]``.
            An explicitly passed empty list is kept as-is.
        total_duration: Total duration stored on the path.
        success_rate: Success rate stored on the path.
        sample_count: Sample count stored on the path.
        is_recommended: Initial value of the recommended flag.
        path_id: Path identifier; empty by default.
        created_at: Creation timestamp; ``None`` selects the current UTC time.

    Returns:
        The newly constructed ExecutionPath.
    """
    # Test against None rather than truthiness so that a caller who
    # deliberately passes an empty step list does not get the default instead.
    if steps is None:
        steps = ["step1", "step2", "step3"]
    if created_at is None:
        created_at = datetime.now(timezone.utc)
    return ExecutionPath(
        path_id=path_id,
        task_type=task_type,
        steps=steps,
        total_duration=total_duration,
        success_rate=success_rate,
        sample_count=sample_count,
        is_recommended=is_recommended,
        created_at=created_at,
    )


# ── ExecutionPath data model tests ────────────────────────


class TestExecutionPath:
    """Checks for the ExecutionPath data model."""

    def test_default_values(self):
        """A path built without arguments exposes the expected defaults."""
        blank = ExecutionPath()
        # String fields
        assert blank.path_id == ""
        assert blank.task_type == ""
        # Collection and numeric fields
        assert blank.steps == []
        assert blank.total_duration == 0.0
        assert blank.success_rate == 0.0
        assert blank.sample_count == 0
        # Flag and timestamp
        assert blank.is_recommended is False
        assert isinstance(blank.created_at, datetime)

    def test_custom_values(self):
        """Values passed to the constructor are readable back unchanged."""
        expected = {
            "path_id": "p1",
            "task_type": "code_review",
            "steps": ["analyze", "review", "report"],
            "total_duration": 15.5,
            "success_rate": 0.9,
            "sample_count": 10,
            "created_at": datetime.now(timezone.utc),
        }
        built = ExecutionPath(is_recommended=True, **expected)

        assert built.path_id == expected["path_id"]
        assert built.task_type == expected["task_type"]
        assert built.steps == expected["steps"]
        assert built.total_duration == expected["total_duration"]
        assert built.success_rate == expected["success_rate"]
        assert built.sample_count == expected["sample_count"]
        assert built.is_recommended is True
        assert built.created_at == expected["created_at"]


# ── PathUpdateResult data model tests ─────────────────────


class TestPathUpdateResult:
    """Checks for the PathUpdateResult data model."""

    def test_default_values(self):
        """A result built without arguments reports no update and carries no paths."""
        empty = PathUpdateResult()
        assert empty.updated is False
        assert empty.old_path is None
        assert empty.new_path is None
        assert empty.reason == ""

    def test_updated_result(self):
        """A populated result exposes both paths and the reason text."""
        previous = _make_path(success_rate=0.7)
        replacement = _make_path(success_rate=0.9)
        outcome = PathUpdateResult(
            updated=True,
            old_path=previous,
            new_path=replacement,
            reason="成功率显著提升",
        )

        assert outcome.updated is True
        assert outcome.old_path.success_rate == 0.7
        assert outcome.new_path.success_rate == 0.9
        assert "成功率" in outcome.reason


# ── get_recommended_path tests ────────────────────────────


class TestGetRecommendedPath:
    """Checks for PathOptimizer.get_recommended_path."""

    async def test_no_recommended_path(self, optimizer):
        """A fresh optimizer has no recommendation for a task type."""
        assert optimizer.get_recommended_path("code_review") is None

    async def test_returns_recommended_path(self, optimizer):
        """After a path is accepted, it is returned as the recommendation."""
        await optimizer.evaluate_and_update(
            "code_review",
            _make_path(task_type="code_review", success_rate=0.8, sample_count=5),
        )

        recommended = optimizer.get_recommended_path("code_review")
        assert recommended is not None
        assert recommended.success_rate == 0.8
        assert recommended.is_recommended is True

    async def test_different_task_types_independent(self, optimizer):
        """Each task type keeps its own recommendation."""
        rates_by_task = {"code_review": 0.8, "data_analysis": 0.9}
        for task, rate in rates_by_task.items():
            candidate = _make_path(task_type=task, success_rate=rate, sample_count=5)
            await optimizer.evaluate_and_update(task, candidate)

        found = {task: optimizer.get_recommended_path(task) for task in rates_by_task}
        for task in rates_by_task:
            assert found[task] is not None
        for task, rate in rates_by_task.items():
            assert found[task].success_rate == rate


# ── Insufficient sample count tests ───────────────────────


class TestInsufficientSamples:
    """Paths whose sample count is below the configured minimum."""

    async def test_insufficient_samples_no_update(self, optimizer):
        """Too few samples: no update, and no recommendation is set."""
        sparse = _make_path(sample_count=2, success_rate=0.9)
        outcome = await optimizer.evaluate_and_update("code_review", sparse)

        assert outcome.updated is False
        assert "样本量不足" in outcome.reason
        assert optimizer.get_recommended_path("code_review") is None

    async def test_insufficient_samples_recorded_as_pending(self, optimizer):
        """A path with too few samples shows up in the pending list."""
        await optimizer.evaluate_and_update(
            "code_review", _make_path(sample_count=2, success_rate=0.9)
        )

        waiting = optimizer.get_pending_paths("code_review")
        assert len(waiting) == 1
        assert waiting[0].success_rate == 0.9

    async def test_exact_min_samples_updates(self, optimizer):
        """A sample count equal to the minimum is enough to update."""
        at_minimum = _make_path(sample_count=3, success_rate=0.8)
        outcome = await optimizer.evaluate_and_update("code_review", at_minimum)

        assert outcome.updated is True
        assert outcome.reason == "无现有推荐路径，直接设为推荐"

    async def test_custom_min_sample_count(self, optimizer_custom_thresholds):
        """With a minimum of 5 samples, a path with 4 is rejected."""
        one_short = _make_path(sample_count=4, success_rate=0.9)
        outcome = await optimizer_custom_thresholds.evaluate_and_update("code_review", one_short)

        assert outcome.updated is False
        assert "样本量不足" in outcome.reason


# ── First recommendation tests ────────────────────────────


class TestFirstRecommendation:
    """Behaviour when a task type has no recommended path yet."""

    async def test_first_path_becomes_recommended(self, optimizer):
        """With no existing recommendation, the new path is adopted directly."""
        first = _make_path(success_rate=0.7, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", first)

        assert outcome.updated is True
        assert outcome.old_path is None
        assert outcome.new_path is not None
        assert outcome.new_path.is_recommended is True
        assert "无现有推荐路径" in outcome.reason

    async def test_auto_generates_path_id(self, optimizer):
        """An empty path_id is replaced by a non-empty one."""
        unnamed = _make_path(path_id="", sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", unnamed)

        assert outcome.updated is True
        assert outcome.new_path is not None
        assert len(outcome.new_path.path_id) > 0


# ── Significant success-rate improvement tests ────────────


class TestSuccessRateImprovement:
    """Updates driven by the success rate of the candidate path."""

    async def test_higher_success_rate_updates(self, optimizer):
        """A clearly higher success rate replaces the recommendation."""
        await optimizer.evaluate_and_update(
            "code_review", _make_path(success_rate=0.7, sample_count=5)
        )

        candidate = _make_path(success_rate=0.85, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is True
        assert outcome.old_path.success_rate == 0.7
        assert outcome.new_path.success_rate == 0.85
        assert "成功率显著提升" in outcome.reason

    async def test_marginal_success_rate_no_update(self, optimizer):
        """A gain below the threshold does not replace the recommendation."""
        await optimizer.evaluate_and_update(
            "code_review", _make_path(success_rate=0.8, sample_count=5)
        )

        # Gain is only 0.03, below the fixture's 0.05 threshold.
        candidate = _make_path(success_rate=0.83, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", candidate)

        assert outcome.updated is False
        assert "无明显优势" in outcome.reason

    async def test_custom_success_rate_threshold(self, optimizer_custom_thresholds):
        """The configured success-rate threshold is honoured."""
        await optimizer_custom_thresholds.evaluate_and_update(
            "code_review", _make_path(success_rate=0.7, sample_count=10)
        )

        # Gain is 0.08, below the custom 0.1 threshold.
        candidate = _make_path(success_rate=0.78, sample_count=10)
        outcome = await optimizer_custom_thresholds.evaluate_and_update("code_review", candidate)

        assert outcome.updated is False

    async def test_lower_success_rate_no_update(self, optimizer):
        """A lower success rate never replaces the recommendation."""
        await optimizer.evaluate_and_update(
            "code_review", _make_path(success_rate=0.9, sample_count=5)
        )

        worse = _make_path(success_rate=0.6, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", worse)

        assert outcome.updated is False


# ── Significantly shorter duration tests ──────────────────


class TestDurationImprovement:
    """Updates driven by the total duration of the candidate path."""

    async def test_shorter_duration_with_similar_success_rate_updates(self, optimizer):
        """Similar success rate but much shorter duration replaces the recommendation."""
        baseline = _make_path(total_duration=100.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        # Duration drops by 30% (above the 20% threshold); success rate is close.
        faster = _make_path(total_duration=70.0, success_rate=0.82, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", faster)

        assert outcome.updated is True
        assert "耗时显著更短" in outcome.reason

    async def test_marginal_duration_improvement_no_update(self, optimizer):
        """A duration gain below the threshold does not replace the recommendation."""
        baseline = _make_path(total_duration=100.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        # Duration drops by only 10% (below the 20% threshold).
        slightly_faster = _make_path(total_duration=90.0, success_rate=0.82, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", slightly_faster)

        assert outcome.updated is False
        assert "无明显优势" in outcome.reason

    async def test_longer_duration_no_update(self, optimizer):
        """A slower path does not replace the recommendation."""
        baseline = _make_path(total_duration=50.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", baseline)

        slower = _make_path(total_duration=80.0, success_rate=0.82, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", slower)

        assert outcome.updated is False

    async def test_custom_duration_improvement_threshold(self, optimizer_custom_thresholds):
        """The configured duration threshold is honoured."""
        baseline = _make_path(total_duration=100.0, success_rate=0.8, sample_count=10)
        await optimizer_custom_thresholds.evaluate_and_update("code_review", baseline)

        # Duration drops by 25% (below the custom 30% threshold).
        faster = _make_path(total_duration=75.0, success_rate=0.82, sample_count=10)
        outcome = await optimizer_custom_thresholds.evaluate_and_update("code_review", faster)

        assert outcome.updated is False

    async def test_zero_duration_current_path(self, optimizer):
        """A current path with zero duration is not replaced on duration grounds."""
        instant = _make_path(total_duration=0.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", instant)

        timed = _make_path(total_duration=10.0, success_rate=0.82, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", timed)

        assert outcome.updated is False

    async def test_both_zero_duration(self, optimizer):
        """Two zero-duration paths do not trigger a duration-based update."""
        instant = _make_path(total_duration=0.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", instant)

        also_instant = _make_path(total_duration=0.0, success_rate=0.82, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", also_instant)

        assert outcome.updated is False


# ── Keep current recommended path tests ───────────────────


class TestKeepCurrentPath:
    """Cases where the existing recommendation must survive."""

    async def test_no_advantage_keeps_current(self, optimizer):
        """A candidate with no clear advantage leaves the recommendation untouched."""
        incumbent = _make_path(total_duration=50.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", incumbent)

        challenger = _make_path(total_duration=48.0, success_rate=0.79, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", challenger)

        assert outcome.updated is False
        assert outcome.old_path.success_rate == 0.8

        # The stored recommendation is still the incumbent.
        still_recommended = optimizer.get_recommended_path("code_review")
        assert still_recommended is not None
        assert still_recommended.success_rate == 0.8

    async def test_is_recommended_flag_preserved(self, optimizer):
        """When no update happens, the current path keeps is_recommended=True."""
        await optimizer.evaluate_and_update(
            "code_review", _make_path(success_rate=0.8, sample_count=5)
        )
        await optimizer.evaluate_and_update(
            "code_review", _make_path(success_rate=0.79, sample_count=5)
        )

        still_recommended = optimizer.get_recommended_path("code_review")
        assert still_recommended is not None
        assert still_recommended.is_recommended is True


# ── is_recommended flag management tests ──────────────────


class TestIsRecommendedFlag:
    """Checks how the is_recommended flag moves between paths."""

    async def test_old_path_loses_recommended_flag(self, optimizer):
        """After a replacement, the previous path has is_recommended=False."""
        incumbent = _make_path(success_rate=0.7, sample_count=5)
        await optimizer.evaluate_and_update("code_review", incumbent)
        # First path for this task type: the flag is set on the object we passed in.
        assert incumbent.is_recommended is True

        challenger = _make_path(success_rate=0.9, sample_count=5)
        outcome = await optimizer.evaluate_and_update("code_review", challenger)

        assert outcome.updated is True
        # The replaced path gives up the flag; the new one takes it.
        assert outcome.old_path.is_recommended is False
        assert outcome.new_path.is_recommended is True


# ── Iterative optimization tests ──────────────────────────


class TestIterativeOptimization:
    """Sequences of several evaluate_and_update calls."""

    async def test_multiple_updates_converge_to_best(self, optimizer):
        """Repeated submissions end with the best path recommended."""
        task = "code_review"

        # Round 1: initial path.
        initial = _make_path(success_rate=0.6, total_duration=100.0, sample_count=5)
        await optimizer.evaluate_and_update(task, initial)
        assert optimizer.get_recommended_path(task).success_rate == 0.6

        # Round 2: clearly higher success rate.
        more_reliable = _make_path(success_rate=0.8, total_duration=90.0, sample_count=5)
        await optimizer.evaluate_and_update(task, more_reliable)
        assert optimizer.get_recommended_path(task).success_rate == 0.8

        # Round 3: similar success rate, much shorter duration.
        much_faster = _make_path(success_rate=0.82, total_duration=50.0, sample_count=5)
        await optimizer.evaluate_and_update(task, much_faster)
        assert optimizer.get_recommended_path(task).total_duration == 50.0

        # Round 4: no clear advantage, so nothing changes.
        no_better = _make_path(success_rate=0.81, total_duration=48.0, sample_count=5)
        outcome = await optimizer.evaluate_and_update(task, no_better)
        assert outcome.updated is False
        assert optimizer.get_recommended_path(task).total_duration == 50.0

    async def test_different_task_types_evolve_independently(self, optimizer):
        """Updating one task type leaves another task type's recommendation alone."""
        review_v1 = _make_path(task_type="code_review", success_rate=0.7, sample_count=5)
        analysis_v1 = _make_path(task_type="data_analysis", success_rate=0.6, sample_count=5)
        await optimizer.evaluate_and_update("code_review", review_v1)
        await optimizer.evaluate_and_update("data_analysis", analysis_v1)

        review_v2 = _make_path(task_type="code_review", success_rate=0.9, sample_count=5)
        await optimizer.evaluate_and_update("code_review", review_v2)

        # code_review moved to the new path; data_analysis is unaffected.
        assert optimizer.get_recommended_path("code_review").success_rate == 0.9
        assert optimizer.get_recommended_path("data_analysis").success_rate == 0.6


# ── Pending path management tests ─────────────────────────


class TestPendingPaths:
    """Checks for PathOptimizer.get_pending_paths."""

    async def test_pending_paths_empty_initially(self, optimizer):
        """A fresh optimizer has no pending paths."""
        waiting = optimizer.get_pending_paths("code_review")
        assert waiting == []

    async def test_pending_paths_accumulate(self, optimizer):
        """Each under-sampled submission adds one entry to the pending list."""
        under_sampled = [
            _make_path(sample_count=1, success_rate=0.9),
            _make_path(sample_count=2, success_rate=0.85),
        ]
        for candidate in under_sampled:
            await optimizer.evaluate_and_update("code_review", candidate)

        waiting = optimizer.get_pending_paths("code_review")
        assert len(waiting) == 2

    async def test_pending_paths_isolated_by_task_type(self, optimizer):
        """Pending paths are tracked separately for each task type."""
        review_path = _make_path(task_type="code_review", sample_count=1, success_rate=0.9)
        analysis_path = _make_path(task_type="data_analysis", sample_count=1, success_rate=0.8)
        await optimizer.evaluate_and_update("code_review", review_path)
        await optimizer.evaluate_and_update("data_analysis", analysis_path)

        for task in ("code_review", "data_analysis"):
            assert len(optimizer.get_pending_paths(task)) == 1

    async def test_sufficient_samples_not_pending(self, optimizer):
        """A path with enough samples is not added to the pending list."""
        well_sampled = _make_path(sample_count=5, success_rate=0.8)
        await optimizer.evaluate_and_update("code_review", well_sampled)

        assert optimizer.get_pending_paths("code_review") == []


# ── ExperienceStore integration tests ─────────────────────


class TestExperienceStoreIntegration:
    """PathOptimizer with and without an experience store."""

    async def test_with_experience_store(self):
        """PathOptimizer accepts an InMemoryExperienceStore and still updates."""
        from agentkit.evolution.experience_store import InMemoryExperienceStore

        backed = PathOptimizer(experience_store=InMemoryExperienceStore(), min_sample_count=3)

        outcome = await backed.evaluate_and_update(
            "code_review", _make_path(success_rate=0.8, sample_count=5)
        )
        assert outcome.updated is True

    async def test_without_experience_store(self, optimizer):
        """PathOptimizer works when no experience store is supplied."""
        outcome = await optimizer.evaluate_and_update(
            "code_review", _make_path(success_rate=0.8, sample_count=5)
        )
        assert outcome.updated is True


# ── Edge case tests ───────────────────────────────────────


class TestEdgeCases:
    """Boundary and unusual-input cases for PathOptimizer.evaluate_and_update."""

    async def test_same_path_twice(self, optimizer):
        """Submitting an equivalent path a second time does not trigger an update."""
        path = _make_path(success_rate=0.8, sample_count=5)
        result1 = await optimizer.evaluate_and_update("code_review", path)
        assert result1.updated is True

        # Second submission: same parameters, but a distinct instance.
        path2 = _make_path(success_rate=0.8, sample_count=5)
        result2 = await optimizer.evaluate_and_update("code_review", path2)
        # Same success rate and same duration, so there is no clear advantage.
        assert result2.updated is False

    async def test_success_rate_at_boundary(self, optimizer):
        """A success-rate gain equal to the threshold must not trigger an update.

        Two cases are checked. The decimal case (0.8 -> 0.85 against 0.05) is
        kept from the original suite, but it cannot pin the boundary by itself:
        ``0.85 - 0.8`` evaluates to ``0.04999999999999993`` in binary floating
        point, which is already below the threshold. The second case uses
        values that are exactly representable (0.5, 0.75, 0.25), so the gain
        really equals the threshold.
        """
        old_path = _make_path(success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", old_path)

        # Nominally a 0.05 gain; in floating point it is marginally below it.
        new_path = _make_path(success_rate=0.85, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", new_path)
        assert result.updated is False

        # Exact boundary: 0.75 - 0.5 == 0.25 with no rounding error.
        # NOTE(review): assumes the optimizer uses a strict ``>`` comparison,
        # as the original comment in this test stated; confirm against the
        # implementation.
        exact_optimizer = PathOptimizer(
            min_sample_count=3,
            success_rate_threshold=0.25,
            duration_improvement_threshold=0.2,
        )
        await exact_optimizer.evaluate_and_update(
            "code_review", _make_path(success_rate=0.5, sample_count=5)
        )
        exact_result = await exact_optimizer.evaluate_and_update(
            "code_review", _make_path(success_rate=0.75, sample_count=5)
        )
        assert exact_result.updated is False

    async def test_duration_improvement_at_boundary(self, optimizer):
        """A duration gain equal to the threshold does not trigger an update."""
        old_path = _make_path(total_duration=100.0, success_rate=0.8, sample_count=5)
        await optimizer.evaluate_and_update("code_review", old_path)

        # 100 -> 80 is a 20% reduction, equal to the threshold rather than above it.
        new_path = _make_path(total_duration=80.0, success_rate=0.82, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", new_path)
        assert result.updated is False

    async def test_zero_sample_count(self, optimizer):
        """A path with zero samples is rejected as under-sampled."""
        path = _make_path(sample_count=0, success_rate=0.9)
        result = await optimizer.evaluate_and_update("code_review", path)
        assert result.updated is False
        assert "样本量不足" in result.reason

    async def test_path_task_type_override(self, optimizer):
        """The task_type argument overwrites the task_type stored on the path."""
        path = _make_path(task_type="wrong_type", success_rate=0.8, sample_count=5)
        result = await optimizer.evaluate_and_update("code_review", path)
        assert result.updated is True
        # The object passed in is modified in place.
        assert path.task_type == "code_review"
        recommended = optimizer.get_recommended_path("code_review")
        assert recommended is not None