# fischer-agentkit/src/agentkit/evolution/strategy_tuner.py

"""StrategyTuner - 策略调优

自动调整 Agent 参数（temperature, tool 选择权重, Pipeline 路径）。
使用简化的 Bayesian-inspired 优化替代随机扰动。
"""

import logging
import math
import random
from dataclasses import dataclass, field
from typing import Any

logger = logging.getLogger(__name__)


@dataclass
class StrategyConfig:
    """策略配置"""
    temperature: float = 0.5
    tool_weights: dict[str, float] = field(default_factory=dict)
    max_iterations: int = 5
    timeout_seconds: int = 300


class StrategyTuner:
    """策略调优器

    基于历史效果数据自动调整 Agent 参数。
    使用简化的 Bayesian-inspired 1D 优化：对每个参数，
    找到历史最优值并添加小高斯噪声。
    """

    def __init__(self, param_ranges: dict[str, tuple[float, float]] | None = None):
        self._param_ranges = param_ranges or {
            "temperature": (0.0, 1.0),
            "max_iterations": (1, 10),
        }
        self._history: list[dict[str, Any]] = []

    def record(self, config: StrategyConfig, metric: float) -> None:
        """记录配置和对应的效果指标"""
        self._history.append({
            "config": config,
            "metric": metric,
        })

    async def suggest(self, current: StrategyConfig) -> StrategyConfig:
        """基于历史数据建议新的策略配置

        使用简化的 Bayesian-inspired 优化：
        1. 对每个参数，在历史中找到得分最高的配置对应的参数值
        2. 在该最优值附近添加小高斯噪声进行探索
        """
        if len(self._history) < 3:
            logger.info("Not enough history for strategy tuning")
            return current

        # Find best config in history
        best = max(self._history, key=lambda x: x["metric"])
        best_config = best["config"]

        # For each parameter, find the best value and add Gaussian noise
        suggested_temperature = self._optimize_param_1d(
            param_name="temperature",
            get_value=lambda c: c.temperature,
            best_value=best_config.temperature,
            noise_std=0.05,
        )

        suggested_max_iterations = int(self._optimize_param_1d(
            param_name="max_iterations",
            get_value=lambda c: c.max_iterations,
            best_value=best_config.max_iterations,
            noise_std=0.5,
        ))

        suggested = StrategyConfig(
            temperature=suggested_temperature,
            tool_weights=dict(best_config.tool_weights),
            max_iterations=suggested_max_iterations,
            timeout_seconds=current.timeout_seconds,
        )

        logger.info(
            f"Strategy suggestion: temperature {current.temperature:.2f} -> {suggested.temperature:.2f}, "
            f"max_iterations {current.max_iterations} -> {suggested.max_iterations}"
        )

        return suggested

    def _optimize_param_1d(
        self,
        param_name: str,
        get_value: Any,
        best_value: float,
        noise_std: float,
    ) -> float:
        """简化的 1D Bayesian-inspired 优化

        在历史最优值附近添加高斯噪声进行探索。
        噪声标准差随历史数据量递减（探索-利用平衡）。
        """
        # Decay noise as we accumulate more data (exploit more, explore less)
        decay_factor = 1.0 / (1.0 + len(self._history) / 10.0)
        effective_noise = noise_std * decay_factor

        # Add Gaussian noise around the best value
        perturbation = random.gauss(0, effective_noise)
        new_value = best_value + perturbation

        # Clamp to valid range
        min_val, max_val = self._param_ranges.get(param_name, (0.0, 1.0))
        return max(min_val, min(max_val, new_value))

    @staticmethod
    def _clamp(value: float, min_val: float, max_val: float) -> float:
        return max(min_val, min(max_val, value))