fischer-agentkit/src/agentkit/evolution/strategy_tuner.py

118 lines
3.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""StrategyTuner - 策略调优
自动调整 Agent 参数(temperature, tool 选择权重, Pipeline 路径)。
使用简化的 Bayesian-inspired 优化替代随机扰动。
"""
import logging
import math
import random
from dataclasses import dataclass, field
from typing import Any
logger = logging.getLogger(__name__)
@dataclass
class StrategyConfig:
    """Strategy configuration: the set of agent parameters being tuned.

    Attributes:
        temperature: Temperature setting; defaults to 0.5.
        tool_weights: Per-tool selection weights keyed by tool name. Every
            instance gets its own empty dict by default.
        max_iterations: Iteration limit; defaults to 5.
        timeout_seconds: Timeout in seconds; defaults to 300.
    """

    temperature: float = 0.5
    # default_factory avoids sharing one mutable dict between instances.
    tool_weights: dict[str, float] = field(default_factory=dict)
    max_iterations: int = 5
    timeout_seconds: int = 300
class StrategyTuner:
    """Tune agent strategy parameters from recorded performance history.

    ``record`` stores a configuration together with the metric it achieved.
    ``suggest`` takes the best-scoring recorded configuration and perturbs its
    numeric parameters with Gaussian noise whose spread shrinks as history
    grows. This is a simplified, Bayesian-inspired 1D search per parameter,
    not a full Bayesian optimizer.
    """

    # Ranges used for any parameter the caller does not override.
    _DEFAULT_PARAM_RANGES: dict[str, tuple[float, float]] = {
        "temperature": (0.0, 1.0),
        "max_iterations": (1, 10),
    }

    # Number of recorded samples required before a suggestion is attempted.
    _MIN_HISTORY = 3

    def __init__(self, param_ranges: dict[str, tuple[float, float]] | None = None):
        """Create a tuner with an empty history.

        Args:
            param_ranges: Optional ``{param_name: (min, max)}`` overrides.
                They are merged over the built-in defaults, so overriding one
                parameter keeps the default range of the others.
        """
        # Merge instead of replace: a partial override must not push
        # "max_iterations" onto the generic (0.0, 1.0) fallback range.
        self._param_ranges: dict[str, tuple[float, float]] = {
            **self._DEFAULT_PARAM_RANGES,
            **(param_ranges or {}),
        }
        self._history: list[dict[str, Any]] = []

    def record(self, config: StrategyConfig, metric: float) -> None:
        """Append a configuration and the metric it achieved to the history.

        Args:
            config: The configuration that was evaluated. It is stored by
                reference, not copied.
            metric: The score it achieved; ``suggest`` treats higher as better.
        """
        self._history.append({
            "config": config,
            "metric": metric,
        })

    async def suggest(self, current: StrategyConfig) -> StrategyConfig:
        """Suggest a new configuration based on the recorded history.

        The best-scoring recorded configuration is used as the centre of the
        search; ``temperature`` and ``max_iterations`` are perturbed around its
        values. ``tool_weights`` is shallow-copied from that best configuration
        and ``timeout_seconds`` is carried over from ``current``.

        This coroutine awaits nothing; it is ``async`` only to keep the
        existing call interface.

        Args:
            current: The configuration currently in use.

        Returns:
            ``current`` unchanged when fewer than three samples are recorded
            or when no recorded metric is usable (all NaN); otherwise a new
            ``StrategyConfig``.
        """
        if len(self._history) < self._MIN_HISTORY:
            logger.info("Not enough history for strategy tuning")
            return current

        best = self._best_entry()
        if best is None:
            logger.info("No usable metrics in history for strategy tuning")
            return current
        best_config = best["config"]

        suggested_temperature = self._optimize_param_1d(
            param_name="temperature",
            get_value=lambda c: c.temperature,
            best_value=best_config.temperature,
            noise_std=0.05,
        )
        # round(), not int(): truncation toward zero would turn every slightly
        # negative perturbation into a full step down and bias the search.
        suggested_max_iterations = round(self._optimize_param_1d(
            param_name="max_iterations",
            get_value=lambda c: c.max_iterations,
            best_value=best_config.max_iterations,
            noise_std=0.5,
        ))

        suggested = StrategyConfig(
            temperature=suggested_temperature,
            tool_weights=dict(best_config.tool_weights),
            max_iterations=suggested_max_iterations,
            timeout_seconds=current.timeout_seconds,
        )
        logger.info(
            "Strategy suggestion: temperature %.2f -> %.2f, "
            "max_iterations %s -> %s",
            current.temperature,
            suggested.temperature,
            current.max_iterations,
            suggested.max_iterations,
        )
        return suggested

    def _best_entry(self) -> dict[str, Any] | None:
        """Return the history entry with the highest metric, ignoring NaN.

        NaN compares false against everything, so a NaN metric would otherwise
        make the result of ``max`` depend on insertion order.

        Returns:
            The best ``{"config", "metric"}`` entry (the earliest one on ties),
            or ``None`` when no entry has a non-NaN metric.
        """
        usable = [entry for entry in self._history if not math.isnan(entry["metric"])]
        if not usable:
            return None
        return max(usable, key=lambda entry: entry["metric"])

    def _optimize_param_1d(
        self,
        param_name: str,
        get_value: Any,
        best_value: float,
        noise_std: float,
    ) -> float:
        """Perturb ``best_value`` with decaying Gaussian noise and clamp it.

        The noise standard deviation is ``noise_std / (1 + len(history) / 10)``,
        so exploration narrows as more samples are recorded.

        Args:
            param_name: Key used to look up the valid range; names without a
                configured range fall back to ``(0.0, 1.0)``.
            get_value: Unused; kept so existing call sites remain valid.
            best_value: Centre of the perturbation.
            noise_std: Base standard deviation before decay is applied.

        Returns:
            The perturbed value, clamped to the parameter's range.
        """
        decay_factor = 1.0 / (1.0 + len(self._history) / 10.0)
        effective_noise = noise_std * decay_factor

        new_value = best_value + random.gauss(0, effective_noise)

        min_val, max_val = self._param_ranges.get(param_name, (0.0, 1.0))
        return self._clamp(new_value, min_val, max_val)

    @staticmethod
    def _clamp(value: float, min_val: float, max_val: float) -> float:
        """Return ``value`` limited to the closed interval [min_val, max_val]."""
        return max(min_val, min(max_val, value))