"""StrategyTuner - strategy tuning.

Automatically adjusts agent parameters (temperature, tool-selection weights,
pipeline path), using a simplified Bayesian-inspired optimization in place of
random perturbation.
"""
|
||
|
||
import logging
|
||
import math
|
||
import random
|
||
from dataclasses import dataclass, field
|
||
from typing import Any
|
||
|
||
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
||
|
||
|
||
@dataclass
class StrategyConfig:
    """Strategy configuration: the set of tunable agent parameters."""

    # Sampling temperature; 0.5 unless overridden.
    temperature: float = 0.5

    # Tool-selection weights (presumably keyed by tool name -- confirm with
    # callers). Built by a factory so each instance owns a separate dict.
    tool_weights: dict[str, float] = field(default_factory=dict)

    # Iteration limit; 5 unless overridden.
    max_iterations: int = 5

    # Timeout, in seconds; 300 unless overridden.
    timeout_seconds: int = 300
|
||
|
||
|
||
class StrategyTuner:
|
||
"""策略调优器
|
||
|
||
基于历史效果数据自动调整 Agent 参数。
|
||
使用简化的 Bayesian-inspired 1D 优化:对每个参数,
|
||
找到历史最优值并添加小高斯噪声。
|
||
"""
|
||
|
||
def __init__(self, param_ranges: dict[str, tuple[float, float]] | None = None):
|
||
self._param_ranges = param_ranges or {
|
||
"temperature": (0.0, 1.0),
|
||
"max_iterations": (1, 10),
|
||
}
|
||
self._history: list[dict[str, Any]] = []
|
||
|
||
def record(self, config: StrategyConfig, metric: float) -> None:
|
||
"""记录配置和对应的效果指标"""
|
||
self._history.append({
|
||
"config": config,
|
||
"metric": metric,
|
||
})
|
||
|
||
async def suggest(self, current: StrategyConfig) -> StrategyConfig:
|
||
"""基于历史数据建议新的策略配置
|
||
|
||
使用简化的 Bayesian-inspired 优化:
|
||
1. 对每个参数,在历史中找到得分最高的配置对应的参数值
|
||
2. 在该最优值附近添加小高斯噪声进行探索
|
||
"""
|
||
if len(self._history) < 3:
|
||
logger.info("Not enough history for strategy tuning")
|
||
return current
|
||
|
||
# Find best config in history
|
||
best = max(self._history, key=lambda x: x["metric"])
|
||
best_config = best["config"]
|
||
|
||
# For each parameter, find the best value and add Gaussian noise
|
||
suggested_temperature = self._optimize_param_1d(
|
||
param_name="temperature",
|
||
get_value=lambda c: c.temperature,
|
||
best_value=best_config.temperature,
|
||
noise_std=0.05,
|
||
)
|
||
|
||
suggested_max_iterations = int(self._optimize_param_1d(
|
||
param_name="max_iterations",
|
||
get_value=lambda c: c.max_iterations,
|
||
best_value=best_config.max_iterations,
|
||
noise_std=0.5,
|
||
))
|
||
|
||
suggested = StrategyConfig(
|
||
temperature=suggested_temperature,
|
||
tool_weights=dict(best_config.tool_weights),
|
||
max_iterations=suggested_max_iterations,
|
||
timeout_seconds=current.timeout_seconds,
|
||
)
|
||
|
||
logger.info(
|
||
f"Strategy suggestion: temperature {current.temperature:.2f} -> {suggested.temperature:.2f}, "
|
||
f"max_iterations {current.max_iterations} -> {suggested.max_iterations}"
|
||
)
|
||
|
||
return suggested
|
||
|
||
def _optimize_param_1d(
|
||
self,
|
||
param_name: str,
|
||
get_value: Any,
|
||
best_value: float,
|
||
noise_std: float,
|
||
) -> float:
|
||
"""简化的 1D Bayesian-inspired 优化
|
||
|
||
在历史最优值附近添加高斯噪声进行探索。
|
||
噪声标准差随历史数据量递减(探索-利用平衡)。
|
||
"""
|
||
# Decay noise as we accumulate more data (exploit more, explore less)
|
||
decay_factor = 1.0 / (1.0 + len(self._history) / 10.0)
|
||
effective_noise = noise_std * decay_factor
|
||
|
||
# Add Gaussian noise around the best value
|
||
perturbation = random.gauss(0, effective_noise)
|
||
new_value = best_value + perturbation
|
||
|
||
# Clamp to valid range
|
||
min_val, max_val = self._param_ranges.get(param_name, (0.0, 1.0))
|
||
return max(min_val, min(max_val, new_value))
|
||
|
||
@staticmethod
|
||
def _clamp(value: float, min_val: float, max_val: float) -> float:
|
||
return max(min_val, min(max_val, value))
|