348 lines
11 KiB
Python
348 lines
11 KiB
Python
"""Tests for EvolutionMixin - 进化引擎与 Agent 生命周期集成"""
|
||
|
||
import pytest
|
||
|
||
from agentkit.core.protocol import TaskMessage, TaskResult, TaskStatus
|
||
from agentkit.evolution.ab_tester import ABTestConfig, ABTestResult, ABTester
|
||
from agentkit.evolution.evolution_store import EvolutionStore
|
||
from agentkit.evolution.lifecycle import EvolutionLogEntry, EvolutionMixin
|
||
from agentkit.evolution.prompt_optimizer import Module, PromptOptimizer, Signature
|
||
from agentkit.evolution.reflector import Reflection, Reflector
|
||
from agentkit.evolution.strategy_tuner import StrategyConfig, StrategyTuner
|
||
from datetime import datetime, timezone
|
||
|
||
|
||
def _make_task() -> TaskMessage:
    """Build the fixed ``TaskMessage`` fixture used throughout these tests.

    Returns:
        A message with id ``"test-001"`` addressed to ``"evolving_agent"``,
        task type ``"echo"``, priority 0, a single-query payload, no
        callback URL, and the current UTC time as its creation timestamp.
    """
    fields = {
        "task_id": "test-001",
        "agent_name": "evolving_agent",
        "task_type": "echo",
        "priority": 0,
        "input_data": {"query": "hello"},
        "callback_url": None,
        "created_at": datetime.now(timezone.utc),
    }
    return TaskMessage(**fields)
|
||
|
||
|
||
def _make_result(status: str = TaskStatus.COMPLETED) -> TaskResult:
    """Build a ``TaskResult`` fixture matching the task from ``_make_task``.

    Args:
        status: Status stored on the result; defaults to
            ``TaskStatus.COMPLETED``.

    Returns:
        A result for task ``"test-001"`` / agent ``"evolving_agent"`` with a
        fixed output payload, no error message, and a reported elapsed time
        of 5.0 seconds.
    """
    # Two separate clock reads, taken in the same order as the fields.
    started = datetime.now(timezone.utc)
    finished = datetime.now(timezone.utc)
    return TaskResult(
        task_id="test-001",
        agent_name="evolving_agent",
        status=status,
        output_data={"key": "value"},
        error_message=None,
        started_at=started,
        completed_at=finished,
        metrics={"elapsed_seconds": 5.0},
    )
|
||
|
||
|
||
def _make_module() -> Module:
    """Build the ``Module`` fixture named ``"test_module"``.

    Returns:
        A module whose signature maps one input field (``query``) to one
        output field (``result``) under a fixed instruction string.
    """
    signature = Signature(
        input_fields={"query": "search query"},
        output_fields={"result": "search result"},
        instruction="Find the best result.",
    )
    return Module(name="test_module", signature=signature)
|
||
|
||
|
||
# ── EvolutionMixin 与 Agent on_task_complete 集成 ──────────────
|
||
|
||
|
||
class EvolvingAgent(EvolutionMixin):
    """Simulated agent that integrates ``EvolutionMixin``."""

    def __init__(self, reflector=None, prompt_optimizer=None, ab_tester=None, evolution_store=None):
        """Forward the evolution components to the mixin and set agent state.

        Args:
            reflector: Passed through to ``EvolutionMixin``.
            prompt_optimizer: Passed through to ``EvolutionMixin``.
            ab_tester: Passed through to ``EvolutionMixin``.
            evolution_store: Passed through to ``EvolutionMixin``.
        """
        components = {
            "reflector": reflector,
            "prompt_optimizer": prompt_optimizer,
            "ab_tester": ab_tester,
            "evolution_store": evolution_store,
        }
        super().__init__(**components)
        self.name = "evolving_agent"
        # Flipped to True once on_task_complete has finished evolving.
        self.evolve_called = False

    async def on_task_complete(self, task: TaskMessage, output: dict) -> None:
        """Trigger evolution after a task completes.

        Args:
            task: The task that has just finished.
            output: Accepted for hook-signature compatibility; not used here.
                The result handed to the mixin is the ``_make_result()``
                fixture instead.
        """
        await self.evolve_after_task(task, _make_result())
        self.evolve_called = True
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_mixin_integrates_with_on_task_complete():
    """EvolutionMixin integrates with an agent's ``on_task_complete`` hook."""
    agent = EvolvingAgent(reflector=Reflector())
    agent.set_current_module(_make_module())

    await agent.on_task_complete(_make_task(), {"key": "value"})

    assert agent.evolve_called is True
    # Exactly one history entry, keyed by the fixture task id.
    records = agent.get_evolution_history()
    assert len(records) == 1
    assert records[0]["task_id"] == "test-001"
|
||
|
||
|
||
# ── Reflector 生成反思 ──────────────────────────────────────
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_reflector_generates_reflection_after_task():
    """The Reflector produces a reflection once a task has completed."""
    mixin = EvolutionMixin(reflector=Reflector())
    mixin.set_current_module(_make_module())

    log_entry = await mixin.evolve_after_task(_make_task(), _make_result())

    reflection = log_entry.reflection
    assert reflection is not None
    assert log_entry.reflection.outcome == "success"
    assert log_entry.reflection.quality_score > 0
|
||
|
||
|
||
# ── Prompt 优化在有改进建议时触发 ──────────────────────────────
|
||
|
||
|
||
class LowQualityReflector(Reflector):
    """Reflector that always reports a low-quality result with suggestions."""

    async def reflect(self, task, result):
        """Return a fixed failing ``Reflection`` for the given task/result.

        Only ``task.task_id`` and ``result.agent_name`` are read from the
        arguments; every other field is a constant.
        """
        canned = {
            "task_id": task.task_id,
            "agent_name": result.agent_name,
            "outcome": "failure",
            "quality_score": 0.2,
            "patterns": ["slow_execution"],
            "insights": ["Low quality score indicates potential issues"],
            "suggestions": ["Consider prompt optimization for this task type"],
        }
        return Reflection(**canned)
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_prompt_optimization_triggered_when_reflection_suggests_improvement():
    """Prompt optimization runs when the reflection suggests improvements."""
    low_quality = LowQualityReflector()
    optimizer = PromptOptimizer(max_demos=3, min_examples_for_optimization=1)

    # Pre-fill enough successful examples to trigger optimization.
    for idx in range(3):
        example_input = {"query": f"q_{idx}"}
        example_output = {"result": f"r_{idx}"}
        optimizer.add_example(
            input_data=example_input,
            output_data=example_output,
            quality_score=0.9,
        )

    mixin = EvolutionMixin(reflector=low_quality, prompt_optimizer=optimizer)
    mixin.set_current_module(_make_module())

    log_entry = await mixin.evolve_after_task(_make_task(), _make_result())

    assert log_entry.reflection is not None
    assert len(log_entry.reflection.suggestions) > 0
    assert log_entry.optimized_module is not None
    assert log_entry.optimized_module.name == "test_module_optimized"
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_no_optimization_when_no_suggestions():
    """No optimization is triggered when the reflection has no suggestions."""
    # The default Reflector produces no suggestions for a successful task.
    default_reflector = Reflector()
    mixin = EvolutionMixin(
        reflector=default_reflector,
        prompt_optimizer=PromptOptimizer(),
    )
    mixin.set_current_module(_make_module())

    log_entry = await mixin.evolve_after_task(_make_task(), _make_result())

    assert log_entry.reflection is not None
    assert log_entry.optimized_module is None
|
||
|
||
|
||
# ── AB 测试验证 ──────────────────────────────────────────────
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_ab_test_validation_before_applying():
    """An A/B test validates the change before it is applied."""
    low_quality = LowQualityReflector()
    optimizer = PromptOptimizer(max_demos=3, min_examples_for_optimization=1)
    # Seed the optimizer with three high-scoring examples.
    for idx in range(3):
        example_input = {"query": f"q_{idx}"}
        example_output = {"result": f"r_{idx}"}
        optimizer.add_example(
            input_data=example_input,
            output_data=example_output,
            quality_score=0.9,
        )

    tester = ABTester()
    mixin = EvolutionMixin(
        reflector=low_quality,
        prompt_optimizer=optimizer,
        ab_tester=tester,
    )
    mixin.set_current_module(_make_module())

    log_entry = await mixin.evolve_after_task(_make_task(), _make_result())

    assert log_entry.ab_test_result is not None
    assert log_entry.ab_test_result.test_id.startswith("evolve_")
|
||
|
||
|
||
# ── AB 测试失败时回滚 ──────────────────────────────────────
|
||
|
||
|
||
class FailingABTester(ABTester):
    """A/B tester that always lets the control group win."""

    async def evaluate(self, test_id: str) -> ABTestResult | None:
        """Return a fixed, significant result in which control beats experiment.

        Args:
            test_id: Echoed back on the returned result.

        Returns:
            An ``ABTestResult`` with control metric 0.8, experiment metric
            0.5, 30 samples per arm, ``winner="control"`` and p-value 0.01.
        """
        verdict = {
            "test_id": test_id,
            "control_metric": 0.8,
            "experiment_metric": 0.5,
            "control_samples": 30,
            "experiment_samples": 30,
            "is_significant": True,
            "winner": "control",
            "p_value": 0.01,
        }
        return ABTestResult(**verdict)
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_rollback_when_ab_test_shows_degradation():
    """A rollback happens when the A/B test shows a degradation."""
    low_quality = LowQualityReflector()
    optimizer = PromptOptimizer(max_demos=3, min_examples_for_optimization=1)
    # Seed the optimizer with three high-scoring examples.
    for idx in range(3):
        example_input = {"query": f"q_{idx}"}
        example_output = {"result": f"r_{idx}"}
        optimizer.add_example(
            input_data=example_input,
            output_data=example_output,
            quality_score=0.9,
        )

    losing_tester = FailingABTester()
    mixin = EvolutionMixin(
        reflector=low_quality,
        prompt_optimizer=optimizer,
        ab_tester=losing_tester,
    )
    baseline = _make_module()
    mixin.set_current_module(baseline)

    log_entry = await mixin.evolve_after_task(_make_task(), _make_result())

    assert log_entry.rolled_back is True
    assert log_entry.applied is False
    # The module must not have been updated.
    assert mixin._current_module.name == "test_module"
|
||
|
||
|
||
# ── 进化历史记录 ──────────────────────────────────────────────
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_evolution_history_is_recorded():
    """The evolution history is recorded correctly."""
    mixin = EvolutionMixin(reflector=Reflector())
    mixin.set_current_module(_make_module())

    await mixin.evolve_after_task(_make_task(), _make_result())

    records = mixin.get_evolution_history()
    assert len(records) == 1

    first = records[0]
    assert first["task_id"] == "test-001"
    assert "reflection" in first
    assert first["reflection"]["outcome"] == "success"
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_evolution_history_multiple_entries():
    """Several evolution runs produce several history entries, in order."""
    mixin = EvolutionMixin(reflector=Reflector())
    mixin.set_current_module(_make_module())

    for idx in range(3):
        # Task and result share the same zero-padded id.
        run_id = f"test-{idx:03d}"
        message = TaskMessage(
            task_id=run_id,
            agent_name="evolving_agent",
            task_type="echo",
            priority=0,
            input_data={"query": f"hello_{idx}"},
            callback_url=None,
            created_at=datetime.now(timezone.utc),
        )
        outcome = TaskResult(
            task_id=run_id,
            agent_name="evolving_agent",
            status=TaskStatus.COMPLETED,
            output_data={"key": "value"},
            error_message=None,
            started_at=datetime.now(timezone.utc),
            completed_at=datetime.now(timezone.utc),
            metrics={"elapsed_seconds": 5.0},
        )
        await mixin.evolve_after_task(message, outcome)

    records = mixin.get_evolution_history()
    assert len(records) == 3
    for position, expected_id in enumerate(("test-000", "test-001", "test-002")):
        assert records[position]["task_id"] == expected_id
|
||
|
||
|
||
# ── 无组件配置时的优雅降级 ──────────────────────────────────────
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_no_reflector_skips_evolution():
    """Evolution is skipped when no Reflector is configured."""
    bare_mixin = EvolutionMixin()
    bare_mixin.set_current_module(_make_module())

    log_entry = await bare_mixin.evolve_after_task(_make_task(), _make_result())

    assert log_entry.reflection is None
    assert log_entry.applied is False
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_no_evolution_store_applies_directly():
    """Without an EvolutionStore the change is applied directly in memory."""
    low_quality = LowQualityReflector()
    optimizer = PromptOptimizer(max_demos=3, min_examples_for_optimization=1)
    # Seed the optimizer with three high-scoring examples.
    for idx in range(3):
        example_input = {"query": f"q_{idx}"}
        example_output = {"result": f"r_{idx}"}
        optimizer.add_example(
            input_data=example_input,
            output_data=example_output,
            quality_score=0.9,
        )

    mixin = EvolutionMixin(reflector=low_quality, prompt_optimizer=optimizer)
    mixin.set_current_module(_make_module())

    log_entry = await mixin.evolve_after_task(_make_task(), _make_result())

    # No A/B tester and no store, so the change is applied directly.
    assert log_entry.applied is True
    assert mixin._current_module.name == "test_module_optimized"
|