fischer-agentkit/tests/unit/test_evolution.py

132 lines
3.7 KiB
Python

"""Tests for Evolution system"""
import pytest
from agentkit.evolution.reflector import Reflector, Reflection
from agentkit.evolution.prompt_optimizer import PromptOptimizer, Signature, Module
from agentkit.evolution.strategy_tuner import StrategyTuner, StrategyConfig
from agentkit.evolution.ab_tester import ABTester, ABTestConfig
from agentkit.core.protocol import TaskMessage, TaskResult, TaskStatus
from datetime import datetime, timezone
def _make_task() -> TaskMessage:
    """Build a minimal ``TaskMessage`` fixture shared by the tests below.

    The message uses fixed identifiers (task ``test-001``, agent ``test``,
    type ``echo``), empty input data, no callback URL, and a timezone-aware
    UTC creation timestamp taken at call time.
    """
    created = datetime.now(timezone.utc)
    message = TaskMessage(
        task_id="test-001",
        agent_name="test",
        task_type="echo",
        priority=0,
        input_data={},
        callback_url=None,
        created_at=created,
    )
    return message
def _make_result(status: str = TaskStatus.COMPLETED) -> TaskResult:
    """Build a ``TaskResult`` fixture for task ``test-001``.

    Args:
        status: Status to stamp on the result; defaults to
            ``TaskStatus.COMPLETED``.

    Returns:
        A result with a small output payload, no error message, UTC
        start/completion timestamps taken at call time, and a fixed
        ``elapsed_seconds`` metric of 5.0.
    """
    # Timestamps are captured in the same order as the fields they populate.
    started = datetime.now(timezone.utc)
    finished = datetime.now(timezone.utc)
    return TaskResult(
        task_id="test-001",
        agent_name="test",
        status=status,
        output_data={"key": "value"},
        error_message=None,
        started_at=started,
        completed_at=finished,
        metrics={"elapsed_seconds": 5.0},
    )
@pytest.mark.asyncio
async def test_reflector_success():
    """Reflecting on a completed result yields a "success" outcome with a positive score."""
    observed = await Reflector().reflect(_make_task(), _make_result())

    assert observed.outcome == "success"
    assert observed.quality_score > 0
@pytest.mark.asyncio
async def test_reflector_failure():
    """Reflecting on a failed result yields a "failure" outcome with a zero score."""
    analyzer = Reflector()
    message = _make_task()

    # The helper always builds results without an error message, so the
    # failure text is attached after construction.
    failed = _make_result(TaskStatus.FAILED)
    failed.error_message = "something went wrong"

    observed = await analyzer.reflect(message, failed)

    assert observed.outcome == "failure"
    assert observed.quality_score == 0.0
@pytest.mark.asyncio
async def test_prompt_optimizer():
    """Optimizing with enough examples renames the module and caps its demos.

    Five examples are recorded against an optimizer configured with
    ``max_demos=3`` and ``min_examples_for_optimization=2``; the optimized
    module is expected to be named ``test_module_optimized`` and to carry
    exactly three demos.
    """
    tuner = PromptOptimizer(max_demos=3, min_examples_for_optimization=2)

    # Seed the optimizer with five examples of increasing quality.
    for idx in range(5):
        tuner.add_example(
            input_data={"query": f"query_{idx}"},
            output_data={"result": f"result_{idx}"},
            quality_score=0.8 + idx * 0.02,
        )

    search_signature = Signature(
        input_fields={"query": "search query"},
        output_fields={"result": "search result"},
        instruction="Find the best result.",
    )
    baseline = Module(name="test_module", signature=search_signature)

    improved = await tuner.optimize(baseline)

    assert improved.name == "test_module_optimized"
    assert len(improved.demos) == 3
@pytest.mark.asyncio
async def test_prompt_optimizer_not_enough_examples():
    """Optimizing with no recorded examples keeps the module's original name.

    The optimizer requires ten examples and none are added, so the module
    returned by ``optimize`` is expected to still be named ``test``.
    """
    tuner = PromptOptimizer(min_examples_for_optimization=10)

    bare_signature = Signature(
        input_fields={"x": "input"},
        output_fields={"y": "output"},
    )
    baseline = Module(name="test", signature=bare_signature)

    outcome = await tuner.optimize(baseline)

    # Below the example threshold the module should come back unchanged.
    assert outcome.name == "test"
def test_strategy_tuner():
    """Record three strategy configurations and their metrics on a fresh tuner.

    NOTE(review): this test makes no assertions; as written it only checks
    that ``StrategyTuner.record`` accepts these inputs without raising.
    Presumably a check on the tuner's resulting state was intended - confirm
    against the StrategyTuner API and add one.
    """
    tuner = StrategyTuner()

    # (temperature, metric) pairs, recorded in this order.
    observations = ((0.5, 0.6), (0.7, 0.8), (0.3, 0.4))
    for temperature, score in observations:
        tuner.record(StrategyConfig(temperature=temperature), metric=score)
@pytest.mark.asyncio
async def test_ab_tester():
    """Ten recorded A/B results are all accounted for when the test is evaluated.

    A test is registered with ``min_samples=5``, ten results are recorded
    using whichever group ``assign_group`` returns (0.7 for "experiment",
    0.5 otherwise), and the evaluation is expected to be non-``None`` with
    control and experiment sample counts summing to ten.

    NOTE(review): if ``assign_group`` is randomized, the split between groups
    varies from run to run - confirm ``evaluate`` cannot return ``None`` for
    an uneven split, otherwise this test may be flaky.
    """
    test_id = "test-1"
    ab = ABTester()
    ab.create_test(
        ABTestConfig(
            test_id=test_id,
            agent_name="test_agent",
            change_type="prompt",
            min_samples=5,
        )
    )

    # Feed ten samples, scoring the experiment group higher than control.
    for _ in range(10):
        assigned = ab.assign_group(test_id)
        if assigned == "experiment":
            score = 0.7
        else:
            score = 0.5
        ab.record_result(test_id, assigned, score)

    verdict = await ab.evaluate(test_id)

    assert verdict is not None
    assert verdict.control_samples + verdict.experiment_samples == 10