"""Tests for Evolution system""" import pytest from agentkit.evolution.reflector import Reflector, Reflection from agentkit.evolution.prompt_optimizer import PromptOptimizer, Signature, Module from agentkit.evolution.strategy_tuner import StrategyTuner, StrategyConfig from agentkit.evolution.ab_tester import ABTester, ABTestConfig from agentkit.core.protocol import TaskMessage, TaskResult, TaskStatus from datetime import datetime, timezone def _make_task() -> TaskMessage: return TaskMessage( task_id="test-001", agent_name="test", task_type="echo", priority=0, input_data={}, callback_url=None, created_at=datetime.now(timezone.utc), ) def _make_result(status: str = TaskStatus.COMPLETED) -> TaskResult: return TaskResult( task_id="test-001", agent_name="test", status=status, output_data={"key": "value"}, error_message=None, started_at=datetime.now(timezone.utc), completed_at=datetime.now(timezone.utc), metrics={"elapsed_seconds": 5.0}, ) @pytest.mark.asyncio async def test_reflector_success(): reflector = Reflector() task = _make_task() result = _make_result() reflection = await reflector.reflect(task, result) assert reflection.outcome == "success" assert reflection.quality_score > 0 @pytest.mark.asyncio async def test_reflector_failure(): reflector = Reflector() task = _make_task() result = _make_result(TaskStatus.FAILED) result.error_message = "something went wrong" reflection = await reflector.reflect(task, result) assert reflection.outcome == "failure" assert reflection.quality_score == 0.0 @pytest.mark.asyncio async def test_prompt_optimizer(): optimizer = PromptOptimizer(max_demos=3, min_examples_for_optimization=2) # Add examples for i in range(5): optimizer.add_example( input_data={"query": f"query_{i}"}, output_data={"result": f"result_{i}"}, quality_score=0.8 + i * 0.02, ) module = Module( name="test_module", signature=Signature( input_fields={"query": "search query"}, output_fields={"result": "search result"}, instruction="Find the best result.", ), ) optimized = await optimizer.optimize(module) assert optimized.name == "test_module_optimized" assert len(optimized.demos) == 3 @pytest.mark.asyncio async def test_prompt_optimizer_not_enough_examples(): optimizer = PromptOptimizer(min_examples_for_optimization=10) module = Module( name="test", signature=Signature( input_fields={"x": "input"}, output_fields={"y": "output"}, ), ) optimized = await optimizer.optimize(module) # Should return unchanged module assert optimized.name == "test" def test_strategy_tuner(): tuner = StrategyTuner() config = StrategyConfig(temperature=0.5) tuner.record(config, metric=0.6) tuner.record(StrategyConfig(temperature=0.7), metric=0.8) tuner.record(StrategyConfig(temperature=0.3), metric=0.4) @pytest.mark.asyncio async def test_ab_tester(): tester = ABTester() test_config = ABTestConfig( test_id="test-1", agent_name="test_agent", change_type="prompt", min_samples=5, ) tester.create_test(test_config) # Record results for _ in range(10): group = tester.assign_group("test-1") metric = 0.7 if group == "experiment" else 0.5 tester.record_result("test-1", group, metric) result = await tester.evaluate("test-1") assert result is not None assert result.control_samples + result.experiment_samples == 10