# fischer-agentkit/tests/unit/test_genetic_evolution.py
#
# 305 lines
# 11 KiB
# Python

"""Tests for GEPA genetic evolution"""
import pytest
from agentkit.evolution.genetic import (
CrossoverOperator,
FitnessScore,
GEPAPopulation,
MutationOperator,
PromptChromosome,
)
from agentkit.evolution.prompt_optimizer import Module, Signature
class TestFitnessScore:
    """Unit tests for FitnessScore dominance comparison and normalization."""

    def test_dominates(self):
        """A score that is better on accuracy, latency and cost dominates one-way."""
        stronger = FitnessScore(accuracy=0.9, latency_ms=100, cost_tokens=500)
        weaker = FitnessScore(accuracy=0.7, latency_ms=200, cost_tokens=1000)

        assert stronger.dominates(weaker)
        assert not weaker.dominates(stronger)

    def test_no_dominance_equal(self):
        """Two identical scores must not dominate each other."""
        left = FitnessScore(accuracy=0.8, latency_ms=100)
        right = FitnessScore(accuracy=0.8, latency_ms=100)

        assert not left.dominates(right)
        assert not right.dominates(left)

    def test_partial_dominance(self):
        """A trade-off (one metric better, another worse) yields no dominance."""
        # More accurate, but slower.
        accurate_but_slow = FitnessScore(accuracy=0.9, latency_ms=200)
        # Faster, but less accurate.
        fast_but_rough = FitnessScore(accuracy=0.7, latency_ms=100)

        # Neither score is at least as good as the other on every dimension.
        assert not accurate_but_slow.dominates(fast_but_rough)
        assert not fast_but_rough.dominates(accurate_but_slow)

    def test_normalized_values(self):
        """Normalized accuracy is passed through; latency and cost land in (0, 1)."""
        normalized = FitnessScore(
            accuracy=0.8,
            latency_ms=1000,
            cost_tokens=2000,
        ).normalized

        assert normalized["accuracy"] == 0.8
        assert 0 < normalized["latency"] < 1
        assert 0 < normalized["cost"] < 1

    def test_zero_fitness(self):
        """A default-constructed score reports zero accuracy, raw and normalized."""
        empty_score = FitnessScore()
        assert empty_score.accuracy == 0.0

        normalized = empty_score.normalized
        assert normalized["accuracy"] == 0.0
class TestPromptChromosome:
    """Unit tests for PromptChromosome construction and Module conversion."""

    def test_from_module(self):
        """Building a chromosome from a Module carries over text, constraints, demos."""
        signature = Signature(
            input_fields={"query": "user query"},
            output_fields={"answer": "response"},
            instruction="Answer the question.\n- Must be accurate\n- Never hallucinate",
        )
        source_module = Module(
            name="test",
            signature=signature,
            demos=[{"input": "test", "output": "result"}],
        )

        built = PromptChromosome.from_module(source_module)

        assert "Answer the question" in built.instructions
        assert len(built.constraints) >= 1
        assert len(built.demos) == 1

    def test_to_module(self):
        """Converting a chromosome to a Module keeps the name, instruction and demos."""
        source_chromosome = PromptChromosome(
            instructions="Test instruction",
            demos=[{"input": "q", "output": "a"}],
            constraints=["Be accurate"],
        )

        converted = source_chromosome.to_module("test_module")

        assert converted.name == "test_module"
        assert "Test instruction" in converted.signature.instruction
        assert len(converted.demos) == 1

    def test_default_values(self):
        """A chromosome created without arguments starts out empty at generation 0."""
        blank = PromptChromosome()

        assert blank.instructions == ""
        assert blank.demos == []
        assert blank.constraints == []
        assert blank.generation == 0
        assert blank.fitness.accuracy == 0.0
class TestCrossoverOperator:
    """CrossoverOperator unit tests."""

    def setup_method(self):
        """Create a fresh operator before every test."""
        self.crossover = CrossoverOperator()

    def test_crossover_produces_child(self):
        """The child is one generation on and records both parents' ids."""
        parent_a = PromptChromosome(
            instructions="Instruction A paragraph 1\n\nInstruction A paragraph 2",
            demos=[{"input": "a1", "output": "r1"}],
            constraints=["Constraint A"],
        )
        parent_b = PromptChromosome(
            instructions="Instruction B paragraph 1\n\nInstruction B paragraph 2",
            demos=[{"input": "b1", "output": "r2"}],
            constraints=["Constraint B"],
        )

        child = self.crossover.crossover(parent_a, parent_b)

        assert child.generation == 1
        assert len(child.parent_ids) == 2
        assert parent_a.id in child.parent_ids
        assert parent_b.id in child.parent_ids

    def test_crossover_preserves_content(self):
        """With crossover_rate=0.0 the instructions come from the first parent."""
        parent_a = PromptChromosome(instructions="A", demos=[], constraints=["C1"])
        parent_b = PromptChromosome(instructions="B", demos=[], constraints=["C2"])

        child = self.crossover.crossover(parent_a, parent_b, crossover_rate=0.0)

        # With rate=0, should take from parent_a
        assert child.instructions == "A"

    def test_crossover_demos(self):
        """Every demo the child carries must originate from one of the parents."""
        parent_a = PromptChromosome(
            demos=[{"input": "a1", "output": "r1"}, {"input": "a2", "output": "r2"}],
        )
        parent_b = PromptChromosome(
            demos=[{"input": "b1", "output": "r3"}],
        )

        child = self.crossover.crossover(parent_a, parent_b)

        # The child may end up with no demos at all (rate filtering), so a
        # length check such as `len(...) >= 0` can never fail. Check provenance
        # instead: whatever survived must have come from a parent.
        assert isinstance(child.demos, list)
        parent_demos = parent_a.demos + parent_b.demos
        assert all(demo in parent_demos for demo in child.demos)

    def test_crossover_constraints(self):
        """Every constraint the child carries must originate from one of the parents."""
        parent_a = PromptChromosome(constraints=["C1", "C2"])
        parent_b = PromptChromosome(constraints=["C3", "C4"])

        child = self.crossover.crossover(parent_a, parent_b)

        # Child should only hold constraints inherited from its parents.
        assert isinstance(child.constraints, list)
        parent_constraints = parent_a.constraints + parent_b.constraints
        assert all(item in parent_constraints for item in child.constraints)
class TestMutationOperator:
    """MutationOperator unit tests."""

    def setup_method(self):
        """Create a fresh operator before every test."""
        self.mutation = MutationOperator()

    @pytest.mark.asyncio
    async def test_mutate_returns_new_chromosome(self):
        """mutate() returns a distinct chromosome that points back at its source."""
        original = PromptChromosome(
            instructions="Test instruction",
            demos=[{"input": "q", "output": "a"}],
            constraints=["Be accurate"],
        )

        mutated = await self.mutation.mutate(original, mutation_rate=1.0)

        # The test name promises a *new* chromosome, so check identity as well
        # as lineage.
        assert mutated is not original
        assert mutated.parent_ids == [original.id]
        assert mutated.generation == original.generation

    @pytest.mark.asyncio
    async def test_mutate_with_zero_rate(self):
        """With mutation_rate=0.0 every gene is carried over unchanged."""
        original = PromptChromosome(
            instructions="Test instruction",
            demos=[{"input": "q", "output": "a"}],
            constraints=["Be accurate"],
        )

        mutated = await self.mutation.mutate(original, mutation_rate=0.0)

        # With rate=0, should be identical
        assert mutated.instructions == original.instructions
        assert mutated.demos == original.demos
        assert mutated.constraints == original.constraints

    # The helper tests below call synchronous private methods and await
    # nothing, so they are plain tests rather than asyncio-marked coroutines.

    def test_demo_mutation(self):
        """_mutate_demos returns a list when given several demos."""
        original = PromptChromosome(
            demos=[
                {"input": "q1", "output": "a1"},
                {"input": "q2", "output": "a2"},
                {"input": "q3", "output": "a3"},
            ],
        )

        mutated_demos = self.mutation._mutate_demos(original.demos)

        assert isinstance(mutated_demos, list)

    def test_constraint_mutation_add(self):
        """_mutate_constraints returns a list for a single-constraint input."""
        constraints = ["Be accurate"]

        mutated = self.mutation._mutate_constraints(constraints)

        assert isinstance(mutated, list)

    def test_constraint_mutation_remove(self):
        """_mutate_constraints returns a list for a multi-constraint input."""
        constraints = ["C1", "C2", "C3"]

        mutated = self.mutation._mutate_constraints(constraints)

        assert isinstance(mutated, list)
class TestGEPAPopulation:
    """GEPAPopulation unit tests."""

    def setup_method(self):
        """Create an empty six-slot population before every test."""
        self.population = GEPAPopulation(population_size=6, elite_size=2, tournament_size=3)

    def _assign_indexed_fitness(self, offset=0.0):
        """Give individual ``i`` an accuracy of ``i * 0.1 + offset``, capped at 1.0.

        Shared by the tests that need a population with distinct, ordered
        fitness values; the cap only matters if the population grows large
        enough for the raw value to exceed 1.0.
        """
        for i, ind in enumerate(self.population.individuals):
            ind.fitness = FitnessScore(accuracy=min(1.0, i * 0.1 + offset))

    def test_initialize_with_seed(self):
        """Seeding fills the population to its configured size at generation 0."""
        seed = PromptChromosome(instructions="You are a helpful assistant.")

        self.population.initialize(seed)

        assert self.population.size == 6
        assert self.population.generation == 0

    def test_initialize_without_seed(self):
        """Initializing without a seed still fills the population."""
        self.population.initialize()

        assert self.population.size == 6

    def test_get_elite(self):
        """get_elite returns elite_size individuals, best first."""
        self.population.initialize()
        self._assign_indexed_fitness()

        elite = self.population.get_elite()

        assert len(elite) == 2
        assert elite[0].fitness.accuracy >= elite[1].fitness.accuracy

    def test_tournament_select(self):
        """tournament_select returns a chromosome from a populated pool."""
        self.population.initialize()
        self._assign_indexed_fitness()

        selected = self.population.tournament_select()

        assert isinstance(selected, PromptChromosome)

    def test_tournament_select_empty_population(self):
        """Selecting from an uninitialized population raises ValueError."""
        with pytest.raises(ValueError, match="Population is empty"):
            self.population.tournament_select()

    def test_get_best(self):
        """get_best returns the individual with the highest accuracy."""
        self.population.initialize()
        self._assign_indexed_fitness()

        best = self.population.get_best()

        # Last individual (index 5 * 0.1). The value is computed, so compare
        # with a tolerance rather than exact float equality.
        assert best.fitness.accuracy == pytest.approx(0.5)

    def test_evolve(self):
        """One evolve() call advances the generation and keeps the size."""
        self.population.initialize()
        self._assign_indexed_fitness()
        crossover = CrossoverOperator()
        mutation = MutationOperator()

        new_gen = self.population.evolve(crossover, mutation)

        assert self.population.generation == 1
        assert len(new_gen) == 6

    def test_multiple_generations(self):
        """Five evolve() calls with re-scoring in between reach generation 5."""
        self.population.initialize()
        self._assign_indexed_fitness()
        crossover = CrossoverOperator()
        mutation = MutationOperator()

        for _ in range(5):
            self.population.evolve(crossover, mutation)
            # Re-evaluate fitness (simulated)
            self._assign_indexed_fitness(offset=0.3)

        assert self.population.generation == 5

    def test_get_pareto_front(self):
        """The Pareto front is non-empty and holds only non-dominated individuals."""
        self.population.initialize()
        # Set diverse fitness
        self.population.individuals[0].fitness = FitnessScore(accuracy=0.9, latency_ms=500)
        self.population.individuals[1].fitness = FitnessScore(accuracy=0.7, latency_ms=100)
        self.population.individuals[2].fitness = FitnessScore(accuracy=0.5, latency_ms=50)
        self.population.individuals[3].fitness = FitnessScore(accuracy=0.3, latency_ms=30)
        self.population.individuals[4].fitness = FitnessScore(accuracy=0.8, latency_ms=200)
        self.population.individuals[5].fitness = FitnessScore(accuracy=0.6, latency_ms=150)

        front = self.population.get_pareto_front()

        assert len(front) >= 1
        # The front should contain non-dominated individuals: nobody in the
        # population may dominate a front member.
        # NOTE(review): assumes front members expose `.fitness` like the
        # individuals returned by get_elite() - confirm.
        for member in front:
            assert not any(
                other.fitness.dominates(member.fitness)
                for other in self.population.individuals
            )

    def test_get_statistics(self):
        """Statistics report generation, size and the accuracy aggregates."""
        self.population.initialize()
        self._assign_indexed_fitness(offset=0.3)

        stats = self.population.get_statistics()

        assert stats["generation"] == 0
        assert stats["size"] == 6
        assert "best_accuracy" in stats
        assert "avg_accuracy" in stats

    def test_get_statistics_empty(self):
        """Statistics for an uninitialized population report size 0."""
        stats = self.population.get_statistics()

        assert stats["size"] == 0

    def test_add_individual(self):
        """add() grows the population by exactly one individual."""
        self.population.initialize()
        initial_size = self.population.size
        new_individual = PromptChromosome(instructions="New individual")

        self.population.add(new_individual)

        assert self.population.size == initial_size + 1