305 lines
11 KiB
Python
305 lines
11 KiB
Python
"""Tests for GEPA genetic evolution"""
|
|
|
|
import pytest
|
|
|
|
from agentkit.evolution.genetic import (
|
|
CrossoverOperator,
|
|
FitnessScore,
|
|
GEPAPopulation,
|
|
MutationOperator,
|
|
PromptChromosome,
|
|
)
|
|
from agentkit.evolution.prompt_optimizer import Module, Signature
|
|
|
|
|
|
class TestFitnessScore:
|
|
"""FitnessScore unit tests"""
|
|
|
|
def test_dominates(self):
|
|
a = FitnessScore(accuracy=0.9, latency_ms=100, cost_tokens=500)
|
|
b = FitnessScore(accuracy=0.7, latency_ms=200, cost_tokens=1000)
|
|
assert a.dominates(b)
|
|
assert not b.dominates(a)
|
|
|
|
def test_no_dominance_equal(self):
|
|
a = FitnessScore(accuracy=0.8, latency_ms=100)
|
|
b = FitnessScore(accuracy=0.8, latency_ms=100)
|
|
assert not a.dominates(b)
|
|
assert not b.dominates(a)
|
|
|
|
def test_partial_dominance(self):
|
|
a = FitnessScore(accuracy=0.9, latency_ms=200) # Higher accuracy but slower
|
|
b = FitnessScore(accuracy=0.7, latency_ms=100) # Faster but lower accuracy
|
|
assert not a.dominates(b) # a is not >= b in all dimensions
|
|
assert not b.dominates(a) # b is not >= a in all dimensions
|
|
|
|
def test_normalized_values(self):
|
|
score = FitnessScore(accuracy=0.8, latency_ms=1000, cost_tokens=2000)
|
|
n = score.normalized
|
|
assert n["accuracy"] == 0.8
|
|
assert 0 < n["latency"] < 1
|
|
assert 0 < n["cost"] < 1
|
|
|
|
def test_zero_fitness(self):
|
|
score = FitnessScore()
|
|
assert score.accuracy == 0.0
|
|
n = score.normalized
|
|
assert n["accuracy"] == 0.0
|
|
|
|
|
|
class TestPromptChromosome:
|
|
"""PromptChromosome unit tests"""
|
|
|
|
def test_from_module(self):
|
|
module = Module(
|
|
name="test",
|
|
signature=Signature(
|
|
input_fields={"query": "user query"},
|
|
output_fields={"answer": "response"},
|
|
instruction="Answer the question.\n- Must be accurate\n- Never hallucinate",
|
|
),
|
|
demos=[{"input": "test", "output": "result"}],
|
|
)
|
|
chromosome = PromptChromosome.from_module(module)
|
|
assert "Answer the question" in chromosome.instructions
|
|
assert len(chromosome.constraints) >= 1
|
|
assert len(chromosome.demos) == 1
|
|
|
|
def test_to_module(self):
|
|
chromosome = PromptChromosome(
|
|
instructions="Test instruction",
|
|
demos=[{"input": "q", "output": "a"}],
|
|
constraints=["Be accurate"],
|
|
)
|
|
module = chromosome.to_module("test_module")
|
|
assert module.name == "test_module"
|
|
assert "Test instruction" in module.signature.instruction
|
|
assert len(module.demos) == 1
|
|
|
|
def test_default_values(self):
|
|
c = PromptChromosome()
|
|
assert c.instructions == ""
|
|
assert c.demos == []
|
|
assert c.constraints == []
|
|
assert c.generation == 0
|
|
assert c.fitness.accuracy == 0.0
|
|
|
|
|
|
class TestCrossoverOperator:
|
|
"""CrossoverOperator unit tests"""
|
|
|
|
def setup_method(self):
|
|
self.crossover = CrossoverOperator()
|
|
|
|
def test_crossover_produces_child(self):
|
|
parent_a = PromptChromosome(
|
|
instructions="Instruction A paragraph 1\n\nInstruction A paragraph 2",
|
|
demos=[{"input": "a1", "output": "r1"}],
|
|
constraints=["Constraint A"],
|
|
)
|
|
parent_b = PromptChromosome(
|
|
instructions="Instruction B paragraph 1\n\nInstruction B paragraph 2",
|
|
demos=[{"input": "b1", "output": "r2"}],
|
|
constraints=["Constraint B"],
|
|
)
|
|
|
|
child = self.crossover.crossover(parent_a, parent_b)
|
|
assert child.generation == 1
|
|
assert len(child.parent_ids) == 2
|
|
assert parent_a.id in child.parent_ids
|
|
assert parent_b.id in child.parent_ids
|
|
|
|
def test_crossover_preserves_content(self):
|
|
parent_a = PromptChromosome(instructions="A", demos=[], constraints=["C1"])
|
|
parent_b = PromptChromosome(instructions="B", demos=[], constraints=["C2"])
|
|
|
|
child = self.crossover.crossover(parent_a, parent_b, crossover_rate=0.0)
|
|
# With rate=0, should take from parent_a
|
|
assert child.instructions == "A"
|
|
|
|
def test_crossover_demos(self):
|
|
parent_a = PromptChromosome(
|
|
demos=[{"input": "a1", "output": "r1"}, {"input": "a2", "output": "r2"}],
|
|
)
|
|
parent_b = PromptChromosome(
|
|
demos=[{"input": "b1", "output": "r3"}],
|
|
)
|
|
|
|
child = self.crossover.crossover(parent_a, parent_b)
|
|
# Child should have demos from both parents
|
|
assert len(child.demos) >= 0 # May be empty due to rate filtering
|
|
|
|
def test_crossover_constraints(self):
|
|
parent_a = PromptChromosome(constraints=["C1", "C2"])
|
|
parent_b = PromptChromosome(constraints=["C3", "C4"])
|
|
|
|
child = self.crossover.crossover(parent_a, parent_b)
|
|
# Child should have some constraints from parents
|
|
assert isinstance(child.constraints, list)
|
|
|
|
|
|
class TestMutationOperator:
|
|
"""MutationOperator unit tests"""
|
|
|
|
def setup_method(self):
|
|
self.mutation = MutationOperator()
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_mutate_returns_new_chromosome(self):
|
|
original = PromptChromosome(
|
|
instructions="Test instruction",
|
|
demos=[{"input": "q", "output": "a"}],
|
|
constraints=["Be accurate"],
|
|
)
|
|
mutated = await self.mutation.mutate(original, mutation_rate=1.0)
|
|
assert mutated.parent_ids == [original.id]
|
|
assert mutated.generation == original.generation
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_mutate_with_zero_rate(self):
|
|
original = PromptChromosome(
|
|
instructions="Test instruction",
|
|
demos=[{"input": "q", "output": "a"}],
|
|
constraints=["Be accurate"],
|
|
)
|
|
mutated = await self.mutation.mutate(original, mutation_rate=0.0)
|
|
# With rate=0, should be identical
|
|
assert mutated.instructions == original.instructions
|
|
assert mutated.demos == original.demos
|
|
assert mutated.constraints == original.constraints
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_demo_mutation(self):
|
|
original = PromptChromosome(
|
|
demos=[
|
|
{"input": "q1", "output": "a1"},
|
|
{"input": "q2", "output": "a2"},
|
|
{"input": "q3", "output": "a3"},
|
|
],
|
|
)
|
|
mutated_demos = self.mutation._mutate_demos(original.demos)
|
|
assert isinstance(mutated_demos, list)
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_constraint_mutation_add(self):
|
|
constraints = ["Be accurate"]
|
|
mutated = self.mutation._mutate_constraints(constraints)
|
|
assert isinstance(mutated, list)
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_constraint_mutation_remove(self):
|
|
constraints = ["C1", "C2", "C3"]
|
|
mutated = self.mutation._mutate_constraints(constraints)
|
|
assert isinstance(mutated, list)
|
|
|
|
|
|
class TestGEPAPopulation:
|
|
"""GEPAPopulation unit tests"""
|
|
|
|
def setup_method(self):
|
|
self.population = GEPAPopulation(population_size=6, elite_size=2, tournament_size=3)
|
|
|
|
def test_initialize_with_seed(self):
|
|
seed = PromptChromosome(instructions="You are a helpful assistant.")
|
|
self.population.initialize(seed)
|
|
assert self.population.size == 6
|
|
assert self.population.generation == 0
|
|
|
|
def test_initialize_without_seed(self):
|
|
self.population.initialize()
|
|
assert self.population.size == 6
|
|
|
|
def test_get_elite(self):
|
|
self.population.initialize()
|
|
# Set fitness scores
|
|
for i, ind in enumerate(self.population.individuals):
|
|
ind.fitness = FitnessScore(accuracy=i * 0.1)
|
|
|
|
elite = self.population.get_elite()
|
|
assert len(elite) == 2
|
|
assert elite[0].fitness.accuracy >= elite[1].fitness.accuracy
|
|
|
|
def test_tournament_select(self):
|
|
self.population.initialize()
|
|
for i, ind in enumerate(self.population.individuals):
|
|
ind.fitness = FitnessScore(accuracy=i * 0.1)
|
|
|
|
selected = self.population.tournament_select()
|
|
assert isinstance(selected, PromptChromosome)
|
|
|
|
def test_tournament_select_empty_population(self):
|
|
with pytest.raises(ValueError, match="Population is empty"):
|
|
self.population.tournament_select()
|
|
|
|
def test_get_best(self):
|
|
self.population.initialize()
|
|
for i, ind in enumerate(self.population.individuals):
|
|
ind.fitness = FitnessScore(accuracy=i * 0.1)
|
|
|
|
best = self.population.get_best()
|
|
assert best.fitness.accuracy == 0.5 # Last individual (index 5 * 0.1)
|
|
|
|
def test_evolve(self):
|
|
self.population.initialize()
|
|
for i, ind in enumerate(self.population.individuals):
|
|
ind.fitness = FitnessScore(accuracy=i * 0.1)
|
|
|
|
crossover = CrossoverOperator()
|
|
mutation = MutationOperator()
|
|
|
|
new_gen = self.population.evolve(crossover, mutation)
|
|
assert self.population.generation == 1
|
|
assert len(new_gen) == 6
|
|
|
|
def test_multiple_generations(self):
|
|
self.population.initialize()
|
|
for i, ind in enumerate(self.population.individuals):
|
|
ind.fitness = FitnessScore(accuracy=i * 0.1)
|
|
|
|
crossover = CrossoverOperator()
|
|
mutation = MutationOperator()
|
|
|
|
for _ in range(5):
|
|
self.population.evolve(crossover, mutation)
|
|
# Re-evaluate fitness (simulated)
|
|
for i, ind in enumerate(self.population.individuals):
|
|
ind.fitness = FitnessScore(accuracy=min(1.0, i * 0.1 + 0.3))
|
|
|
|
assert self.population.generation == 5
|
|
|
|
def test_get_pareto_front(self):
|
|
self.population.initialize()
|
|
# Set diverse fitness
|
|
self.population.individuals[0].fitness = FitnessScore(accuracy=0.9, latency_ms=500)
|
|
self.population.individuals[1].fitness = FitnessScore(accuracy=0.7, latency_ms=100)
|
|
self.population.individuals[2].fitness = FitnessScore(accuracy=0.5, latency_ms=50)
|
|
self.population.individuals[3].fitness = FitnessScore(accuracy=0.3, latency_ms=30)
|
|
self.population.individuals[4].fitness = FitnessScore(accuracy=0.8, latency_ms=200)
|
|
self.population.individuals[5].fitness = FitnessScore(accuracy=0.6, latency_ms=150)
|
|
|
|
front = self.population.get_pareto_front()
|
|
assert len(front) >= 1
|
|
# The front should contain non-dominated individuals
|
|
|
|
def test_get_statistics(self):
|
|
self.population.initialize()
|
|
for i, ind in enumerate(self.population.individuals):
|
|
ind.fitness = FitnessScore(accuracy=i * 0.1 + 0.3)
|
|
|
|
stats = self.population.get_statistics()
|
|
assert stats["generation"] == 0
|
|
assert stats["size"] == 6
|
|
assert "best_accuracy" in stats
|
|
assert "avg_accuracy" in stats
|
|
|
|
def test_get_statistics_empty(self):
|
|
stats = self.population.get_statistics()
|
|
assert stats["size"] == 0
|
|
|
|
def test_add_individual(self):
|
|
self.population.initialize()
|
|
initial_size = self.population.size
|
|
new_individual = PromptChromosome(instructions="New individual")
|
|
self.population.add(new_individual)
|
|
assert self.population.size == initial_size + 1
|