"""Tests for MultiObjectiveFitness and ExtendedStrategyTuner""" import pytest from agentkit.evolution.fitness import ( ExtendedStrategyConfig, ExtendedStrategyTuner, FitnessWeights, MultiObjectiveFitness, ) from agentkit.evolution.genetic import FitnessScore class TestFitnessWeights: """FitnessWeights unit tests""" def test_default_weights(self): w = FitnessWeights() assert abs(w.accuracy - 0.6) < 0.01 assert abs(w.latency - 0.2) < 0.01 assert abs(w.cost - 0.2) < 0.01 def test_custom_weights(self): w = FitnessWeights(accuracy=0.5, latency=0.3, cost=0.2) assert abs(w.accuracy - 0.5) < 0.01 def test_auto_normalization(self): w = FitnessWeights(accuracy=1.0, latency=1.0, cost=1.0) assert abs(w.accuracy - 1/3) < 0.01 assert abs(w.latency - 1/3) < 0.01 assert abs(w.cost - 1/3) < 0.01 class TestMultiObjectiveFitness: """MultiObjectiveFitness unit tests""" def setup_method(self): self.evaluator = MultiObjectiveFitness() def test_evaluate(self): score = self.evaluator.evaluate(accuracy=0.9, latency_ms=500, cost_tokens=2000) assert score.accuracy == 0.9 assert score.latency_ms == 500 assert score.cost_tokens == 2000 def test_evaluate_clamps_accuracy(self): score = self.evaluator.evaluate(accuracy=1.5) assert score.accuracy == 1.0 score = self.evaluator.evaluate(accuracy=-0.1) assert score.accuracy == 0.0 def test_weighted_score(self): score = self.evaluator.evaluate(accuracy=1.0, latency_ms=0, cost_tokens=0) weighted = self.evaluator.weighted_score(score) assert weighted == 1.0 # Perfect on all dimensions def test_weighted_score_zero(self): score = self.evaluator.evaluate(accuracy=0.0, latency_ms=10000, cost_tokens=10000) weighted = self.evaluator.weighted_score(score) assert weighted == 0.0 # Worst on all dimensions def test_pareto_rank_simple(self): scores = [ FitnessScore(accuracy=0.9, latency_ms=100), # Dominates all FitnessScore(accuracy=0.5, latency_ms=500), # Dominated by 0 FitnessScore(accuracy=0.3, latency_ms=1000), # Dominated by 0, 1 ] ranks = self.evaluator.pareto_rank(scores) assert ranks[0] == 0 # Front assert ranks[1] >= 1 assert ranks[2] >= ranks[1] def test_pareto_rank_empty(self): ranks = self.evaluator.pareto_rank([]) assert ranks == [] def test_pareto_rank_non_dominated(self): scores = [ FitnessScore(accuracy=0.9, latency_ms=500), # High accuracy, slow FitnessScore(accuracy=0.5, latency_ms=100), # Low accuracy, fast ] ranks = self.evaluator.pareto_rank(scores) # Neither dominates the other — both on front assert ranks[0] == 0 assert ranks[1] == 0 def test_crowding_distance(self): scores = [ FitnessScore(accuracy=0.9, latency_ms=100), FitnessScore(accuracy=0.7, latency_ms=300), FitnessScore(accuracy=0.5, latency_ms=500), ] distances = self.evaluator.crowding_distance(scores) assert len(distances) == 3 assert distances[0] == float("inf") # Boundary assert distances[2] == float("inf") # Boundary assert distances[1] > 0 # Interior point def test_crowding_distance_small(self): scores = [FitnessScore(accuracy=0.5)] distances = self.evaluator.crowding_distance(scores) assert distances[0] == float("inf") def test_custom_weights_evaluator(self): evaluator = MultiObjectiveFitness(weights=FitnessWeights(accuracy=1.0, latency=0.0, cost=0.0)) score = evaluator.evaluate(accuracy=0.8, latency_ms=5000, cost_tokens=5000) weighted = evaluator.weighted_score(score) # Only accuracy matters assert abs(weighted - 0.8) < 0.01 class TestExtendedStrategyTuner: """ExtendedStrategyTuner unit tests""" def setup_method(self): self.tuner = ExtendedStrategyTuner() def test_record_and_suggest(self): config = ExtendedStrategyConfig(temperature=0.5, max_iterations=5, top_k=5) self.tuner.record(config, 0.7) self.tuner.record(config, 0.8) self.tuner.record(config, 0.9) @pytest.mark.asyncio async def test_suggest_with_history(self): config = ExtendedStrategyConfig(temperature=0.7, max_iterations=5, top_k=5) for i in range(5): self.tuner.record(config, 0.5 + i * 0.1) suggested = await self.tuner.suggest(config) assert isinstance(suggested, ExtendedStrategyConfig) assert 0.0 <= suggested.temperature <= 2.0 assert 1 <= suggested.max_iterations <= 10 assert 1 <= suggested.top_k <= 20 @pytest.mark.asyncio async def test_suggest_without_history(self): config = ExtendedStrategyConfig() suggested = await self.tuner.suggest(config) # Should return current config unchanged assert suggested.temperature == config.temperature assert suggested.max_iterations == config.max_iterations @pytest.mark.asyncio async def test_retrieval_mode_suggestion(self): config = ExtendedStrategyConfig(retrieval_mode="standard") enhanced_config = ExtendedStrategyConfig(retrieval_mode="enhanced") # Record mostly enhanced results for _ in range(4): self.tuner.record(enhanced_config, 0.9) self.tuner.record(config, 0.5) suggested = await self.tuner.suggest(config) assert suggested.retrieval_mode == "enhanced" def test_history_size(self): assert self.tuner.history_size == 0 self.tuner.record(ExtendedStrategyConfig(), 0.5) assert self.tuner.history_size == 1 class TestExtendedStrategyConfig: """ExtendedStrategyConfig unit tests""" def test_default_values(self): config = ExtendedStrategyConfig() assert config.temperature == 0.5 assert config.max_iterations == 5 assert config.top_k == 5 assert config.retrieval_mode == "enhanced" assert config.tool_weights == {} def test_custom_values(self): config = ExtendedStrategyConfig( temperature=0.8, max_iterations=10, top_k=15, retrieval_mode="standard", tool_weights={"search": 0.7, "analyze": 0.3}, ) assert config.temperature == 0.8 assert config.max_iterations == 10 assert config.top_k == 15 assert config.retrieval_mode == "standard" assert config.tool_weights["search"] == 0.7