"""集成测试 - Soul 进化触发条件 测试 EvolutionMixin.evolve_soul 的多维触发逻辑: - 时间窗口内反思计数 - 质量梯度(下降分数)触发早期进化 - 任务类型权重调整触发阈值 - 时间衰减降低旧反思的有效计数 仅 mock MemoryTool(文件 I/O),使用真实 EvolutionMixin + SoulEvolutionConfig 实例。 """ from __future__ import annotations from datetime import datetime, timedelta, timezone from unittest.mock import AsyncMock, MagicMock, patch import pytest from agentkit.core.protocol import TaskMessage, TaskResult, TaskStatus from agentkit.evolution.lifecycle import EvolutionMixin, SoulEvolutionConfig from agentkit.evolution.reflector import Reflection from agentkit.memory.profile import MemoryStore # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def make_task(task_id: str = "task-1") -> TaskMessage: return TaskMessage( task_id=task_id, agent_name="test_agent", task_type="analysis", priority=1, input_data={"content": "test task"}, callback_url=None, created_at=datetime.now(timezone.utc), ) def make_result(task_id: str = "task-1") -> TaskResult: return TaskResult( task_id=task_id, agent_name="test_agent", status=TaskStatus.COMPLETED, output_data={"result": "done"}, error_message=None, started_at=datetime.now(timezone.utc), completed_at=datetime.now(timezone.utc), ) def make_reflection( quality_score: float = 0.3, patterns: list[str] | None = None, suggestions: list[str] | None = None, ) -> Reflection: # Use explicit None check to allow empty list for suggestions if suggestions is None: suggestions = ["Add more detail", "Be more specific"] return Reflection( task_id="task-1", agent_name="test_agent", outcome="partial", quality_score=quality_score, patterns=patterns or ["reasoning"], insights=["Needs improvement"], suggestions=suggestions, ) def make_mock_memory_store() -> MagicMock: """创建 mock MemoryStore,模拟 get_file 返回可操作的 MemoryFile""" store = MagicMock(spec=MemoryStore) mock_file = MagicMock() mock_file.read_section.return_value = "版本: 1\n更新时间: 2025-01-01T00:00:00" mock_file.list_sections.return_value = ["身份", "版本"] store.get_file.return_value = mock_file return store # --------------------------------------------------------------------------- # Test 1: 3 reflections within window trigger evolution # --------------------------------------------------------------------------- class TestReflectionCountTrigger: """时间窗口内 3 次反思触发进化""" @pytest.mark.asyncio async def test_three_reflections_trigger_evolution(self): config = SoulEvolutionConfig( min_reflections=3, reflection_window_seconds=3600, time_decay_factor=1.0, # No decay for this test ) mixin = EvolutionMixin(evolution_config=config) memory_store = make_mock_memory_store() # Record 3 reflections within the window task = make_task() result = make_result() with patch("agentkit.tools.memory_tool.MemoryTool.execute", new_callable=AsyncMock) as mock_execute: mock_execute.return_value = {"success": True, "version": 2} # First reflection — should not trigger reflection1 = make_reflection(quality_score=0.3, patterns=["reasoning"]) evolved = await mixin.evolve_soul(task, result, memory_store, reflection=reflection1) assert evolved is False # Second reflection — should not trigger reflection2 = make_reflection(quality_score=0.25, patterns=["reasoning"]) evolved = await mixin.evolve_soul(task, result, memory_store, reflection=reflection2) assert evolved is False # Third reflection — should trigger (3 >= min_reflections=3) reflection3 = make_reflection(quality_score=0.2, patterns=["reasoning"]) evolved = await mixin.evolve_soul(task, result, memory_store, reflection=reflection3) assert evolved is True # MemoryTool.execute should have been called for the soul update mock_execute.assert_called_once() call_kwargs = mock_execute.call_args[1] assert call_kwargs["action"] == "update_soul" assert call_kwargs["file"] == "soul" @pytest.mark.asyncio async def test_two_reflections_do_not_trigger(self): config = SoulEvolutionConfig( min_reflections=3, time_decay_factor=1.0, ) mixin = EvolutionMixin(evolution_config=config) memory_store = make_mock_memory_store() task = make_task() result = make_result() # First reflection reflection1 = make_reflection(quality_score=0.3, patterns=["reasoning"]) evolved = await mixin.evolve_soul(task, result, memory_store, reflection=reflection1) assert evolved is False # Second reflection — still not enough reflection2 = make_reflection(quality_score=0.25, patterns=["reasoning"]) evolved = await mixin.evolve_soul(task, result, memory_store, reflection=reflection2) assert evolved is False # --------------------------------------------------------------------------- # Test 2: Quality gradient (declining scores) triggers early evolution # --------------------------------------------------------------------------- class TestQualityGradientTrigger: """质量梯度(持续下降分数)触发早期进化""" @pytest.mark.asyncio async def test_declining_scores_trigger_early_evolution(self): config = SoulEvolutionConfig( min_reflections=10, # High threshold — won't trigger by count quality_gradient_threshold=-0.15, time_decay_factor=1.0, ) mixin = EvolutionMixin(evolution_config=config) memory_store = make_mock_memory_store() task = make_task() result = make_result() with patch("agentkit.tools.memory_tool.MemoryTool.execute", new_callable=AsyncMock) as mock_execute: mock_execute.return_value = {"success": True, "version": 2} # Record 3 reflections with declining scores (all < 0.5 to pass the quality check) # Score drops: 0.45 → 0.25 → 0.05 (each drop > 0.15) reflection1 = make_reflection(quality_score=0.45, patterns=["reasoning"]) await mixin.evolve_soul(task, result, memory_store, reflection=reflection1) reflection2 = make_reflection(quality_score=0.25, patterns=["reasoning"]) await mixin.evolve_soul(task, result, memory_store, reflection=reflection2) # Third reflection with continued decline should trigger quality gradient reflection3 = make_reflection(quality_score=0.05, patterns=["reasoning"]) evolved = await mixin.evolve_soul(task, result, memory_store, reflection=reflection3) # Quality gradient: 0.45→0.25 (drop=-0.2), 0.25→0.05 (drop=-0.2) # Both drops <= -0.15, so quality_gradient_triggered = True assert evolved is True mock_execute.assert_called_once() @pytest.mark.asyncio async def test_stable_scores_do_not_trigger_gradient(self): config = SoulEvolutionConfig( min_reflections=10, # High threshold quality_gradient_threshold=-0.15, time_decay_factor=1.0, ) mixin = EvolutionMixin(evolution_config=config) memory_store = make_mock_memory_store() task = make_task() result = make_result() # Record 3 reflections with stable/improving scores reflection1 = make_reflection(quality_score=0.3, patterns=["reasoning"]) await mixin.evolve_soul(task, result, memory_store, reflection=reflection1) reflection2 = make_reflection(quality_score=0.35, patterns=["reasoning"]) await mixin.evolve_soul(task, result, memory_store, reflection=reflection2) reflection3 = make_reflection(quality_score=0.4, patterns=["reasoning"]) evolved = await mixin.evolve_soul(task, result, memory_store, reflection=reflection3) # Scores are improving, no quality gradient trigger assert evolved is False # --------------------------------------------------------------------------- # Test 3: Task type weight adjusts trigger threshold # --------------------------------------------------------------------------- class TestTaskTypeWeightTrigger: """任务类型权重调整触发阈值""" @pytest.mark.asyncio async def test_high_weight_reduces_effective_threshold(self): """高权重降低有效触发阈值:2 次反思 × 权重 2.0 = 有效 4.0 >= min_reflections 3""" config = SoulEvolutionConfig( min_reflections=3, time_decay_factor=1.0, task_type_weights={"critical": 2.0}, ) mixin = EvolutionMixin(evolution_config=config) memory_store = make_mock_memory_store() task = make_task() result = make_result() with patch("agentkit.tools.memory_tool.MemoryTool.execute", new_callable=AsyncMock) as mock_execute: mock_execute.return_value = {"success": True, "version": 2} # First reflection with critical task type reflection1 = make_reflection(quality_score=0.3, patterns=["reasoning"]) evolved = await mixin.evolve_soul( task, result, memory_store, reflection=reflection1, task_type="critical", ) # 1 reflection × weight 2.0 = 2.0 < 3 assert evolved is False # Second reflection with critical task type reflection2 = make_reflection(quality_score=0.25, patterns=["reasoning"]) evolved = await mixin.evolve_soul( task, result, memory_store, reflection=reflection2, task_type="critical", ) # 2 reflections × weight 2.0 = 4.0 >= 3 assert evolved is True mock_execute.assert_called_once() @pytest.mark.asyncio async def test_low_weight_increases_effective_threshold(self): """低权重增加有效触发阈值:3 次反思 × 权重 0.5 = 有效 1.5 < min_reflections 3""" config = SoulEvolutionConfig( min_reflections=3, time_decay_factor=1.0, task_type_weights={"low_priority": 0.5}, ) mixin = EvolutionMixin(evolution_config=config) memory_store = make_mock_memory_store() task = make_task() result = make_result() # 3 reflections with low_priority task type for i in range(3): reflection = make_reflection(quality_score=0.3, patterns=["reasoning"]) evolved = await mixin.evolve_soul( task, result, memory_store, reflection=reflection, task_type="low_priority", ) # 3 × 0.5 = 1.5 < 3 → should not trigger assert evolved is False # --------------------------------------------------------------------------- # Test 4: Time decay reduces effective count for old reflections # --------------------------------------------------------------------------- class TestTimeDecayReducesEffectiveCount: """时间衰减降低旧反思的有效计数""" @pytest.mark.asyncio async def test_old_reflections_decay_below_threshold(self): """旧反思因时间衰减导致有效计数不足""" config = SoulEvolutionConfig( min_reflections=3, reflection_window_seconds=3600, time_decay_factor=0.5, # Half-life of 1 hour ) mixin = EvolutionMixin(evolution_config=config) memory_store = make_mock_memory_store() task = make_task() result = make_result() # Manually add old reflections to pending_soul_updates now = datetime.now(timezone.utc) old_timestamp = now - timedelta(hours=3) # 3 hours ago # Add 2 old reflections manually mixin.pending_soul_updates["reasoning"] = [ { "reflection": make_reflection(quality_score=0.3), "timestamp": old_timestamp, "score": 0.3, "task_type": "", }, { "reflection": make_reflection(quality_score=0.25), "timestamp": old_timestamp, "score": 0.25, "task_type": "", }, ] # Add a recent reflection via evolve_soul # Time decay: 0.5^3 = 0.125 per old reflection → 2 × 0.125 = 0.25 # Plus 1 new reflection → total effective ≈ 1.25 < 3 recent_reflection = make_reflection(quality_score=0.2, patterns=["reasoning"]) evolved = await mixin.evolve_soul(task, result, memory_store, reflection=recent_reflection) # Effective count should be well below 3 due to decay assert evolved is False @pytest.mark.asyncio async def test_recent_reflections_no_decay(self): """近期反思不受时间衰减影响""" config = SoulEvolutionConfig( min_reflections=3, time_decay_factor=0.5, ) mixin = EvolutionMixin(evolution_config=config) memory_store = make_mock_memory_store() task = make_task() result = make_result() with patch("agentkit.tools.memory_tool.MemoryTool.execute", new_callable=AsyncMock) as mock_execute: mock_execute.return_value = {"success": True, "version": 2} # 3 recent reflections should trigger (no significant decay) for i in range(3): reflection = make_reflection(quality_score=0.3, patterns=["reasoning"]) evolved = await mixin.evolve_soul(task, result, memory_store, reflection=reflection) assert evolved is True @pytest.mark.asyncio async def test_no_memory_store_returns_false(self): """无 MemoryStore 时不触发进化""" config = SoulEvolutionConfig(min_reflections=1) mixin = EvolutionMixin(evolution_config=config) task = make_task() result = make_result() reflection = make_reflection(quality_score=0.3, patterns=["reasoning"]) evolved = await mixin.evolve_soul(task, result, memory_store=None, reflection=reflection) assert evolved is False @pytest.mark.asyncio async def test_high_quality_reflection_does_not_trigger(self): """高质量反思不触发进化(quality_score >= 0.5)""" config = SoulEvolutionConfig(min_reflections=1) mixin = EvolutionMixin(evolution_config=config) memory_store = make_mock_memory_store() task = make_task() result = make_result() # High quality reflection — should not even be recorded reflection = make_reflection(quality_score=0.8, patterns=["reasoning"]) evolved = await mixin.evolve_soul(task, result, memory_store, reflection=reflection) assert evolved is False @pytest.mark.asyncio async def test_no_suggestions_does_not_trigger(self): """无建议的反思不触发进化""" config = SoulEvolutionConfig(min_reflections=1) mixin = EvolutionMixin(evolution_config=config) memory_store = make_mock_memory_store() task = make_task() result = make_result() # Low quality but no suggestions reflection = make_reflection(quality_score=0.3, patterns=["reasoning"], suggestions=[]) evolved = await mixin.evolve_soul(task, result, memory_store, reflection=reflection) assert evolved is False