"""Tests for U8: Soul Dynamic Evolution — SOUL 动态进化与版本追踪.""" from __future__ import annotations from datetime import datetime, timedelta, timezone from pathlib import Path from unittest.mock import AsyncMock import pytest from agentkit.core.protocol import TaskMessage, TaskResult, TaskStatus from agentkit.evolution.lifecycle import EvolutionMixin, SoulEvolutionConfig from agentkit.evolution.reflector import Reflection, Reflector from agentkit.memory.profile import MemoryStore from agentkit.tools.memory_tool import MemoryTool # ── Helpers ────────────────────────────────────────────────── @pytest.fixture def store(tmp_path: Path) -> MemoryStore: return MemoryStore(base_dir=tmp_path) @pytest.fixture def tool(store: MemoryStore) -> MemoryTool: return MemoryTool(memory_store=store) def _make_task(task_id: str = "test-001") -> TaskMessage: return TaskMessage( task_id=task_id, agent_name="evolving_agent", task_type="echo", priority=0, input_data={"query": "hello"}, callback_url=None, created_at=datetime.now(timezone.utc), ) def _make_result(status: str = TaskStatus.COMPLETED) -> TaskResult: return TaskResult( task_id="test-001", agent_name="evolving_agent", status=status, output_data={"key": "value"}, error_message=None, started_at=datetime.now(timezone.utc), completed_at=datetime.now(timezone.utc), metrics={"elapsed_seconds": 5.0}, ) class LowQualityReflector(Reflector): """总是产生低质量结果和改进建议的 Reflector.""" async def reflect(self, task, result): return Reflection( task_id=task.task_id, agent_name=result.agent_name, outcome="failure", quality_score=0.2, patterns=["slow_execution"], insights=["Low quality score indicates potential issues"], suggestions=["Consider prompt optimization for this task type"], ) class HighQualityReflector(Reflector): """总是产生高质量结果的 Reflector.""" async def reflect(self, task, result): return Reflection( task_id=task.task_id, agent_name=result.agent_name, outcome="success", quality_score=0.8, patterns=["fast_execution"], insights=[], suggestions=[], ) 
class LowQualityNoSuggestionsReflector(Reflector):
    """Reflector stub that reports a low-quality failure but offers no suggestions."""

    async def reflect(self, task, result):
        """Return a fixed failing Reflection (score 0.2) with an empty suggestion list."""
        return Reflection(
            task_id=task.task_id,
            agent_name=result.agent_name,
            outcome="failure",
            quality_score=0.2,
            patterns=["slow_execution"],
            insights=["Low quality"],
            suggestions=[],
        )


def _seed_pending_reflection(
    mixin: EvolutionMixin,
    *,
    task_id: str,
    score: float,
    suggestion: str,
    timestamp: datetime | None = None,
) -> None:
    """Append one hand-built ``slow_execution`` entry to ``mixin.pending_soul_updates``.

    Lets a test pre-load reflection history (with a chosen score and timestamp)
    without going through ``evolve_soul``.

    Args:
        mixin: The EvolutionMixin whose pending updates are seeded.
        task_id: Task id recorded on the synthetic Reflection.
        score: Used both as the Reflection's ``quality_score`` and the entry's ``score``.
        suggestion: The single suggestion attached to the Reflection.
        timestamp: Entry timestamp; defaults to the current UTC time.
    """
    entry = {
        "reflection": Reflection(
            task_id=task_id,
            agent_name="evolving_agent",
            outcome="failure",
            quality_score=score,
            patterns=["slow_execution"],
            insights=[],
            suggestions=[suggestion],
        ),
        "timestamp": datetime.now(timezone.utc) if timestamp is None else timestamp,
        "score": score,
        "task_type": "",
    }
    mixin.pending_soul_updates.setdefault("slow_execution", []).append(entry)


# ── MemoryTool update_soul action tests ──────────────────────


class TestMemoryToolUpdateSoul:
    """Tests for the MemoryTool ``update_soul`` action."""

    async def test_basic_update_increments_version(self, tool: MemoryTool, store: MemoryStore):
        """A basic update bumps the version number."""
        # Initialise SOUL with a single section and no version section.
        store.get_file("soul").write("## 身份\n我是助手")

        result = await tool.execute(
            action="update_soul",
            file="soul",
            section="性格",
            content="更加耐心",
        )

        assert result["success"] is True
        assert result["version"] == 2

        # The version section must reflect the new version.
        version_content = store.get_file("soul").read_section("版本")
        assert "版本: 2" in version_content

    async def test_creates_version_section_if_missing(self, tool: MemoryTool, store: MemoryStore):
        """The version section is created when it does not exist yet."""
        store.get_file("soul").write("## 身份\n我是助手")

        result = await tool.execute(
            action="update_soul",
            file="soul",
            section="性格",
            content="友好",
        )

        assert result["success"] is True
        assert result["version"] == 2

        # The version section should now be listed.
        sections = store.get_file("soul").list_sections()
        assert "版本" in sections

    async def test_adds_update_history_entry(self, tool: MemoryTool, store: MemoryStore):
        """An update-history entry is recorded with version, section and reason."""
        store.get_file("soul").write("## 身份\n我是助手")

        result = await tool.execute(
            action="update_soul",
            file="soul",
            section="性格",
            content="更加耐心",
            reason="用户反馈需要更耐心",
        )

        assert result["success"] is True

        history_content = store.get_file("soul").read_section("更新历史")
        assert "v2" in history_content
        assert "性格" in history_content
        assert "用户反馈需要更耐心" in history_content

    async def test_history_limited_to_10_entries(self, tool: MemoryTool, store: MemoryStore):
        """The update history keeps at most 10 entries."""
        store.get_file("soul").write("## 身份\n我是助手")

        # Perform 12 updates, more than the history cap.
        for i in range(12):
            result = await tool.execute(
                action="update_soul",
                file="soul",
                section=f"section_{i}",
                content=f"content_{i}",
            )
            assert result["success"] is True

        history_content = store.get_file("soul").read_section("更新历史")
        lines = [line for line in history_content.strip().split("\n") if line.strip()]
        # Guard against a vacuous pass: an empty history would also satisfy "<= 10".
        assert lines
        assert len(lines) <= 10

    async def test_requires_section_and_content(self, tool: MemoryTool, store: MemoryStore):
        """Missing ``section`` or ``content`` yields an error result."""
        store.get_file("soul").write("## 身份\n我是助手")

        # Missing section.
        result = await tool.execute(
            action="update_soul",
            file="soul",
            content="内容",
        )
        assert result["success"] is False
        assert "section" in result.get("error", "").lower()

        # Missing content.
        result = await tool.execute(
            action="update_soul",
            file="soul",
            section="性格",
        )
        assert result["success"] is False
        assert "content" in result.get("error", "").lower()

    async def test_invalid_action_still_rejected(self, tool: MemoryTool):
        """Unknown actions are still rejected."""
        result = await tool.execute(action="delete_everything", file="soul")

        assert result["success"] is False
        assert "Unknown action" in result.get("error", "")


# ── EvolutionMixin.evolve_soul tests ─────────────────────────


class TestEvolveSoul:
    """Tests for ``EvolutionMixin.evolve_soul``."""

    async def test_no_update_when_fewer_than_3_reflections(self, store: MemoryStore):
        """Fewer than 3 same-category reflections do not trigger a soul update."""
        mixin = EvolutionMixin(reflector=LowQualityReflector())
        task = _make_task()
        result = _make_result()

        # Only 2 calls: below the 3-reflection threshold.
        for _ in range(2):
            updated = await mixin.evolve_soul(task, result, memory_store=store)
            assert updated is False

    async def test_triggers_update_when_3_same_category_reflections(self, store: MemoryStore):
        """Accumulating >= 3 same-category reflections triggers a soul update."""
        mixin = EvolutionMixin(reflector=LowQualityReflector())
        task = _make_task()
        result = _make_result()

        # The first 2 calls do not trigger.
        for _ in range(2):
            updated = await mixin.evolve_soul(task, result, memory_store=store)
            assert updated is False

        # The 3rd call triggers.
        updated = await mixin.evolve_soul(task, result, memory_store=store)
        assert updated is True

        # SOUL must have been updated with the recurring pattern.
        soul_content = store.get_file("soul").read()
        assert "slow_execution" in soul_content

    async def test_no_update_without_memory_store(self):
        """No update is triggered when no memory_store is supplied."""
        mixin = EvolutionMixin(reflector=LowQualityReflector())
        task = _make_task()
        result = _make_result()

        updated = await mixin.evolve_soul(task, result, memory_store=None)
        assert updated is False

    async def test_no_update_when_quality_score_above_threshold(self, store: MemoryStore):
        """No update is triggered when quality_score >= 0.5."""
        mixin = EvolutionMixin(reflector=HighQualityReflector())
        task = _make_task()
        result = _make_result()

        updated = await mixin.evolve_soul(task, result, memory_store=store)
        assert updated is False


# ── Multi-dimensional trigger tests ──────────────────────────


class TestTimeDecay:
    """Time-decay trigger tests."""

    async def test_recent_reflections_count_fully(self, store: MemoryStore):
        """Reflections inside the window count fully towards the effective count."""
        config = SoulEvolutionConfig(
            min_reflections=3,
            reflection_window_seconds=3600,
            time_decay_factor=0.5,
        )
        mixin = EvolutionMixin(reflector=LowQualityReflector(), evolution_config=config)
        task = _make_task()
        result = _make_result()

        # 3 recent reflections should trigger on the third call.
        for _ in range(2):
            updated = await mixin.evolve_soul(task, result, memory_store=store)
            assert updated is False

        updated = await mixin.evolve_soul(task, result, memory_store=store)
        assert updated is True

    async def test_old_reflections_decay(self, store: MemoryStore):
        """Old reflections decay, leaving the effective count too low to trigger."""
        config = SoulEvolutionConfig(
            min_reflections=3,
            reflection_window_seconds=3600,
            time_decay_factor=0.5,
        )
        mixin = EvolutionMixin(reflector=LowQualityReflector(), evolution_config=config)
        task = _make_task()
        result = _make_result()

        # Manually insert 2 old reflections (10 hours ago).
        old_time = datetime.now(timezone.utc) - timedelta(hours=10)
        for _ in range(2):
            _seed_pending_reflection(
                mixin,
                task_id="old",
                score=0.2,
                suggestion="Improve speed",
                timestamp=old_time,
            )

        # 1 new reflection: 2 * 0.5^10 + 1 ≈ 1.002 < 3, so no trigger.
        updated = await mixin.evolve_soul(task, result, memory_store=store)
        assert updated is False


class TestQualityGradient:
    """Quality-gradient trigger tests."""

    async def test_declining_scores_trigger_early(self, store: MemoryStore):
        """Three consecutive scores dropping by more than the threshold trigger early."""
        config = SoulEvolutionConfig(
            min_reflections=3,
            quality_gradient_threshold=-0.15,
        )
        mixin = EvolutionMixin(reflector=LowQualityReflector(), evolution_config=config)
        task = _make_task()
        result = _make_result()

        # Manually insert 2 reflections with decreasing scores.
        _seed_pending_reflection(mixin, task_id="g1", score=0.4, suggestion="Improve")
        _seed_pending_reflection(mixin, task_id="g2", score=0.2, suggestion="Improve more")

        # 3rd reflection: score=0.0, a drop of 0.2 > the 0.15 threshold, so it triggers.
        updated = await mixin.evolve_soul(task, result, memory_store=store, score=0.0)
        assert updated is True

    async def test_stable_scores_do_not_trigger_early(self, store: MemoryStore):
        """Stable scores do not trigger early."""
        config = SoulEvolutionConfig(
            min_reflections=3,
            quality_gradient_threshold=-0.15,
        )
        mixin = EvolutionMixin(reflector=LowQualityReflector(), evolution_config=config)
        task = _make_task()
        result = _make_result()

        # Seed only ONE prior reflection so the call below brings the total to 2.
        # With 3 reflections the plain min_reflections=3 rule would fire regardless
        # of the gradient, so "no early trigger" can only be observed below that count.
        _seed_pending_reflection(mixin, task_id="s1", score=0.2, suggestion="Improve")

        # Only 2 reflections with stable scores (0.2 -> 0.19): must not trigger.
        updated = await mixin.evolve_soul(task, result, memory_store=store, score=0.19)
        assert updated is False


class TestTaskTypeWeight:
    """Task-type weight trigger tests."""

    async def test_code_generation_triggers_at_2(self, store: MemoryStore):
        """code_generation has weight 1.5, so 2 reflections trigger (2 * 1.5 = 3 >= 3)."""
        config = SoulEvolutionConfig(
            min_reflections=3,
            task_type_weights={"code_generation": 1.5, "chat": 0.5},
        )
        mixin = EvolutionMixin(reflector=LowQualityReflector(), evolution_config=config)
        task = _make_task()
        result = _make_result()

        # 1st call: not enough yet.
        updated = await mixin.evolve_soul(
            task, result, memory_store=store, task_type="code_generation"
        )
        assert updated is False

        # 2nd call: effective_count=2, weight=1.5, weighted=3.0 >= 3, so it triggers.
        updated = await mixin.evolve_soul(
            task, result, memory_store=store, task_type="code_generation"
        )
        assert updated is True

    async def test_chat_needs_more_reflections(self, store: MemoryStore):
        """chat has weight 0.5, so more reflections are needed before triggering."""
        config = SoulEvolutionConfig(
            min_reflections=3,
            task_type_weights={"code_generation": 1.5, "chat": 0.5},
        )
        mixin = EvolutionMixin(reflector=LowQualityReflector(), evolution_config=config)
        task = _make_task()
        result = _make_result()

        # Calls 1-5: weighted count is at most 5 * 0.5 = 2.5 < 3, so none may trigger.
        for _ in range(5):
            updated = await mixin.evolve_soul(
                task, result, memory_store=store, task_type="chat"
            )
            assert updated is False

        # 6th call: 6 * 0.5 = 3.0 >= 3, so it triggers.
        updated = await mixin.evolve_soul(
            task, result, memory_store=store, task_type="chat"
        )
        assert updated is True


class TestBackwardCompatibility:
    """Backward-compatibility tests: behaviour without a config matches the old default."""

    async def test_no_config_3_reflections_trigger(self, store: MemoryStore):
        """Without evolution_config, 3 reflections trigger (same as the original behaviour)."""
        mixin = EvolutionMixin(reflector=LowQualityReflector())
        task = _make_task()
        result = _make_result()

        # The first 2 calls do not trigger.
        for _ in range(2):
            updated = await mixin.evolve_soul(task, result, memory_store=store)
            assert updated is False

        # The 3rd call triggers.
        updated = await mixin.evolve_soul(task, result, memory_store=store)
        assert updated is True

    async def test_no_config_fewer_than_3_no_trigger(self, store: MemoryStore):
        """Without evolution_config, fewer than 3 reflections do not trigger."""
        mixin = EvolutionMixin(reflector=LowQualityReflector())
        task = _make_task()
        result = _make_result()

        for _ in range(2):
            updated = await mixin.evolve_soul(task, result, memory_store=store)
            assert updated is False