553 lines
19 KiB
Python
553 lines
19 KiB
Python
"""Tests for U8: Soul Dynamic Evolution — SOUL 动态进化与版本追踪."""
|
||
|
||
from __future__ import annotations
|
||
|
||
from datetime import datetime, timedelta, timezone
|
||
from pathlib import Path
|
||
from unittest.mock import AsyncMock
|
||
|
||
import pytest
|
||
|
||
from agentkit.core.protocol import TaskMessage, TaskResult, TaskStatus
|
||
from agentkit.evolution.lifecycle import EvolutionMixin, SoulEvolutionConfig
|
||
from agentkit.evolution.reflector import Reflection, Reflector
|
||
from agentkit.memory.profile import MemoryStore
|
||
from agentkit.tools.memory_tool import MemoryTool
|
||
|
||
|
||
# ── Helpers ──────────────────────────────────────────────────
|
||
|
||
|
||
@pytest.fixture
def store(tmp_path: Path) -> MemoryStore:
    """Provide a MemoryStore whose base directory is pytest's ``tmp_path``."""
    memory_store = MemoryStore(base_dir=tmp_path)
    return memory_store
|
||
|
||
|
||
@pytest.fixture
def tool(store: MemoryStore) -> MemoryTool:
    """Provide a MemoryTool wired to the ``store`` fixture."""
    memory_tool = MemoryTool(memory_store=store)
    return memory_tool
|
||
|
||
|
||
def _make_task(task_id: str = "test-001") -> TaskMessage:
    """Build a minimal ``echo`` TaskMessage addressed to ``evolving_agent``.

    Args:
        task_id: Identifier stored on the message; defaults to ``"test-001"``.

    Returns:
        A TaskMessage with priority 0, a fixed ``{"query": "hello"}`` payload,
        no callback URL and a timezone-aware UTC creation time.
    """
    fields = {
        "task_id": task_id,
        "agent_name": "evolving_agent",
        "task_type": "echo",
        "priority": 0,
        "input_data": {"query": "hello"},
        "callback_url": None,
        "created_at": datetime.now(timezone.utc),
    }
    return TaskMessage(**fields)
|
||
|
||
|
||
def _make_result(status: str = TaskStatus.COMPLETED) -> TaskResult:
    """Build a TaskResult for task ``test-001`` produced by ``evolving_agent``.

    Args:
        status: Status stored on the result; defaults to ``TaskStatus.COMPLETED``.

    Returns:
        A TaskResult with a fixed output payload, no error message, UTC
        start/completion timestamps and a 5-second elapsed-time metric.
    """
    began = datetime.now(timezone.utc)
    finished = datetime.now(timezone.utc)
    return TaskResult(
        task_id="test-001",
        agent_name="evolving_agent",
        status=status,
        output_data={"key": "value"},
        error_message=None,
        started_at=began,
        completed_at=finished,
        metrics={"elapsed_seconds": 5.0},
    )
|
||
|
||
|
||
class LowQualityReflector(Reflector):
    """Reflector stub that always reports a low-quality failure with a suggestion."""

    async def reflect(self, task, result):
        """Return a canned failing Reflection (score 0.2) for ``task``/``result``."""
        canned = Reflection(
            task_id=task.task_id,
            agent_name=result.agent_name,
            outcome="failure",
            quality_score=0.2,
            patterns=["slow_execution"],
            insights=["Low quality score indicates potential issues"],
            suggestions=["Consider prompt optimization for this task type"],
        )
        return canned
|
||
|
||
|
||
class HighQualityReflector(Reflector):
    """Reflector stub that always reports a high-quality success."""

    async def reflect(self, task, result):
        """Return a canned successful Reflection (score 0.8) with no suggestions."""
        canned = Reflection(
            task_id=task.task_id,
            agent_name=result.agent_name,
            outcome="success",
            quality_score=0.8,
            patterns=["fast_execution"],
            insights=[],
            suggestions=[],
        )
        return canned
|
||
|
||
|
||
class LowQualityNoSuggestionsReflector(Reflector):
    """Reflector stub that reports a low-quality failure but offers no suggestions."""

    async def reflect(self, task, result):
        """Return a canned failing Reflection (score 0.2) with an empty suggestion list."""
        canned = Reflection(
            task_id=task.task_id,
            agent_name=result.agent_name,
            outcome="failure",
            quality_score=0.2,
            patterns=["slow_execution"],
            insights=["Low quality"],
            suggestions=[],
        )
        return canned
|
||
|
||
|
||
# ── MemoryTool update_soul action tests ─────────────────────
|
||
|
||
|
||
class TestMemoryToolUpdateSoul:
    """Tests for the MemoryTool ``update_soul`` action."""

    async def test_basic_update_increments_version(self, tool: MemoryTool, store: MemoryStore):
        """A basic update bumps the reported version to 2."""
        # Seed the SOUL file without any version metadata.
        store.get_file("soul").write("## 身份\n我是助手")

        request = {
            "action": "update_soul",
            "file": "soul",
            "section": "性格",
            "content": "更加耐心",
        }
        outcome = await tool.execute(**request)
        assert outcome["success"] is True
        assert outcome["version"] == 2

        # The version section must record the new version number.
        recorded_version = store.get_file("soul").read_section("版本")
        assert "版本: 2" in recorded_version

    async def test_creates_version_section_if_missing(self, tool: MemoryTool, store: MemoryStore):
        """The version section is created when the file does not have one yet."""
        store.get_file("soul").write("## 身份\n我是助手")

        request = {
            "action": "update_soul",
            "file": "soul",
            "section": "性格",
            "content": "友好",
        }
        outcome = await tool.execute(**request)
        assert outcome["success"] is True
        assert outcome["version"] == 2

        # The version section should now be listed among the file's sections.
        section_names = store.get_file("soul").list_sections()
        assert "版本" in section_names

    async def test_adds_update_history_entry(self, tool: MemoryTool, store: MemoryStore):
        """An update-history entry carrying version, section and reason is added."""
        store.get_file("soul").write("## 身份\n我是助手")

        request = {
            "action": "update_soul",
            "file": "soul",
            "section": "性格",
            "content": "更加耐心",
            "reason": "用户反馈需要更耐心",
        }
        outcome = await tool.execute(**request)
        assert outcome["success"] is True

        history = store.get_file("soul").read_section("更新历史")
        for expected_fragment in ("v2", "性格", "用户反馈需要更耐心"):
            assert expected_fragment in history

    async def test_history_limited_to_10_entries(self, tool: MemoryTool, store: MemoryStore):
        """The update history keeps at most 10 entries."""
        store.get_file("soul").write("## 身份\n我是助手")

        # Perform 12 updates, each targeting a distinct section.
        for index in range(12):
            outcome = await tool.execute(
                action="update_soul",
                file="soul",
                section=f"section_{index}",
                content=f"content_{index}",
            )
            assert outcome["success"] is True

        history = store.get_file("soul").read_section("更新历史")
        # Count only non-blank lines of the history section.
        non_blank = list(filter(str.strip, history.strip().split("\n")))
        assert len(non_blank) <= 10

    async def test_requires_section_and_content(self, tool: MemoryTool, store: MemoryStore):
        """An error is returned when either ``section`` or ``content`` is missing."""
        store.get_file("soul").write("## 身份\n我是助手")

        # Missing ``section``.
        without_section = await tool.execute(
            action="update_soul",
            file="soul",
            content="内容",
        )
        assert without_section["success"] is False
        assert "section" in without_section.get("error", "").lower()

        # Missing ``content``.
        without_content = await tool.execute(
            action="update_soul",
            file="soul",
            section="性格",
        )
        assert without_content["success"] is False
        assert "content" in without_content.get("error", "").lower()

    async def test_invalid_action_still_rejected(self, tool: MemoryTool):
        """Unknown actions are still rejected."""
        outcome = await tool.execute(action="delete_everything", file="soul")
        assert outcome["success"] is False
        assert "Unknown action" in outcome.get("error", "")
|
||
|
||
|
||
# ── EvolutionMixin.evolve_soul tests ─────────────────────────
|
||
|
||
|
||
class TestEvolveSoul:
    """Tests for ``EvolutionMixin.evolve_soul``."""

    async def test_no_update_when_fewer_than_3_reflections(self, store: MemoryStore):
        """Fewer than 3 same-category reflections must not trigger a SOUL update."""
        mixin = EvolutionMixin(reflector=LowQualityReflector())
        task, result = _make_task(), _make_result()

        # Only 2 calls: below the threshold of 3.
        for _ in range(2):
            triggered = await mixin.evolve_soul(task, result, memory_store=store)
            assert triggered is False

    async def test_triggers_update_when_3_same_category_reflections(self, store: MemoryStore):
        """Accumulating 3 same-category reflections triggers a SOUL update."""
        mixin = EvolutionMixin(reflector=LowQualityReflector())
        task, result = _make_task(), _make_result()

        # The first 2 calls do not trigger.
        for _ in range(2):
            triggered = await mixin.evolve_soul(task, result, memory_store=store)
            assert triggered is False

        # The 3rd call triggers.
        triggered = await mixin.evolve_soul(task, result, memory_store=store)
        assert triggered is True

        # The SOUL file must now mention the reflected pattern.
        soul_text = store.get_file("soul").read()
        assert "slow_execution" in soul_text

    async def test_no_update_without_memory_store(self):
        """No update is triggered when no memory store is supplied."""
        mixin = EvolutionMixin(reflector=LowQualityReflector())
        task, result = _make_task(), _make_result()

        triggered = await mixin.evolve_soul(task, result, memory_store=None)
        assert triggered is False

    async def test_no_update_when_quality_score_above_threshold(self, store: MemoryStore):
        """A high-quality reflection (the stub reports 0.8) does not trigger an update."""
        mixin = EvolutionMixin(reflector=HighQualityReflector())
        task, result = _make_task(), _make_result()

        triggered = await mixin.evolve_soul(task, result, memory_store=store)
        assert triggered is False
|
||
|
||
|
||
# ── Multi-dimensional trigger tests ──────────────────────────
|
||
|
||
|
||
class TestTimeDecay:
    """Tests for the time-decay trigger."""

    async def test_recent_reflections_count_fully(self, store: MemoryStore):
        """Reflections made just now count fully toward the effective total."""
        decay_config = SoulEvolutionConfig(
            min_reflections=3,
            reflection_window_seconds=3600,
            time_decay_factor=0.5,
        )
        mixin = EvolutionMixin(
            reflector=LowQualityReflector(),
            evolution_config=decay_config,
        )
        task, result = _make_task(), _make_result()

        # Three recent reflections should trigger: the first two do not...
        for _ in range(2):
            triggered = await mixin.evolve_soul(task, result, memory_store=store)
            assert triggered is False

        # ...and the third one does.
        triggered = await mixin.evolve_soul(task, result, memory_store=store)
        assert triggered is True

    async def test_old_reflections_decay(self, store: MemoryStore):
        """Old reflections decay, so the effective count stays too low to trigger."""
        decay_config = SoulEvolutionConfig(
            min_reflections=3,
            reflection_window_seconds=3600,
            time_decay_factor=0.5,
        )
        mixin = EvolutionMixin(
            reflector=LowQualityReflector(),
            evolution_config=decay_config,
        )
        task, result = _make_task(), _make_result()

        # Manually insert 2 stale reflections dated 10 hours ago.
        stale_time = datetime.now(timezone.utc) - timedelta(hours=10)
        bucket = mixin.pending_soul_updates.setdefault("slow_execution", [])
        bucket.extend(
            {
                "reflection": Reflection(
                    task_id="old",
                    agent_name="evolving_agent",
                    outcome="failure",
                    quality_score=0.2,
                    patterns=["slow_execution"],
                    insights=[],
                    suggestions=["Improve speed"],
                ),
                "timestamp": stale_time,
                "score": 0.2,
                "task_type": "",
            }
            for _ in range(2)
        )

        # One fresh reflection. Per the original author's note the effective
        # count is 2 * 0.5**10 + 1 ≈ 1.002 < 3, so nothing should trigger.
        triggered = await mixin.evolve_soul(task, result, memory_store=store)
        assert triggered is False
|
||
|
||
|
||
class TestQualityGradient:
    """Tests for the quality-gradient (declining score) trigger."""

    @staticmethod
    def _pending_entry(task_id: str, score: float, suggestion: str) -> dict:
        """Build one pre-recorded ``pending_soul_updates`` entry.

        Args:
            task_id: Task id stored on the embedded Reflection.
            score: Used both as the Reflection's ``quality_score`` and as the
                entry's ``"score"`` value.
            suggestion: The single suggestion attached to the Reflection.

        Returns:
            A dict with ``reflection``, ``timestamp`` (current UTC time),
            ``score`` and an empty ``task_type``.
        """
        return {
            "reflection": Reflection(
                task_id=task_id,
                agent_name="evolving_agent",
                outcome="failure",
                quality_score=score,
                patterns=["slow_execution"],
                insights=[],
                suggestions=[suggestion],
            ),
            "timestamp": datetime.now(timezone.utc),
            "score": score,
            "task_type": "",
        }

    async def test_declining_scores_trigger_early(self, store: MemoryStore):
        """Three consecutive scores falling by more than the threshold trigger an update."""
        config = SoulEvolutionConfig(
            min_reflections=3,
            quality_gradient_threshold=-0.15,
        )
        reflector = LowQualityReflector()
        mixin = EvolutionMixin(reflector=reflector, evolution_config=config)

        task = _make_task()
        result = _make_result()

        # Manually insert 2 reflections with decreasing scores (0.4 -> 0.2).
        pending = mixin.pending_soul_updates.setdefault("slow_execution", [])
        pending.append(self._pending_entry("g1", 0.4, "Improve"))
        pending.append(self._pending_entry("g2", 0.2, "Improve more"))

        # 3rd reflection: score=0.0, a further drop of 0.2 > the 0.15 threshold.
        # NOTE(review): with min_reflections=3 this 3rd reflection presumably
        # also satisfies the plain count threshold, so the test may not isolate
        # the gradient trigger — confirm against EvolutionMixin.
        updated = await mixin.evolve_soul(
            task, result, memory_store=store, score=0.0
        )
        assert updated is True

    async def test_stable_scores_do_not_trigger_early(self, store: MemoryStore):
        """Stable scores do not trigger an early update."""
        config = SoulEvolutionConfig(
            min_reflections=3,
            quality_gradient_threshold=-0.15,
        )
        reflector = LowQualityReflector()
        mixin = EvolutionMixin(reflector=reflector, evolution_config=config)

        task = _make_task()
        result = _make_result()

        # Only ONE reflection is pre-recorded. With two pre-recorded entries the
        # new reflection would bring the count to 3 >= min_reflections and fire
        # via the count threshold, hiding whether the gradient check stayed quiet.
        mixin.pending_soul_updates.setdefault("slow_execution", []).append(
            self._pending_entry("s1", 0.2, "Improve")
        )

        # 2 reflections in total with a 0.01 drop (< 0.15): must not trigger.
        # NOTE(review): with only two scores the gradient check may never run;
        # confirm how many samples EvolutionMixin requires.
        updated = await mixin.evolve_soul(
            task, result, memory_store=store, score=0.19
        )
        assert updated is False
|
||
|
||
|
||
class TestTaskTypeWeight:
    """Tests for the task-type weight trigger."""

    async def test_code_generation_triggers_at_2(self, store: MemoryStore):
        """``code_generation`` has weight 1.5, so 2 reflections suffice (2 * 1.5 = 3 >= 3)."""
        weighted_config = SoulEvolutionConfig(
            min_reflections=3,
            task_type_weights={"code_generation": 1.5, "chat": 0.5},
        )
        mixin = EvolutionMixin(
            reflector=LowQualityReflector(),
            evolution_config=weighted_config,
        )
        task, result = _make_task(), _make_result()

        # 1st call: not enough weighted reflections yet.
        first = await mixin.evolve_soul(
            task, result, memory_store=store, task_type="code_generation"
        )
        assert first is False

        # 2nd call: effective_count=2, weight=1.5, weighted=3.0 >= 3 -> triggers.
        second = await mixin.evolve_soul(
            task, result, memory_store=store, task_type="code_generation"
        )
        assert second is True

    async def test_chat_needs_more_reflections(self, store: MemoryStore):
        """``chat`` has weight 0.5, so more reflections are needed before triggering."""
        weighted_config = SoulEvolutionConfig(
            min_reflections=3,
            task_type_weights={"code_generation": 1.5, "chat": 0.5},
        )
        mixin = EvolutionMixin(
            reflector=LowQualityReflector(),
            evolution_config=weighted_config,
        )
        task, result = _make_task(), _make_result()

        # Calls 1-5: the weighted count stays below 3 (4 * 0.5 = 2.0 after the
        # 4th call, 5 * 0.5 = 2.5 after the 5th), so none of them triggers.
        for _ in range(5):
            triggered = await mixin.evolve_soul(
                task, result, memory_store=store, task_type="chat"
            )
            assert triggered is False

        # 6th call: 6 * 0.5 = 3.0 >= 3 -> triggers.
        triggered = await mixin.evolve_soul(
            task, result, memory_store=store, task_type="chat"
        )
        assert triggered is True
|
||
|
||
|
||
class TestBackwardCompatibility:
    """Backward-compatibility tests: behaviour without a config matches the previous one."""

    async def test_no_config_3_reflections_trigger(self, store: MemoryStore):
        """Without ``evolution_config``, the 3rd reflection triggers (original behaviour)."""
        mixin = EvolutionMixin(reflector=LowQualityReflector())
        task, result = _make_task(), _make_result()

        # The first 2 calls do not trigger.
        for _ in range(2):
            triggered = await mixin.evolve_soul(task, result, memory_store=store)
            assert triggered is False

        # The 3rd call triggers.
        triggered = await mixin.evolve_soul(task, result, memory_store=store)
        assert triggered is True

    async def test_no_config_fewer_than_3_no_trigger(self, store: MemoryStore):
        """Without ``evolution_config``, fewer than 3 reflections do not trigger."""
        mixin = EvolutionMixin(reflector=LowQualityReflector())
        task, result = _make_task(), _make_result()

        for _ in range(2):
            triggered = await mixin.evolve_soul(task, result, memory_store=store)
            assert triggered is False
|