268 lines
8.8 KiB
Python
268 lines
8.8 KiB
Python
"""Tests for U8: Soul Dynamic Evolution — SOUL 动态进化与版本追踪."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from unittest.mock import AsyncMock
|
|
|
|
import pytest
|
|
|
|
from agentkit.core.protocol import TaskMessage, TaskResult, TaskStatus
|
|
from agentkit.evolution.lifecycle import EvolutionMixin
|
|
from agentkit.evolution.reflector import Reflection, Reflector
|
|
from agentkit.memory.profile import MemoryStore
|
|
from agentkit.tools.memory_tool import MemoryTool
|
|
|
|
|
|
# ── Helpers ──────────────────────────────────────────────────
|
|
|
|
|
|
@pytest.fixture
def store(tmp_path: Path) -> MemoryStore:
    """Provide a MemoryStore whose base directory is pytest's ``tmp_path``."""
    memory_store = MemoryStore(base_dir=tmp_path)
    return memory_store
|
|
|
|
|
|
@pytest.fixture
def tool(store: MemoryStore) -> MemoryTool:
    """Provide a MemoryTool backed by the ``store`` fixture."""
    memory_tool = MemoryTool(memory_store=store)
    return memory_tool
|
|
|
|
|
|
def _make_task(task_id: str = "test-001") -> TaskMessage:
    """Build an ``echo`` TaskMessage for the ``evolving_agent`` agent.

    Args:
        task_id: Identifier stored on the message.

    Returns:
        A TaskMessage with priority 0, a fixed ``{"query": "hello"}`` input,
        no callback URL, and a timezone-aware UTC creation time.
    """
    # Keys are listed in the same order the constructor keywords were
    # originally written, so evaluation order is unchanged.
    fields = {
        "task_id": task_id,
        "agent_name": "evolving_agent",
        "task_type": "echo",
        "priority": 0,
        "input_data": {"query": "hello"},
        "callback_url": None,
        "created_at": datetime.now(timezone.utc),
    }
    return TaskMessage(**fields)
|
|
|
|
|
|
def _make_result(status: str = TaskStatus.COMPLETED) -> TaskResult:
    """Build a TaskResult for task ``test-001`` run by ``evolving_agent``.

    Args:
        status: Status stored on the result; defaults to
            ``TaskStatus.COMPLETED``.

    Returns:
        A TaskResult with a fixed output payload, no error message, UTC
        start/completion timestamps, and a 5.0 second elapsed metric.
    """
    # The two timestamps are taken by separate clock reads, start first.
    began = datetime.now(timezone.utc)
    finished = datetime.now(timezone.utc)
    fields = {
        "task_id": "test-001",
        "agent_name": "evolving_agent",
        "status": status,
        "output_data": {"key": "value"},
        "error_message": None,
        "started_at": began,
        "completed_at": finished,
        "metrics": {"elapsed_seconds": 5.0},
    }
    return TaskResult(**fields)
|
|
|
|
|
|
class LowQualityReflector(Reflector):
    """Reflector stub that always reports a low-quality failure with a suggestion."""

    async def reflect(self, task, result):
        """Return a fixed ``failure`` reflection (score 0.2) for the given pair.

        The task id is read from ``task`` and the agent name from ``result``;
        every other field is constant.
        """
        identity = {"task_id": task.task_id, "agent_name": result.agent_name}
        verdict = {
            "outcome": "failure",
            "quality_score": 0.2,
            "patterns": ["slow_execution"],
            "insights": ["Low quality score indicates potential issues"],
            "suggestions": ["Consider prompt optimization for this task type"],
        }
        return Reflection(**identity, **verdict)
|
|
|
|
|
|
class HighQualityReflector(Reflector):
    """Reflector stub that always reports a high-quality success."""

    async def reflect(self, task, result):
        """Return a fixed ``success`` reflection (score 0.8) for the given pair.

        The task id is read from ``task`` and the agent name from ``result``;
        insights and suggestions are always empty.
        """
        identity = {"task_id": task.task_id, "agent_name": result.agent_name}
        verdict = {
            "outcome": "success",
            "quality_score": 0.8,
            "patterns": ["fast_execution"],
            "insights": [],
            "suggestions": [],
        }
        return Reflection(**identity, **verdict)
|
|
|
|
|
|
class LowQualityNoSuggestionsReflector(Reflector):
    """Reflector stub that reports a low-quality failure without suggestions."""

    # NOTE(review): no test visible in this file instantiates this class —
    # confirm whether a "low quality, no suggestions" case is still planned.

    async def reflect(self, task, result):
        """Return a fixed ``failure`` reflection (score 0.2) with no suggestions.

        The task id is read from ``task`` and the agent name from ``result``;
        every other field is constant.
        """
        identity = {"task_id": task.task_id, "agent_name": result.agent_name}
        verdict = {
            "outcome": "failure",
            "quality_score": 0.2,
            "patterns": ["slow_execution"],
            "insights": ["Low quality"],
            "suggestions": [],
        }
        return Reflection(**identity, **verdict)
|
|
|
|
|
|
# ── MemoryTool update_soul action tests ─────────────────────
|
|
|
|
|
|
class TestMemoryToolUpdateSoul:
    """Tests for the ``update_soul`` action of MemoryTool."""

    # SOUL document written before each update: a single identity heading.
    _INITIAL_SOUL = "## 身份\n我是助手"

    def _seed_soul(self, store: MemoryStore) -> None:
        """Write the initial SOUL document into the ``soul`` file of ``store``."""
        store.get_file("soul").write(self._INITIAL_SOUL)

    async def test_basic_update_increments_version(self, tool: MemoryTool, store: MemoryStore):
        """A plain update reports version 2 and records it in the version section."""
        self._seed_soul(store)

        request = {
            "action": "update_soul",
            "file": "soul",
            "section": "性格",
            "content": "更加耐心",
        }
        outcome = await tool.execute(**request)

        assert outcome["success"] is True
        assert outcome["version"] == 2

        # The stored version section must show the new number as well.
        version_text = store.get_file("soul").read_section("版本")
        assert "版本: 2" in version_text

    async def test_creates_version_section_if_missing(self, tool: MemoryTool, store: MemoryStore):
        """Updating a SOUL seeded without a version heading adds that section."""
        self._seed_soul(store)

        request = {
            "action": "update_soul",
            "file": "soul",
            "section": "性格",
            "content": "友好",
        }
        outcome = await tool.execute(**request)

        assert outcome["success"] is True
        assert outcome["version"] == 2

        # The version section has to be listed after the update.
        section_names = store.get_file("soul").list_sections()
        assert "版本" in section_names

    async def test_adds_update_history_entry(self, tool: MemoryTool, store: MemoryStore):
        """An update with a reason is recorded in the update-history section."""
        self._seed_soul(store)

        request = {
            "action": "update_soul",
            "file": "soul",
            "section": "性格",
            "content": "更加耐心",
            "reason": "用户反馈需要更耐心",
        }
        outcome = await tool.execute(**request)
        assert outcome["success"] is True

        # The entry must mention the version, the section, and the reason.
        history_text = store.get_file("soul").read_section("更新历史")
        assert "v2" in history_text
        assert "性格" in history_text
        assert "用户反馈需要更耐心" in history_text

    async def test_history_limited_to_10_entries(self, tool: MemoryTool, store: MemoryStore):
        """After 12 updates the history section holds at most 10 non-blank lines."""
        self._seed_soul(store)

        # Each of the 12 updates targets its own section and must succeed.
        for index in range(12):
            outcome = await tool.execute(
                action="update_soul",
                file="soul",
                section=f"section_{index}",
                content=f"content_{index}",
            )
            assert outcome["success"] is True

        history_text = store.get_file("soul").read_section("更新历史")
        # Lines that are empty or whitespace-only do not count as entries.
        entries = list(filter(str.strip, history_text.strip().split("\n")))
        assert len(entries) <= 10

    async def test_requires_section_and_content(self, tool: MemoryTool, store: MemoryStore):
        """Omitting ``section`` or ``content`` fails with an error naming the field."""
        self._seed_soul(store)

        # Call without ``section``.
        without_section = await tool.execute(
            action="update_soul",
            file="soul",
            content="内容",
        )
        assert without_section["success"] is False
        assert "section" in without_section.get("error", "").lower()

        # Call without ``content``.
        without_content = await tool.execute(
            action="update_soul",
            file="soul",
            section="性格",
        )
        assert without_content["success"] is False
        assert "content" in without_content.get("error", "").lower()

    async def test_invalid_action_still_rejected(self, tool: MemoryTool):
        """An unrecognised action is still rejected with an ``Unknown action`` error."""
        outcome = await tool.execute(action="delete_everything", file="soul")

        assert outcome["success"] is False
        assert "Unknown action" in outcome.get("error", "")
|
|
|
|
|
|
# ── EvolutionMixin.evolve_soul tests ─────────────────────────
|
|
|
|
|
|
class TestEvolveSoul:
    """Tests for ``EvolutionMixin.evolve_soul``."""

    async def test_no_update_when_fewer_than_3_reflections(self, store: MemoryStore):
        """Two low-quality reflections are not enough to update the SOUL."""
        mixin = EvolutionMixin(reflector=LowQualityReflector())
        task, result = _make_task(), _make_result()

        # Only two calls are made; each one must report "not updated".
        for _attempt in range(2):
            changed = await mixin.evolve_soul(task, result, memory_store=store)
            assert changed is False

    async def test_triggers_update_when_3_same_category_reflections(self, store: MemoryStore):
        """The third identical low-quality reflection triggers a SOUL update."""
        mixin = EvolutionMixin(reflector=LowQualityReflector())
        task, result = _make_task(), _make_result()

        # Calls one and two report no update; the third reports an update.
        for expected in (False, False, True):
            changed = await mixin.evolve_soul(task, result, memory_store=store)
            assert changed is expected

        # The stored SOUL text must now mention the reflected pattern.
        soul_text = store.get_file("soul").read()
        assert "slow_execution" in soul_text

    async def test_no_update_without_memory_store(self):
        """Passing ``memory_store=None`` reports no update."""
        mixin = EvolutionMixin(reflector=LowQualityReflector())
        task, result = _make_task(), _make_result()

        changed = await mixin.evolve_soul(task, result, memory_store=None)
        assert changed is False

    async def test_no_update_when_quality_score_above_threshold(self, store: MemoryStore):
        """A high-quality reflection (score 0.8) reports no update.

        NOTE(review): the original docstring gives the threshold as
        ``quality_score >= 0.5``; that value is not visible in this file.
        """
        mixin = EvolutionMixin(reflector=HighQualityReflector())
        task, result = _make_task(), _make_result()

        changed = await mixin.evolve_soul(task, result, memory_store=store)
        assert changed is False
|