fischer-agentkit/tests/unit/test_evolution_store_persis...

375 lines
14 KiB
Python

"""Tests for PersistentEvolutionStore - SQLite-backed evolution persistence"""
import os
import tempfile
import pytest
from agentkit.core.protocol import EvolutionEvent
from agentkit.evolution.evolution_store import (
InMemoryEvolutionStore,
PersistentEvolutionStore,
create_evolution_store,
)
# ── Fixtures ──────────────────────────────────────────────
@pytest.fixture
def db_path(tmp_path):
    """Return, as a string, a database file path inside pytest's ``tmp_path``."""
    database_file = tmp_path / "test_evolution.db"
    return str(database_file)
@pytest.fixture
def store(db_path):
    """Build a PersistentEvolutionStore pointed at the temporary database path."""
    persistent_store = PersistentEvolutionStore(db_path=db_path)
    return persistent_store
@pytest.fixture
def sample_event():
    """Build a prompt-change EvolutionEvent with before/after payloads and metrics."""
    event_fields = {
        "agent_name": "test_agent",
        "change_type": "prompt",
        "before": {"prompt": "old prompt"},
        "after": {"prompt": "new prompt"},
        "metrics": {"accuracy": 0.9},
    }
    return EvolutionEvent(**event_fields)
# ── record() + persistence tests ─────────────────────────
class TestRecordAndPersistence:
    """record() behaviour of PersistentEvolutionStore and reading events back."""

    async def test_record_returns_event_id(self, store, sample_event):
        """record() returns a non-empty string identifier."""
        returned_id = await store.record(sample_event)

        assert returned_id is not None
        assert isinstance(returned_id, str)
        assert len(returned_id) > 0

    async def test_record_sets_event_id_on_event(self, store, sample_event):
        """record() fills in event_id on the event object it was given."""
        # A freshly built event carries no identifier yet.
        assert sample_event.event_id is None

        await store.record(sample_event)

        assert sample_event.event_id is not None

    async def test_record_and_reopen_returns_event(self, db_path, sample_event):
        """Persistence test: record → close → reopen → list_events returns the event."""
        first_store = PersistentEvolutionStore(db_path=db_path)
        await first_store.record(sample_event)
        recorded_id = sample_event.event_id
        # Drop the only reference to the first store; no explicit close is
        # called, so the test relies on this to stand in for "closing" it.
        del first_store

        reopened_store = PersistentEvolutionStore(db_path=db_path)
        listed = await reopened_store.list_events()

        assert len(listed) == 1
        stored = listed[0]
        assert stored["id"] == recorded_id
        assert stored["agent_name"] == "test_agent"
        assert stored["change_type"] == "prompt"

    async def test_record_event_data_roundtrip(self, store, sample_event):
        """Verify before/after/metrics are stored and retrieved correctly."""
        await store.record(sample_event)

        listed = await store.list_events()
        assert len(listed) == 1

        stored = listed[0]
        assert stored["before"] == {"prompt": "old prompt"}
        assert stored["after"] == {"prompt": "new prompt"}
        assert stored["metrics"] == {"accuracy": 0.9}
        # A newly recorded event is listed as active and carries a timestamp.
        assert stored["status"] == "active"
        assert stored["created_at"] is not None
# ── rollback() tests ──────────────────────────────────────
class TestRollback:
    """rollback() behaviour of PersistentEvolutionStore."""

    async def test_rollback_success(self, store, sample_event):
        """Rolling back a recorded event returns True and flips its status."""
        recorded_id = await store.record(sample_event)

        outcome = await store.rollback(recorded_id)
        assert outcome is True

        # The event is still listed, now marked as rolled back.
        listed = await store.list_events()
        assert len(listed) == 1
        assert listed[0]["status"] == "rolled_back"

    async def test_rollback_nonexistent_returns_false(self, store):
        """Rolling back an unknown id returns False."""
        outcome = await store.rollback("nonexistent-id")
        assert outcome is False

    async def test_rollback_persists_across_reopen(self, db_path, sample_event):
        """Rollback status persists after reopening the database."""
        first_store = PersistentEvolutionStore(db_path=db_path)
        recorded_id = await first_store.record(sample_event)
        await first_store.rollback(recorded_id)
        # Drop the reference before opening a second store on the same file.
        del first_store

        reopened_store = PersistentEvolutionStore(db_path=db_path)
        listed = await reopened_store.list_events()
        assert listed[0]["status"] == "rolled_back"
# ── list_events() tests ──────────────────────────────────
class TestListEvents:
    """list_events() filtering and ordering on PersistentEvolutionStore."""

    async def test_list_events_empty(self, store):
        """A store with nothing recorded lists an empty list."""
        listed = await store.list_events()
        assert listed == []

    async def test_list_events_filter_by_agent_name(self, store):
        """Filtering by agent_name returns only that agent's events."""
        for agent in ("agent_a", "agent_b"):
            await store.record(
                EvolutionEvent(
                    agent_name=agent, change_type="prompt", before={}, after={}
                )
            )

        listed = await store.list_events(agent_name="agent_a")

        assert len(listed) == 1
        assert listed[0]["agent_name"] == "agent_a"

    async def test_list_events_filter_by_change_type(self, store):
        """Filtering by change_type returns only events of that type."""
        for kind in ("prompt", "strategy"):
            await store.record(
                EvolutionEvent(
                    agent_name="test", change_type=kind, before={}, after={}
                )
            )

        listed = await store.list_events(change_type="strategy")

        assert len(listed) == 1
        assert listed[0]["change_type"] == "strategy"

    async def test_list_events_filter_by_status(self, store):
        """Filtering by status separates active from rolled-back events."""
        recorded_id = await store.record(
            EvolutionEvent(
                agent_name="test", change_type="prompt", before={}, after={}
            )
        )
        await store.rollback(recorded_id)

        # The only event was rolled back, so nothing is active any more.
        still_active = await store.list_events(status="active")
        assert len(still_active) == 0

        reverted = await store.list_events(status="rolled_back")
        assert len(reverted) == 1
        assert reverted[0]["status"] == "rolled_back"

    async def test_list_events_multiple_with_combined_filters(self, store):
        """Integration: record multiple events, list with filters."""
        # Two events for agent_a (one per change type) and one for agent_b.
        recorded_specs = (
            ("agent_a", "prompt"),
            ("agent_a", "strategy"),
            ("agent_b", "prompt"),
        )
        for agent, kind in recorded_specs:
            await store.record(
                EvolutionEvent(
                    agent_name=agent,
                    change_type=kind,
                    before={},
                    after={},
                )
            )

        # Filter by agent_name
        by_agent = await store.list_events(agent_name="agent_a")
        assert len(by_agent) == 2

        # Filter by change_type
        by_kind = await store.list_events(change_type="strategy")
        assert len(by_kind) == 1

        # Combined filter
        by_both = await store.list_events(agent_name="agent_a", change_type="prompt")
        assert len(by_both) == 1

    async def test_list_events_ordered_by_created_at_desc(self, store):
        """Events are returned newest first."""
        import asyncio

        older = EvolutionEvent(
            agent_name="test", change_type="prompt", before={"v": 1}, after={}
        )
        await store.record(older)

        await asyncio.sleep(0.01)  # ensure different timestamps

        newer = EvolutionEvent(
            agent_name="test", change_type="prompt", before={"v": 2}, after={}
        )
        await store.record(newer)

        listed = await store.list_events()
        assert len(listed) == 2
        # Newest first
        assert listed[0]["before"]["v"] == 2
        assert listed[1]["before"]["v"] == 1
# ── Skill version tests ──────────────────────────────────
class TestSkillVersions:
    """record_skill_version() / list_skill_versions() on PersistentEvolutionStore."""

    async def test_record_and_list_skill_version(self, store):
        """A recorded skill version is listed with its name, version and content."""
        payload = '{"prompt": "search for X"}'

        version_id = await store.record_skill_version(
            skill_name="search",
            version="v1",
            content=payload,
        )
        assert version_id is not None

        versions = await store.list_skill_versions("search")
        assert len(versions) == 1
        assert versions[0]["skill_name"] == "search"
        assert versions[0]["version"] == "v1"
        assert versions[0]["content"] == payload

    async def test_skill_version_with_parent(self, store):
        """A child version records its parent and is listed before it."""
        import asyncio

        await store.record_skill_version("search", "v1", '{"prompt": "v1"}')
        # The assertions below depend on newest-first ordering. Pause briefly,
        # as the event-ordering test in this module does, so the two versions
        # cannot share a creation timestamp if ordering is timestamp-based.
        await asyncio.sleep(0.01)
        await store.record_skill_version(
            "search", "v2", '{"prompt": "v2"}', parent_version="v1"
        )

        versions = await store.list_skill_versions("search")
        assert len(versions) == 2
        # Newest first
        newest, oldest = versions[0], versions[1]
        assert newest["version"] == "v2"
        assert newest["parent_version"] == "v1"
        assert oldest["version"] == "v1"
        assert oldest["parent_version"] is None

    async def test_skill_versions_persist_across_reopen(self, db_path):
        """Skill versions are still listed by a second store on the same file."""
        first_store = PersistentEvolutionStore(db_path=db_path)
        await first_store.record_skill_version("search", "v1", '{"prompt": "v1"}')
        # Drop the reference before opening a second store on the same file.
        del first_store

        reopened_store = PersistentEvolutionStore(db_path=db_path)
        versions = await reopened_store.list_skill_versions("search")
        assert len(versions) == 1
        assert versions[0]["version"] == "v1"

    async def test_list_skill_versions_empty(self, store):
        """Listing versions of an unknown skill yields an empty list."""
        versions = await store.list_skill_versions("nonexistent")
        assert versions == []
# ── A/B test result tests ────────────────────────────────
class TestABTestResults:
    """record_ab_test_result() / get_ab_test_results() on PersistentEvolutionStore."""

    async def test_record_and_get_ab_test_result(self, store):
        """A recorded result is returned with every field it was stored with."""
        result_id = await store.record_ab_test_result(
            test_id="test_001", variant="control", score=0.85, sample_count=10
        )
        assert result_id is not None

        fetched = await store.get_ab_test_results("test_001")
        assert len(fetched) == 1

        row = fetched[0]
        assert row["test_id"] == "test_001"
        assert row["variant"] == "control"
        assert row["score"] == 0.85
        assert row["sample_count"] == 10

    async def test_ab_test_multiple_variants(self, store):
        """Two variants recorded under one test id are both returned."""
        await store.record_ab_test_result("test_001", "control", 0.8, 10)
        await store.record_ab_test_result("test_001", "experiment", 0.9, 10)

        fetched = await store.get_ab_test_results("test_001")
        assert len(fetched) == 2

    async def test_ab_test_results_persist_across_reopen(self, db_path):
        """Results are still returned by a second store on the same file."""
        first_store = PersistentEvolutionStore(db_path=db_path)
        await first_store.record_ab_test_result("test_001", "control", 0.8, 5)
        # Drop the reference before opening a second store on the same file.
        del first_store

        reopened_store = PersistentEvolutionStore(db_path=db_path)
        fetched = await reopened_store.get_ab_test_results("test_001")
        assert len(fetched) == 1
        assert fetched[0]["variant"] == "control"

    async def test_get_ab_test_results_empty(self, store):
        """Asking for an unknown test id yields an empty list."""
        fetched = await store.get_ab_test_results("nonexistent")
        assert fetched == []
# ── InMemoryEvolutionStore tests ─────────────────────────
class TestInMemoryEvolutionStore:
    """Checks of the same store operations against InMemoryEvolutionStore."""

    async def test_record_and_list(self):
        """A recorded event gets an id and shows up in list_events()."""
        memory_store = InMemoryEvolutionStore()

        recorded_id = await memory_store.record(
            EvolutionEvent(
                agent_name="test", change_type="prompt", before={}, after={}
            )
        )
        assert recorded_id is not None

        listed = await memory_store.list_events()
        assert len(listed) == 1
        assert listed[0]["agent_name"] == "test"

    async def test_rollback(self):
        """Rolling back a recorded event returns True and updates its status."""
        memory_store = InMemoryEvolutionStore()
        recorded_id = await memory_store.record(
            EvolutionEvent(
                agent_name="test", change_type="prompt", before={}, after={}
            )
        )

        outcome = await memory_store.rollback(recorded_id)
        assert outcome is True

        listed = await memory_store.list_events()
        assert listed[0]["status"] == "rolled_back"

    async def test_rollback_nonexistent(self):
        """Rolling back an unknown id returns False."""
        memory_store = InMemoryEvolutionStore()
        outcome = await memory_store.rollback("nonexistent")
        assert outcome is False

    async def test_list_events_with_filters(self):
        """Filtering by agent_name returns only the matching event."""
        memory_store = InMemoryEvolutionStore()
        for agent, kind in (("a", "prompt"), ("b", "strategy")):
            await memory_store.record(
                EvolutionEvent(agent_name=agent, change_type=kind, before={}, after={})
            )

        listed = await memory_store.list_events(agent_name="a")
        assert len(listed) == 1

    async def test_skill_versions(self):
        """A recorded skill version is returned by list_skill_versions()."""
        memory_store = InMemoryEvolutionStore()
        await memory_store.record_skill_version("skill1", "v1", '{"data": 1}')

        versions = await memory_store.list_skill_versions("skill1")
        assert len(versions) == 1
        assert versions[0]["version"] == "v1"

    async def test_ab_test_results(self):
        """A recorded A/B result is returned by get_ab_test_results()."""
        memory_store = InMemoryEvolutionStore()
        await memory_store.record_ab_test_result("t1", "control", 0.8, 5)

        fetched = await memory_store.get_ab_test_results("t1")
        assert len(fetched) == 1
        assert fetched[0]["variant"] == "control"
# ── create_evolution_store factory tests ──────────────────
class TestCreateEvolutionStore:
    """Which store class create_evolution_store() returns for each backend."""

    def test_create_memory_backend(self):
        """backend="memory" yields an InMemoryEvolutionStore."""
        created = create_evolution_store(backend="memory")
        assert isinstance(created, InMemoryEvolutionStore)

    def test_create_sqlite_backend(self, tmp_path):
        """backend="sqlite" with a db_path yields a PersistentEvolutionStore."""
        database_file = tmp_path / "factory_test.db"
        created = create_evolution_store(backend="sqlite", db_path=str(database_file))
        assert isinstance(created, PersistentEvolutionStore)

    def test_create_default_backend(self):
        """Calling the factory with no arguments yields an InMemoryEvolutionStore."""
        created = create_evolution_store()
        assert isinstance(created, InMemoryEvolutionStore)

    def test_create_sql_backend_without_params_falls_back(self):
        """sql backend without session_factory/evolution_model falls back to memory."""
        created = create_evolution_store(backend="sql")
        assert isinstance(created, InMemoryEvolutionStore)