229 lines
9.1 KiB
Python
229 lines
9.1 KiB
Python
"""Episodic Memory - 基于 pgvector + PostgreSQL 的任务经验记忆"""
|
||
|
||
import logging
|
||
import math
|
||
from datetime import datetime, timezone
|
||
from typing import Any
|
||
|
||
from agentkit.memory.base import Memory, MemoryItem
|
||
from agentkit.memory.embedder import Embedder
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class EpisodicMemory(Memory):
|
||
"""Episodic Memory - 记录每次任务的输入/输出/效果/反思
|
||
|
||
基于 pgvector + PostgreSQL 实现,支持语义检索和时间衰减。
|
||
生命周期:永久(可配置衰减)。
|
||
"""
|
||
|
||
def __init__(
|
||
self,
|
||
session_factory: Any,
|
||
episodic_model: Any,
|
||
embedder: Embedder | None = None,
|
||
decay_rate: float = 0.01,
|
||
alpha: float = 0.7,
|
||
):
|
||
"""
|
||
Args:
|
||
session_factory: 返回 async context manager 的工厂
|
||
episodic_model: EpisodicMemory ORM 模型类
|
||
embedder: 嵌入器,用于生成向量
|
||
decay_rate: 时间衰减率(越大衰减越快)
|
||
alpha: 混合评分权重,alpha * cosine + (1-alpha) * time_decay
|
||
"""
|
||
self._session_factory = session_factory
|
||
self._episodic_model = episodic_model
|
||
self._embedder = embedder
|
||
self._decay_rate = decay_rate
|
||
self._alpha = alpha
|
||
|
||
async def store(self, key: str, value: Any, metadata: dict[str, Any] | None = None) -> None:
|
||
"""存储任务经验"""
|
||
async with self._session_factory() as db:
|
||
try:
|
||
Model = self._episodic_model
|
||
meta = metadata or {}
|
||
|
||
# 生成 embedding
|
||
embedding = None
|
||
if self._embedder:
|
||
text = f"{key} {value}"
|
||
embedding = await self._embedder.embed(text)
|
||
|
||
entry = Model(
|
||
agent_name=meta.get("agent_name", ""),
|
||
task_type=meta.get("task_type", ""),
|
||
input_summary=str(value)[:500] if value else "",
|
||
output_summary=meta.get("output_summary", ""),
|
||
outcome=meta.get("outcome", "success"),
|
||
quality_score=meta.get("quality_score", 0.5),
|
||
reflection=meta.get("reflection", ""),
|
||
embedding=embedding,
|
||
)
|
||
db.add(entry)
|
||
await db.commit()
|
||
except Exception as e:
|
||
await db.rollback()
|
||
logger.error(f"Failed to store episodic memory: {e}")
|
||
raise
|
||
|
||
async def retrieve(self, key: str) -> MemoryItem | None:
|
||
"""按 key 语义检索(使用 embedding 相似度)"""
|
||
if not self._embedder:
|
||
return None
|
||
|
||
async with self._session_factory() as db:
|
||
try:
|
||
Model = self._episodic_model
|
||
from sqlalchemy import select
|
||
|
||
stmt = select(Model).order_by(Model.created_at.desc()).limit(50)
|
||
result = await db.execute(stmt)
|
||
entries = result.scalars().all()
|
||
|
||
if not entries:
|
||
return None
|
||
|
||
query_embedding = await self._embedder.embed(key)
|
||
best_item = None
|
||
best_score = -1.0
|
||
|
||
for entry in entries:
|
||
entry_embedding = entry.embedding
|
||
if entry_embedding is None:
|
||
continue
|
||
cosine = self._compute_cosine_similarity(query_embedding, entry_embedding)
|
||
if cosine > best_score:
|
||
best_score = cosine
|
||
best_item = entry
|
||
|
||
if best_item is None or best_score < 0.1:
|
||
return None
|
||
|
||
return MemoryItem(
|
||
key=str(best_item.id),
|
||
value={
|
||
"input_summary": best_item.input_summary,
|
||
"output_summary": best_item.output_summary,
|
||
"outcome": best_item.outcome,
|
||
"quality_score": best_item.quality_score,
|
||
"reflection": best_item.reflection,
|
||
},
|
||
metadata={
|
||
"agent_name": best_item.agent_name,
|
||
"task_type": best_item.task_type,
|
||
"created_at": best_item.created_at.isoformat() if best_item.created_at else None,
|
||
"cosine_similarity": best_score,
|
||
},
|
||
score=best_score,
|
||
created_at=best_item.created_at or datetime.now(timezone.utc),
|
||
)
|
||
|
||
except Exception as e:
|
||
logger.error(f"Failed to retrieve episodic memory: {e}")
|
||
return None
|
||
|
||
async def search(self, query: str, top_k: int = 5, filters: dict[str, Any] | None = None) -> list[MemoryItem]:
|
||
"""语义检索相似历史案例"""
|
||
async with self._session_factory() as db:
|
||
try:
|
||
Model = self._episodic_model
|
||
filters = filters or {}
|
||
|
||
# 构建查询
|
||
from sqlalchemy import select
|
||
stmt = select(Model)
|
||
|
||
if filters.get("agent_name"):
|
||
stmt = stmt.where(Model.agent_name == filters["agent_name"])
|
||
if filters.get("task_type"):
|
||
stmt = stmt.where(Model.task_type == filters["task_type"])
|
||
if filters.get("outcome"):
|
||
stmt = stmt.where(Model.outcome == filters["outcome"])
|
||
|
||
stmt = stmt.order_by(Model.created_at.desc()).limit(top_k * 2)
|
||
|
||
result = await db.execute(stmt)
|
||
entries = result.scalars().all()
|
||
|
||
# 如果有 embedder,生成 query embedding
|
||
query_embedding = None
|
||
if self._embedder and entries:
|
||
query_embedding = await self._embedder.embed(query)
|
||
|
||
# 计算得分并构建 MemoryItem
|
||
items = []
|
||
for entry in entries:
|
||
age_hours = (datetime.now(timezone.utc) - entry.created_at).total_seconds() / 3600 if entry.created_at else 0
|
||
decay = math.exp(-self._decay_rate * age_hours)
|
||
time_decay_score = (entry.quality_score or 0.5) * decay
|
||
|
||
# 混合评分:alpha * cosine + (1 - alpha) * time_decay
|
||
if self._embedder and query_embedding is not None and entry.embedding is not None:
|
||
cosine_sim = self._compute_cosine_similarity(query_embedding, entry.embedding)
|
||
score = self._alpha * cosine_sim + (1 - self._alpha) * time_decay_score
|
||
else:
|
||
score = time_decay_score
|
||
|
||
items.append(MemoryItem(
|
||
key=str(entry.id),
|
||
value={
|
||
"input_summary": entry.input_summary,
|
||
"output_summary": entry.output_summary,
|
||
"outcome": entry.outcome,
|
||
"quality_score": entry.quality_score,
|
||
"reflection": entry.reflection,
|
||
},
|
||
metadata={
|
||
"agent_name": entry.agent_name,
|
||
"task_type": entry.task_type,
|
||
"created_at": entry.created_at.isoformat() if entry.created_at else None,
|
||
},
|
||
score=score,
|
||
created_at=entry.created_at or datetime.now(timezone.utc),
|
||
))
|
||
|
||
items.sort(key=lambda x: x.score, reverse=True)
|
||
return items[:top_k]
|
||
|
||
except Exception as e:
|
||
logger.error(f"Failed to search episodic memory: {e}")
|
||
return []
|
||
|
||
async def delete(self, key: str) -> bool:
|
||
"""删除指定经验"""
|
||
async with self._session_factory() as db:
|
||
try:
|
||
from sqlalchemy import select, delete as sql_delete
|
||
import uuid
|
||
Model = self._episodic_model
|
||
|
||
stmt = sql_delete(Model).where(Model.id == uuid.UUID(key))
|
||
await db.execute(stmt)
|
||
await db.commit()
|
||
return True
|
||
except Exception as e:
|
||
await db.rollback()
|
||
logger.error(f"Failed to delete episodic memory: {e}")
|
||
return False
|
||
|
||
@staticmethod
|
||
def _compute_cosine_similarity(vec_a: list[float], vec_b: list[float]) -> float:
|
||
"""计算两个向量的余弦相似度"""
|
||
if len(vec_a) != len(vec_b):
|
||
logger.warning(
|
||
f"Vector dimension mismatch: {len(vec_a)} vs {len(vec_b)}"
|
||
)
|
||
return 0.0
|
||
if not vec_a:
|
||
return 0.0
|
||
dot_product = sum(a * b for a, b in zip(vec_a, vec_b))
|
||
magnitude_a = sum(a**2 for a in vec_a) ** 0.5
|
||
magnitude_b = sum(b**2 for b in vec_b) ** 0.5
|
||
if magnitude_a == 0.0 or magnitude_b == 0.0:
|
||
return 0.0
|
||
return dot_product / (magnitude_a * magnitude_b)
|