fischer-agentkit/src/agentkit/documents/db.py

122 lines
4.2 KiB
Python

"""SQLite persistence for document metadata.
Follows the aiosqlite bare-connection pattern from ``server/auth/models.py``:
no SQLAlchemy session injection, just ``async with aiosqlite.connect(...)``.
The documents table stores metadata; file bytes live on disk under
``data/uploads/``.
"""
from __future__ import annotations
import logging
import os
from collections.abc import Mapping
from pathlib import Path
import aiosqlite
from agentkit.documents.models import DocumentMeta
logger = logging.getLogger(__name__)
# Directory three levels above this module's own directory; expected to be the
# project root that holds the ``data/`` folder.
_PROJECT_ROOT = Path(__file__).parents[3]

# Default location of the documents database. The ``AGENTKIT_DOC_DB``
# environment variable overrides it. An unset *or empty* value falls back to
# ``<project root>/data/documents.db``: ``Path("")`` would otherwise collapse
# to ``.``, which is a directory and not a usable database file.
DEFAULT_DOC_DB_PATH = Path(
    os.environ.get("AGENTKIT_DOC_DB") or _PROJECT_ROOT / "data" / "documents.db"
)
# DDL script run by ``init_documents_db``. Both statements use
# ``IF NOT EXISTS`` so the script can be executed repeatedly.
# The index on ``conversation_id`` backs the per-conversation lookup in
# ``get_conversation_documents``.
# NOTE(review): ``created_at`` is TEXT and is sorted with ``ORDER BY`` by that
# lookup, so ordering is lexicographic -- presumably the values are ISO-8601
# timestamps; confirm against the code that builds ``DocumentMeta``.
_SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS documents (
id TEXT PRIMARY KEY,
filename TEXT NOT NULL,
stored_name TEXT NOT NULL,
format TEXT NOT NULL,
size INTEGER NOT NULL,
conversation_id TEXT NOT NULL,
created_at TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_documents_conversation_id
ON documents(conversation_id);
"""
async def init_documents_db(db_path: str | Path | None = None) -> Path:
    """Create the documents table and its index if they do not exist.

    Idempotent: the schema script only uses ``IF NOT EXISTS`` statements, and
    the parent directory is created with ``exist_ok=True``.

    Args:
        db_path: Database file to initialize. ``None`` selects
            ``DEFAULT_DOC_DB_PATH``.

    Returns:
        The path of the database file that was initialized.
    """
    path = Path(db_path) if db_path is not None else DEFAULT_DOC_DB_PATH
    path.parent.mkdir(parents=True, exist_ok=True)
    async with aiosqlite.connect(str(path)) as db:
        # WAL mode is stored in the database file, so later connections
        # opened by the other helpers inherit it.
        await db.execute("PRAGMA journal_mode=WAL")
        # busy_timeout is a per-connection setting: it only covers the schema
        # creation below, not connections opened elsewhere.
        await db.execute("PRAGMA busy_timeout = 5000")
        await db.executescript(_SCHEMA_SQL)
        await db.commit()
    # Lazy %-style arguments: the message is only formatted if INFO is enabled.
    logger.info("Documents DB initialized at %s", path)
    return path
def _row_to_meta(row: aiosqlite.Row | Mapping[str, object]) -> DocumentMeta:
    """Build a ``DocumentMeta`` from a row keyed by the ``documents`` column names."""
    # Column names double as the DocumentMeta keyword arguments.
    columns = (
        "id",
        "filename",
        "stored_name",
        "format",
        "size",
        "conversation_id",
        "created_at",
    )
    return DocumentMeta(**{column: row[column] for column in columns})
async def insert_document(meta: DocumentMeta, db_path: str | Path | None = None) -> None:
    """Store one document's metadata as a new row in ``documents``.

    ``db_path`` selects the database file; ``None`` means
    ``DEFAULT_DOC_DB_PATH``. The insert is committed before returning.
    """
    target = DEFAULT_DOC_DB_PATH if db_path is None else Path(db_path)
    statement = (
        "INSERT INTO documents (id, filename, stored_name, format, size, "
        "conversation_id, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)"
    )
    async with aiosqlite.connect(str(target)) as conn:
        # Values are listed in the same order as the column list above.
        values = (
            meta.id,
            meta.filename,
            meta.stored_name,
            meta.format,
            meta.size,
            meta.conversation_id,
            meta.created_at,
        )
        await conn.execute(statement, values)
        await conn.commit()
async def get_conversation_documents(
    conversation_id: str, db_path: str | Path | None = None
) -> list[DocumentMeta]:
    """List the documents attached to a conversation.

    Rows are ordered by ``created_at`` descending. ``db_path`` selects the
    database file; ``None`` means ``DEFAULT_DOC_DB_PATH``. An unknown
    conversation yields an empty list.
    """
    target = DEFAULT_DOC_DB_PATH if db_path is None else Path(db_path)
    query = "SELECT * FROM documents WHERE conversation_id = ? ORDER BY created_at DESC"
    async with aiosqlite.connect(str(target)) as conn:
        # Row objects allow the name-based access used by _row_to_meta.
        conn.row_factory = aiosqlite.Row
        result = await conn.execute(query, (conversation_id,))
        records = await result.fetchall()
        documents = []
        for record in records:
            documents.append(_row_to_meta(record))
        return documents
async def get_document_by_id(doc_id: str, db_path: str | Path | None = None) -> DocumentMeta | None:
    """Look up one document by its primary key.

    Returns the matching ``DocumentMeta``, or ``None`` when no row has that
    id. ``db_path`` selects the database file; ``None`` means
    ``DEFAULT_DOC_DB_PATH``.
    """
    target = DEFAULT_DOC_DB_PATH if db_path is None else Path(db_path)
    async with aiosqlite.connect(str(target)) as conn:
        # Row objects allow the name-based access used by _row_to_meta.
        conn.row_factory = aiosqlite.Row
        result = await conn.execute("SELECT * FROM documents WHERE id = ?", (doc_id,))
        record = await result.fetchone()
        if not record:
            return None
        return _row_to_meta(record)
async def delete_document(doc_id: str, db_path: str | Path | None = None) -> bool:
    """Remove the metadata row with the given id.

    Returns ``True`` when a row was deleted and ``False`` when no row matched.
    ``db_path`` selects the database file; ``None`` means
    ``DEFAULT_DOC_DB_PATH``.
    """
    target = DEFAULT_DOC_DB_PATH if db_path is None else Path(db_path)
    async with aiosqlite.connect(str(target)) as conn:
        result = await conn.execute("DELETE FROM documents WHERE id = ?", (doc_id,))
        await conn.commit()
        # rowcount holds the number of rows the DELETE affected.
        removed = result.rowcount
        return removed > 0