218 lines
7.9 KiB
Python
218 lines
7.9 KiB
Python
import uuid
|
|
from datetime import datetime
|
|
|
|
from sqlalchemy import String, Integer, ForeignKey, Index, func, Text, DateTime
|
|
from sqlalchemy import Uuid
|
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
|
|
|
from app.database import Base, JSONType
|
|
|
|
# Optional dependency: pgvector supplies the SQLAlchemy ``Vector`` column type.
# When the package is missing, ``Vector`` is bound to None and
# ``_VECTOR_AVAILABLE`` is False so the models below can pick a fallback type.
# The migration file handles the actual column creation via raw SQL.
try:
    from pgvector.sqlalchemy import Vector
except ImportError:
    # pgvector package not installed; Vector columns will be skipped in ORM
    Vector = None
    _VECTOR_AVAILABLE = False
else:
    _VECTOR_AVAILABLE = True
|
|
|
|
|
|
class KnowledgeBase(Base):
    """A named knowledge collection belonging to one organization.

    Maps the ``knowledge_bases`` table. The organization foreign key is
    declared ``ON DELETE CASCADE``; the creator foreign key is declared
    ``ON DELETE SET NULL``, so ``created_by`` and ``creator`` may be empty.
    """

    __tablename__ = "knowledge_bases"

    # Primary key, generated client-side with uuid4.
    id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    # Owning organization (required).
    organization_id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        ForeignKey("organizations.id", ondelete="CASCADE"),
        nullable=False,
    )
    name: Mapped[str] = mapped_column(String(200), nullable=False)
    type: Mapped[str] = mapped_column(String(20), nullable=False)  # "industry" / "enterprise"
    description: Mapped[str | None] = mapped_column(Text, nullable=True)
    # The defaults below are server-side: the database fills them in on INSERT.
    document_count: Mapped[int] = mapped_column(Integer, server_default="0", nullable=False)
    status: Mapped[str] = mapped_column(String(20), server_default="active", nullable=False)
    # User who created the knowledge base; nulled if that user is deleted.
    created_by: Mapped[str | None] = mapped_column(
        String(36),
        ForeignKey("users.id", ondelete="SET NULL"),
        nullable=True,
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    # Refreshed with the database clock whenever the ORM emits an UPDATE.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

    # Relationships
    organization: Mapped["Organization"] = relationship(
        "Organization", back_populates="knowledge_bases"
    )
    # Optional: created_by is nullable (ON DELETE SET NULL), so there may be
    # no creator row to load.
    creator: Mapped["User | None"] = relationship(
        "User", foreign_keys=[created_by]
    )
    # Documents are owned by the knowledge base: removing one from this
    # collection, or deleting the knowledge base, deletes the document too.
    documents: Mapped[list["KnowledgeDocument"]] = relationship(
        "KnowledgeDocument", back_populates="knowledge_base", cascade="all, delete-orphan"
    )

    __table_args__ = (
        Index("idx_knowledge_bases_organization_id", "organization_id"),
        Index("idx_knowledge_bases_type", "type"),
        Index("idx_knowledge_bases_status", "status"),
    )
|
|
|
|
|
|
class KnowledgeDocument(Base):
    """One source document stored inside a knowledge base.

    Maps the ``knowledge_documents`` table. The parent foreign key is declared
    ``ON DELETE CASCADE``, and the document's chunks are cascaded with it at
    the ORM level.
    """

    __tablename__ = "knowledge_documents"

    # Primary key, generated client-side with uuid4.
    id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True), primary_key=True, default=uuid.uuid4
    )
    # Parent knowledge base (required).
    knowledge_base_id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        ForeignKey("knowledge_bases.id", ondelete="CASCADE"),
        nullable=False,
    )
    title: Mapped[str] = mapped_column(String(500), nullable=False)
    # One of: "text" / "url" / "pdf" / "markdown"
    source_type: Mapped[str] = mapped_column(String(20), nullable=False)
    source_url: Mapped[str | None] = mapped_column(String(2000), nullable=True)
    content: Mapped[str] = mapped_column(Text, nullable=False)
    content_hash: Mapped[str] = mapped_column(String(64), nullable=False)
    chunk_count: Mapped[int] = mapped_column(
        Integer, server_default="0", nullable=False
    )
    # One of: "processing" / "ready" / "failed"
    status: Mapped[str] = mapped_column(
        String(20), server_default="processing", nullable=False
    )
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
    # The Python attribute is extra_metadata because ``metadata`` is reserved
    # by SQLAlchemy's declarative classes; the database column is "metadata".
    extra_metadata: Mapped[dict | None] = mapped_column(
        "metadata", JSONType, nullable=True
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )
    # Refreshed with the database clock whenever the ORM emits an UPDATE.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

    # Relationships
    knowledge_base: Mapped["KnowledgeBase"] = relationship(
        "KnowledgeBase",
        back_populates="documents",
    )
    chunks: Mapped[list["KnowledgeChunk"]] = relationship(
        "KnowledgeChunk",
        back_populates="document",
        cascade="all, delete-orphan",
    )

    __table_args__ = (
        Index("idx_knowledge_documents_knowledge_base_id", "knowledge_base_id"),
        Index("idx_knowledge_documents_status", "status"),
        Index("idx_knowledge_documents_content_hash", "content_hash"),
    )
|
|
|
|
|
|
# Conditionally build KnowledgeChunk with or without Vector column
|
|
def _build_chunk_columns():
    """Return the nullable ``mapped_column`` used for a chunk embedding.

    The column type is ``Vector(1536)`` when the pgvector package was
    imported successfully, and ``Text`` otherwise.
    """
    # Text is only a stand-in for serialized embeddings (not for production
    # use); the actual vector column is created by the migration via raw SQL.
    column_type = Vector(1536) if _VECTOR_AVAILABLE else Text
    return mapped_column(column_type, nullable=True)
|
|
|
|
|
|
class KnowledgeChunk(Base):
    """One chunk of a knowledge document, with an optional embedding.

    Maps the ``knowledge_chunks`` table. The parent document foreign key is
    declared ``ON DELETE CASCADE``.
    """

    __tablename__ = "knowledge_chunks"

    # Primary key, generated client-side with uuid4.
    id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    # Parent document (required).
    document_id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        ForeignKey("knowledge_documents.id", ondelete="CASCADE"),
        nullable=False,
    )
    content: Mapped[str] = mapped_column(Text, nullable=False)
    # Vector(1536) for OpenAI text-embedding-3-small; requires pgvector extension.
    # If the pgvector package is unavailable the helper falls back to Text and
    # the real column is managed purely via migration SQL.
    # The column comes from _build_chunk_columns() so the type selection lives
    # in one place. It was previously annotated Mapped[None], which told type
    # checkers the attribute could never hold an embedding.
    # NOTE(review): the value read back may not be a plain list (pgvector
    # result object, or str under the Text fallback) - confirm before relying
    # on list behaviour.
    embedding: Mapped[list[float] | None] = _build_chunk_columns()
    # Position of the chunk; indexed together with document_id below.
    chunk_index: Mapped[int] = mapped_column(Integer, nullable=False)
    token_count: Mapped[int] = mapped_column(Integer, server_default="0", nullable=False)
    # The Python attribute is extra_metadata because ``metadata`` is reserved
    # by SQLAlchemy's declarative classes; the database column is "metadata".
    extra_metadata: Mapped[dict | None] = mapped_column("metadata", JSONType, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )

    # Relationships
    document: Mapped["KnowledgeDocument"] = relationship(
        "KnowledgeDocument", back_populates="chunks"
    )

    __table_args__ = (
        Index("idx_knowledge_chunks_document_id", "document_id"),
        Index("idx_knowledge_chunks_chunk_index", "document_id", "chunk_index"),
        # HNSW index on embedding is created via raw SQL in migration
    )
|
|
|
|
|
|
class KnowledgeSearchLog(Base):
    """A record of one knowledge search query.

    Maps the ``knowledge_search_logs`` table. The organization foreign key is
    declared ``ON DELETE CASCADE``; the user foreign key is declared
    ``ON DELETE SET NULL``, so ``user_id`` and ``user`` may be empty.
    """

    __tablename__ = "knowledge_search_logs"

    # Primary key, generated client-side with uuid4.
    id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    # Organization the search belongs to (required).
    organization_id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        ForeignKey("organizations.id", ondelete="CASCADE"),
        nullable=False,
    )
    # User who ran the search; nulled if that user is deleted.
    user_id: Mapped[str | None] = mapped_column(
        String(36),
        ForeignKey("users.id", ondelete="SET NULL"),
        nullable=True,
    )
    query: Mapped[str] = mapped_column(Text, nullable=False)
    # Stored as JSON rather than as foreign keys.
    # NOTE(review): presumably the ids of the knowledge bases searched - confirm.
    knowledge_base_ids: Mapped[list | None] = mapped_column(JSONType, nullable=True)
    results_count: Mapped[int] = mapped_column(Integer, server_default="0", nullable=False)
    latency_ms: Mapped[int] = mapped_column(Integer, server_default="0", nullable=False)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )

    # Relationships
    organization: Mapped["Organization"] = relationship(
        "Organization", foreign_keys=[organization_id]
    )
    # Optional: user_id is nullable (ON DELETE SET NULL), so there may be no
    # user row to load.
    user: Mapped["User | None"] = relationship(
        "User", foreign_keys=[user_id]
    )

    __table_args__ = (
        Index("idx_knowledge_search_logs_organization_id", "organization_id"),
        Index("idx_knowledge_search_logs_user_id", "user_id"),
        Index("idx_knowledge_search_logs_created_at", "created_at"),
    )
|