"""ORM models for knowledge bases, their documents, chunks and search logs."""

import uuid
from datetime import datetime
from typing import Any, Optional

from sqlalchemy import String, Integer, ForeignKey, Index, func, Text
from sqlalchemy import Uuid
from sqlalchemy.orm import Mapped, mapped_column, relationship

from app.database import Base, JSONType

# pgvector Vector type - imported conditionally
try:
    from pgvector.sqlalchemy import Vector

    _VECTOR_AVAILABLE = True
except ImportError:
    # pgvector package not installed: the ORM maps the embedding column as
    # Text instead (see _build_chunk_columns). The migration file handles the
    # actual vector column creation via raw SQL.
    Vector = None
    _VECTOR_AVAILABLE = False


class KnowledgeBase(Base):
    """ORM model for the ``knowledge_bases`` table.

    A named collection of documents that belongs to an organization.
    Deleting a knowledge base cascades to its documents.
    """

    __tablename__ = "knowledge_bases"

    id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    organization_id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        ForeignKey("organizations.id", ondelete="CASCADE"),
        nullable=False,
    )
    name: Mapped[str] = mapped_column(String(200), nullable=False)
    type: Mapped[str] = mapped_column(String(20), nullable=False)  # "industry" / "enterprise"
    description: Mapped[str | None] = mapped_column(Text, nullable=True)
    document_count: Mapped[int] = mapped_column(Integer, server_default="0", nullable=False)
    status: Mapped[str] = mapped_column(String(20), server_default="active", nullable=False)
    created_by: Mapped[uuid.UUID | None] = mapped_column(
        Uuid(as_uuid=True),
        ForeignKey("users.id", ondelete="SET NULL"),
        nullable=True,
    )
    created_at: Mapped[datetime] = mapped_column(
        server_default=func.now(),
        nullable=False,
    )
    updated_at: Mapped[datetime] = mapped_column(
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

    # Relationships
    organization: Mapped["Organization"] = relationship(
        "Organization", back_populates="knowledge_bases"
    )
    # created_by is nullable (SET NULL on user deletion), so the related
    # user may be absent.
    creator: Mapped[Optional["User"]] = relationship(
        "User", foreign_keys=[created_by]
    )
    documents: Mapped[list["KnowledgeDocument"]] = relationship(
        "KnowledgeDocument", back_populates="knowledge_base", cascade="all, delete-orphan"
    )

    __table_args__ = (
        Index("idx_knowledge_bases_organization_id", "organization_id"),
        Index("idx_knowledge_bases_type", "type"),
        Index("idx_knowledge_bases_status", "status"),
    )


class KnowledgeDocument(Base):
    """ORM model for the ``knowledge_documents`` table.

    A single source document stored in a knowledge base, together with its
    processing status. Deleting a document cascades to its chunks.
    """

    __tablename__ = "knowledge_documents"

    id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    knowledge_base_id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        ForeignKey("knowledge_bases.id", ondelete="CASCADE"),
        nullable=False,
    )
    title: Mapped[str] = mapped_column(String(500), nullable=False)
    source_type: Mapped[str] = mapped_column(String(20), nullable=False)  # "text" / "url" / "pdf" / "markdown"
    source_url: Mapped[str | None] = mapped_column(String(2000), nullable=True)
    content: Mapped[str] = mapped_column(Text, nullable=False)
    content_hash: Mapped[str] = mapped_column(String(64), nullable=False)
    chunk_count: Mapped[int] = mapped_column(Integer, server_default="0", nullable=False)
    status: Mapped[str] = mapped_column(String(20), server_default="processing", nullable=False)  # "processing" / "ready" / "failed"
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
    # The database column is named "metadata", but ``metadata`` is a reserved
    # attribute name on Declarative classes, so the Python attribute is
    # exposed as ``extra_metadata``.
    extra_metadata: Mapped[dict | None] = mapped_column("metadata", JSONType, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        server_default=func.now(),
        nullable=False,
    )
    updated_at: Mapped[datetime] = mapped_column(
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

    # Relationships
    knowledge_base: Mapped["KnowledgeBase"] = relationship(
        "KnowledgeBase", back_populates="documents"
    )
    chunks: Mapped[list["KnowledgeChunk"]] = relationship(
        "KnowledgeChunk", back_populates="document", cascade="all, delete-orphan"
    )

    __table_args__ = (
        Index("idx_knowledge_documents_knowledge_base_id", "knowledge_base_id"),
        Index("idx_knowledge_documents_status", "status"),
        Index("idx_knowledge_documents_content_hash", "content_hash"),
    )


# Conditionally build KnowledgeChunk with or without Vector column
def _build_chunk_columns():
    """Return the embedding column definition based on pgvector availability.

    Despite the plural name, a single ``mapped_column`` is returned: a
    nullable ``Vector(1536)`` when the pgvector package could be imported,
    otherwise a nullable ``Text`` column.
    """
    if _VECTOR_AVAILABLE:
        return mapped_column(Vector(1536), nullable=True)
    # Fallback: use Text to store serialized embeddings (not for production use)
    # The actual vector column is created by the migration via raw SQL
    return mapped_column(Text, nullable=True)


class KnowledgeChunk(Base):
    """ORM model for the ``knowledge_chunks`` table.

    One chunk of a document's content, with its position in the document
    and an optional embedding.
    """

    __tablename__ = "knowledge_chunks"

    id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    document_id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        ForeignKey("knowledge_documents.id", ondelete="CASCADE"),
        nullable=False,
    )
    content: Mapped[str] = mapped_column(Text, nullable=False)
    # Vector(1536) for OpenAI text-embedding-3-small; requires pgvector extension.
    # If the pgvector package is unavailable the column is mapped as Text.
    # The Python-side value therefore depends on which column type is active,
    # hence the deliberately loose annotation.
    embedding: Mapped[Optional[Any]] = _build_chunk_columns()
    chunk_index: Mapped[int] = mapped_column(Integer, nullable=False)
    token_count: Mapped[int] = mapped_column(Integer, server_default="0", nullable=False)
    # The database column is named "metadata", but ``metadata`` is a reserved
    # attribute name on Declarative classes, so the Python attribute is
    # exposed as ``extra_metadata``.
    extra_metadata: Mapped[dict | None] = mapped_column("metadata", JSONType, nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        server_default=func.now(),
        nullable=False,
    )

    # Relationships
    document: Mapped["KnowledgeDocument"] = relationship(
        "KnowledgeDocument", back_populates="chunks"
    )

    __table_args__ = (
        Index("idx_knowledge_chunks_document_id", "document_id"),
        Index("idx_knowledge_chunks_chunk_index", "document_id", "chunk_index"),
        # HNSW index on embedding is created via raw SQL in migration
    )


class KnowledgeSearchLog(Base):
    """ORM model for the ``knowledge_search_logs`` table.

    Records a search query together with the knowledge bases it targeted,
    the number of results and the latency in milliseconds.
    """

    __tablename__ = "knowledge_search_logs"

    id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    organization_id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        ForeignKey("organizations.id", ondelete="CASCADE"),
        nullable=False,
    )
    user_id: Mapped[uuid.UUID | None] = mapped_column(
        Uuid(as_uuid=True),
        ForeignKey("users.id", ondelete="SET NULL"),
        nullable=True,
    )
    query: Mapped[str] = mapped_column(Text, nullable=False)
    knowledge_base_ids: Mapped[list | None] = mapped_column(JSONType, nullable=True)
    results_count: Mapped[int] = mapped_column(Integer, server_default="0", nullable=False)
    latency_ms: Mapped[int] = mapped_column(Integer, server_default="0", nullable=False)
    created_at: Mapped[datetime] = mapped_column(
        server_default=func.now(),
        nullable=False,
    )

    # Relationships
    organization: Mapped["Organization"] = relationship(
        "Organization", foreign_keys=[organization_id]
    )
    # user_id is nullable (SET NULL on user deletion), so the related user
    # may be absent.
    user: Mapped[Optional["User"]] = relationship(
        "User", foreign_keys=[user_id]
    )

    __table_args__ = (
        Index("idx_knowledge_search_logs_organization_id", "organization_id"),
        Index("idx_knowledge_search_logs_user_id", "user_id"),
        Index("idx_knowledge_search_logs_created_at", "created_at"),
    )