import uuid
from datetime import datetime

from sqlalchemy import String, Boolean, Integer, Float, DateTime, ForeignKey, Index, func, Text
from sqlalchemy import Uuid, JSON
from sqlalchemy.orm import Mapped, mapped_column, relationship

from app.database import Base
from app.utils.text import sanitize_raw_response


class CitationRecord(Base):
    """ORM model for one row of the ``citation_records`` table.

    Each record belongs to a ``Query`` through ``query_id`` (foreign key to
    ``queries.id`` with ``ON DELETE CASCADE``) and stores the citation
    detection outcome for a single platform, plus optional sentiment and
    citation-source analysis fields.
    """

    __tablename__ = "citation_records"

    id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    query_id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        ForeignKey("queries.id", ondelete="CASCADE"),
        nullable=False,
    )
    platform: Mapped[str] = mapped_column(String(50), nullable=False)
    cited: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
    citation_position: Mapped[int | None] = mapped_column(Integer, nullable=True)
    citation_text: Mapped[str | None] = mapped_column(Text, nullable=True)
    competitor_brands: Mapped[list] = mapped_column(JSON, default=list)
    raw_response: Mapped[str | None] = mapped_column(Text, nullable=True)
    confidence: Mapped[float | None] = mapped_column(Float, nullable=True)
    match_type: Mapped[str | None] = mapped_column(String(20), nullable=True)

    # Sentiment analysis fields
    sentiment: Mapped[str | None] = mapped_column(
        String(20),
        nullable=True,
        comment="情感倾向: positive / neutral / negative",
    )
    sentiment_confidence: Mapped[float | None] = mapped_column(
        Float,
        nullable=True,
        comment="情感分析置信度 0.0-1.0",
    )
    sentiment_key_phrases: Mapped[list | None] = mapped_column(
        JSON,
        nullable=True,
        comment="关键情感短语列表",
    )

    # Citation source analysis fields
    data_source: Mapped[str | None] = mapped_column(
        String(20),
        nullable=True,
        comment="数据来源类型: ai_platform / search_engine / unknown",
    )
    source_urls: Mapped[list | None] = mapped_column(
        JSON,
        nullable=True,
        comment="提取的引用URL列表",
    )
    source_titles: Mapped[list | None] = mapped_column(
        JSON,
        nullable=True,
        comment="提取的引用来源标题列表",
    )
    citation_contexts: Mapped[list | None] = mapped_column(
        JSON,
        nullable=True,
        comment="引用出现的上下文片段列表",
    )
    ai_response_text: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
        comment="AI回答原始文本(去掉data_source标记后的纯文本)",
    )

    # Filled in by the database (server-side ``now()``) when not supplied.
    queried_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )

    query: Mapped["Query"] = relationship("Query", back_populates="citation_records")

    __table_args__ = (
        Index("idx_citation_records_query_id", "query_id"),
        Index("idx_citation_records_queried_at", "queried_at"),
        Index("idx_citation_records_platform", "platform"),
    )

    @classmethod
    def from_citation_result(
        cls,
        query_id: uuid.UUID,
        platform: str,
        result: dict,
    ) -> "CitationRecord":
        """Build a CitationRecord from a citation detection result dict.

        Centralizes the field mapping, the default values, and the cleanup of
        ``raw_response`` / ``ai_response_text`` via ``sanitize_raw_response``.
        The sentiment columns are not populated here.

        Args:
            query_id: ID of the query this record belongs to.
            platform: Platform name.
            result: Citation detection result dict. Its ``"position"`` key
                is stored as ``citation_position``; the other keys read here
                share the name of the column they fill.

        Returns:
            A new CitationRecord instance (not persisted).
        """
        return cls(
            query_id=query_id,
            platform=platform,
            cited=result.get("cited", False),
            citation_position=result.get("position"),
            citation_text=result.get("citation_text"),
            # ``or []`` (rather than a ``.get`` default) also covers an
            # explicit ``None`` value, so the non-optional list column never
            # receives ``None`` from the result dict.
            competitor_brands=result.get("competitor_brands") or [],
            # NOTE(review): an explicit ``None`` under "raw_response" or
            # "ai_response_text" is still handed to sanitize_raw_response
            # as-is; confirm that helper accepts None.
            raw_response=sanitize_raw_response(result.get("raw_response", "")),
            confidence=result.get("confidence"),
            match_type=result.get("match_type"),
            # Citation source analysis fields
            data_source=result.get("data_source"),
            source_urls=result.get("source_urls"),
            source_titles=result.get("source_titles"),
            citation_contexts=result.get("citation_contexts"),
            ai_response_text=sanitize_raw_response(result.get("ai_response_text", "")),
        )