geo/backend/app/models/citation_record.py

117 lines
4.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import uuid
from datetime import datetime
from sqlalchemy import String, Boolean, Integer, Float, DateTime, ForeignKey, Index, func, Text
from sqlalchemy import Uuid, JSON
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.database import Base
from app.utils.text import sanitize_raw_response
class CitationRecord(Base):
    """ORM model for one citation-detection result stored in ``citation_records``.

    Each row belongs to a row in ``queries`` (``query_id`` foreign key, deleted
    together with its query via ``ondelete="CASCADE"``) and records the result
    obtained for one platform, plus optional sentiment and citation-source
    analysis fields.
    """

    __tablename__ = "citation_records"

    # Primary key; generated client-side with uuid4 when not supplied.
    id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    # Owning query.
    query_id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        ForeignKey("queries.id", ondelete="CASCADE"),
        nullable=False,
    )
    platform: Mapped[str] = mapped_column(String(50), nullable=False)
    cited: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
    citation_position: Mapped[int | None] = mapped_column(Integer, nullable=True)
    citation_text: Mapped[str | None] = mapped_column(Text, nullable=True)
    competitor_brands: Mapped[list] = mapped_column(JSON, default=list)
    raw_response: Mapped[str | None] = mapped_column(Text, nullable=True)
    confidence: Mapped[float | None] = mapped_column(Float, nullable=True)
    match_type: Mapped[str | None] = mapped_column(String(20), nullable=True)

    # Sentiment-analysis fields.
    sentiment: Mapped[str | None] = mapped_column(
        String(20), nullable=True,
        comment="情感倾向: positive / neutral / negative",
    )
    sentiment_confidence: Mapped[float | None] = mapped_column(
        Float, nullable=True,
        comment="情感分析置信度 0.0-1.0",
    )
    sentiment_key_phrases: Mapped[list | None] = mapped_column(
        JSON, nullable=True,
        comment="关键情感短语列表",
    )

    # Citation-source analysis fields.
    data_source: Mapped[str | None] = mapped_column(
        String(20), nullable=True,
        comment="数据来源类型: ai_platform / search_engine / unknown",
    )
    source_urls: Mapped[list | None] = mapped_column(
        JSON, nullable=True,
        comment="提取的引用URL列表",
    )
    source_titles: Mapped[list | None] = mapped_column(
        JSON, nullable=True,
        comment="提取的引用来源标题列表",
    )
    citation_contexts: Mapped[list | None] = mapped_column(
        JSON, nullable=True,
        comment="引用出现的上下文片段列表",
    )
    ai_response_text: Mapped[str | None] = mapped_column(
        Text, nullable=True,
        comment="AI回答原始文本去掉data_source标记后的纯文本",
    )

    # Timestamp filled in by the database (server-side ``now()``) on insert.
    queried_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )

    # Parent ``Query`` object; the other side exposes ``citation_records``.
    query: Mapped["Query"] = relationship("Query", back_populates="citation_records")

    __table_args__ = (
        Index("idx_citation_records_query_id", "query_id"),
        Index("idx_citation_records_queried_at", "queried_at"),
        Index("idx_citation_records_platform", "platform"),
    )

    @classmethod
    def from_citation_result(
        cls,
        query_id: uuid.UUID,
        platform: str,
        result: dict,
    ) -> "CitationRecord":
        """Build a CitationRecord from a citation-detection result dict.

        Centralizes the field mapping, the default values, and the cleaning of
        ``raw_response`` / ``ai_response_text`` (both are passed through
        ``sanitize_raw_response``).

        Args:
            query_id: ID of the query this record belongs to.
            platform: Platform name.
            result: Citation-detection result dict. Missing keys fall back to
                ``None``, except ``cited`` (``False``), ``competitor_brands``
                (``[]``) and the two response texts (``""`` before sanitizing).

        Returns:
            A new CitationRecord instance; this method only constructs the
            object and does not add it to a session.
        """
        # dict.get() only applies its default when the key is absent. A key
        # that is present with an explicit None would otherwise be passed
        # straight through to ``cited`` (declared nullable=False) and to
        # ``competitor_brands`` (annotated as a non-optional list), so
        # coalesce None to the same defaults used for a missing key.
        cited = result.get("cited")
        if cited is None:
            cited = False
        competitor_brands = result.get("competitor_brands")
        if competitor_brands is None:
            competitor_brands = []

        # NOTE(review): the sentiment_* columns are not populated here —
        # presumably they are filled in by a separate step; confirm.
        return cls(
            query_id=query_id,
            platform=platform,
            cited=cited,
            citation_position=result.get("position"),
            citation_text=result.get("citation_text"),
            competitor_brands=competitor_brands,
            raw_response=sanitize_raw_response(result.get("raw_response", "")),
            confidence=result.get("confidence"),
            match_type=result.get("match_type"),
            # Citation-source analysis fields.
            data_source=result.get("data_source"),
            source_urls=result.get("source_urls"),
            source_titles=result.get("source_titles"),
            citation_contexts=result.get("citation_contexts"),
            ai_response_text=sanitize_raw_response(result.get("ai_response_text", "")),
        )