116 lines
4.2 KiB
Python
116 lines
4.2 KiB
Python
import uuid
|
||
from datetime import datetime
|
||
|
||
from sqlalchemy import String, Boolean, Integer, Float, ForeignKey, Index, func, Text
|
||
from sqlalchemy import Uuid, JSON
|
||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||
|
||
from app.database import Base
|
||
from app.utils.text import sanitize_raw_response
|
||
|
||
|
||
class CitationRecord(Base):
    """ORM model for one citation-detection result of a query on a platform.

    Each row references a ``Query`` through ``query_id`` (the foreign key is
    declared with ``ON DELETE CASCADE``) and stores the detection outcome,
    optional sentiment-analysis fields, optional citation-source fields and
    the response text.
    """

    __tablename__ = "citation_records"

    id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    query_id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        ForeignKey("queries.id", ondelete="CASCADE"),
        nullable=False,
    )
    platform: Mapped[str] = mapped_column(String(50), nullable=False)
    cited: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
    citation_position: Mapped[int | None] = mapped_column(Integer, nullable=True)
    citation_text: Mapped[str | None] = mapped_column(Text, nullable=True)
    competitor_brands: Mapped[list] = mapped_column(JSON, default=list)
    raw_response: Mapped[str | None] = mapped_column(Text, nullable=True)
    confidence: Mapped[float | None] = mapped_column(Float, nullable=True)
    match_type: Mapped[str | None] = mapped_column(String(20), nullable=True)

    # Sentiment-analysis fields
    sentiment: Mapped[str | None] = mapped_column(
        String(20), nullable=True,
        comment="情感倾向: positive / neutral / negative",
    )
    sentiment_confidence: Mapped[float | None] = mapped_column(
        Float, nullable=True,
        comment="情感分析置信度 0.0-1.0",
    )
    sentiment_key_phrases: Mapped[list | None] = mapped_column(
        JSON, nullable=True,
        comment="关键情感短语列表",
    )

    # Citation-source analysis fields
    data_source: Mapped[str | None] = mapped_column(
        String(20), nullable=True,
        comment="数据来源类型: ai_platform / search_engine / unknown",
    )
    source_urls: Mapped[list | None] = mapped_column(
        JSON, nullable=True,
        comment="提取的引用URL列表",
    )
    source_titles: Mapped[list | None] = mapped_column(
        JSON, nullable=True,
        comment="提取的引用来源标题列表",
    )
    citation_contexts: Mapped[list | None] = mapped_column(
        JSON, nullable=True,
        comment="引用出现的上下文片段列表",
    )
    ai_response_text: Mapped[str | None] = mapped_column(
        Text, nullable=True,
        comment="AI回答原始文本(去掉data_source标记后的纯文本)",
    )

    # Timestamp is filled in by the database (server-side NOW()).
    queried_at: Mapped[datetime] = mapped_column(
        server_default=func.now(),
        nullable=False,
    )

    query: Mapped["Query"] = relationship("Query", back_populates="citation_records")

    __table_args__ = (
        Index("idx_citation_records_query_id", "query_id"),
        Index("idx_citation_records_queried_at", "queried_at"),
        Index("idx_citation_records_platform", "platform"),
    )

    @classmethod
    def from_citation_result(
        cls,
        query_id: uuid.UUID,
        platform: str,
        result: dict,
    ) -> "CitationRecord":
        """Build a CitationRecord from a citation-detection result dict.

        Centralizes field mapping, default values and the cleanup of
        ``raw_response`` / ``ai_response_text``.

        For the fields that have a default (``cited``, ``competitor_brands``,
        ``raw_response``, ``ai_response_text``) a key that is present but
        explicitly ``None`` is treated the same as a missing key.

        Args:
            query_id: ID of the query this record belongs to.
            platform: Platform name.
            result: Citation-detection result dict.

        Returns:
            A CitationRecord instance that has not been persisted.
        """
        # NOTE(review): sentiment, sentiment_confidence and
        # sentiment_key_phrases are not populated here -- presumably they are
        # filled in by a separate step; confirm against the callers.
        return cls(
            query_id=query_id,
            platform=platform,
            # dict.get(key, default) returns None when the key exists with a
            # None value; `or` keeps None away from the non-nullable column.
            cited=result.get("cited") or False,
            citation_position=result.get("position"),
            citation_text=result.get("citation_text"),
            # The column is annotated as a non-optional list.
            competitor_brands=result.get("competitor_brands") or [],
            # Always hand the sanitizer a string, exactly as the missing-key
            # path does.
            raw_response=sanitize_raw_response(result.get("raw_response") or ""),
            confidence=result.get("confidence"),
            match_type=result.get("match_type"),
            # Citation-source analysis fields
            data_source=result.get("data_source"),
            source_urls=result.get("source_urls"),
            source_titles=result.get("source_titles"),
            citation_contexts=result.get("citation_contexts"),
            ai_response_text=sanitize_raw_response(
                result.get("ai_response_text") or ""
            ),
        )
|