655 lines
20 KiB
Python
655 lines
20 KiB
Python
"""
|
||
评分服务V2 - 品牌可见性评分系统
|
||
|
||
评分维度(总分100):
|
||
- 提及率 (Mention Rate): 25分 - 品牌在AI回答中被提及的频率
|
||
- 推荐排名 (Recommendation Rank): 25分 - 品牌在推荐列表中的位置
|
||
- 情感倾向 (Sentiment Score): 20分 - AI对品牌的情感倾向
|
||
- 引用质量 (Citation Quality): 15分 - 引用内容的深度和正面性
|
||
- 竞品对比 (Competitive Position): 15分 - 相对于竞品的表现
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import logging
|
||
import math
|
||
from dataclasses import dataclass, field
|
||
|
||
from app.schemas.scoring import CitationResult
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
# ============================================================
# V1 compatibility functions (legacy interface kept for backward compatibility)
# ============================================================
|
||
|
||
def calculate_mention_rate_score(brand_citations: int, total_queries: int) -> float:
    """V1 compatibility: express the mention rate as a percentage.

    Args:
        brand_citations: Numerator of the rate (brand citation count).
        total_queries: Denominator of the rate (query count).

    Returns:
        ``brand_citations / total_queries * 100``, or ``0.0`` when
        ``total_queries`` is zero or negative. The value is not clamped,
        so it exceeds 100 when the numerator is larger than the denominator.
    """
    return 0.0 if total_queries <= 0 else (brand_citations / total_queries) * 100
|
||
|
||
|
||
def calculate_sov_score(brand_citations: int, total_citations: int) -> float:
    """V1 compatibility: express share of voice (SOV) as a percentage.

    Args:
        brand_citations: Citation count attributed to the brand.
        total_citations: Citation count the brand is measured against.

    Returns:
        ``brand_citations / total_citations * 100``; ``0.0`` when
        ``total_citations`` is zero or negative. No clamping is applied.
    """
    has_citations = total_citations > 0
    if has_citations:
        share = brand_citations / total_citations
        return share * 100
    return 0.0
|
||
|
||
|
||
def _get_sentiment_coefficient(sentiment: str) -> float:
|
||
"""V1兼容: 获取情感系数"""
|
||
sentiment_map = {
|
||
"positive": 1.0,
|
||
"neutral": 0.6,
|
||
"negative": 0.2,
|
||
}
|
||
return sentiment_map.get(sentiment.lower(), 0.6)
|
||
|
||
|
||
def _get_position_coefficient(position: int | None) -> float:
|
||
"""V1兼容: 获取位置系数"""
|
||
if position is None:
|
||
return 0.4
|
||
if position == 1:
|
||
return 1.0
|
||
if position <= 3:
|
||
return 0.8
|
||
if position <= 5:
|
||
return 0.6
|
||
return 0.4
|
||
|
||
|
||
def _get_length_coefficient(citation_text: str | None) -> float:
|
||
"""V1兼容: 获取长度系数"""
|
||
if not citation_text:
|
||
return 0.6
|
||
text_length = len(citation_text)
|
||
if text_length > 100:
|
||
return 1.0
|
||
if text_length >= 50:
|
||
return 0.8
|
||
return 0.6
|
||
|
||
|
||
def calculate_quality_score(citations: list[CitationResult]) -> float:
    """V1 compatibility: average citation quality as a percentage.

    Only entries whose ``cited`` flag is truthy contribute. Each one is
    scored as ``sentiment * 0.4 + position * 0.3 + length * 0.3`` using the
    module's V1 coefficient helpers.

    Args:
        citations: Citation results to evaluate.

    Returns:
        The mean per-citation quality multiplied by 100, or ``0.0`` when
        the list is empty or contains no cited entries.
    """
    if not citations:
        return 0.0

    cited_only = [item for item in citations if item.cited]
    if not cited_only:
        return 0.0

    # Accumulate in list order so the floating-point result is reproducible.
    running_total = 0.0
    for item in cited_only:
        running_total += (
            _get_sentiment_coefficient(item.sentiment) * 0.4
            + _get_position_coefficient(item.position) * 0.3
            + _get_length_coefficient(item.citation_text) * 0.3
        )

    return (running_total / len(cited_only)) * 100
|
||
|
||
|
||
def calculate_overall_score(
    mention_rate_score: float,
    sov_score: float,
    quality_score: float,
) -> float:
    """V1 compatibility: blend the three V1 scores into one overall score.

    Weights: mention rate 30%, share of voice 40%, citation quality 30%.

    Args:
        mention_rate_score: Mention-rate score.
        sov_score: Share-of-voice score.
        quality_score: Citation-quality score.

    Returns:
        The weighted sum, clamped to the range 0.0 to 100.0.
    """
    mention_part = mention_rate_score * 0.3
    sov_part = sov_score * 0.4
    quality_part = quality_score * 0.3

    weighted = mention_part + sov_part + quality_part
    floored = max(0.0, weighted)
    return min(100.0, floored)
|
||
|
||
|
||
# ============================================================
# V2 scoring data structures
# ============================================================
|
||
|
||
@dataclass
class DimensionScore:
    """Score breakdown for a single scoring dimension."""

    # Display name of the dimension.
    name: str
    # Points earned in this dimension (intended range: 0 to max_score).
    score: float
    # Maximum points available for this dimension.
    max_score: float
    # Score expressed as a percentage (intended range: 0 to 100).
    percentage: float
    # Free-form supporting figures; each instance gets its own fresh dict.
    detail: dict = field(default_factory=dict)
|
||
|
||
|
||
@dataclass
class ScoringResultV2:
    """Aggregated V2 scoring result across the five dimensions.

    ``overall_score`` and ``health_level`` are always recomputed in
    ``__post_init__``; values passed to the constructor are overwritten.
    """

    # Attribute names of the five dimensions, in serialisation order.
    _DIMENSION_KEYS = (
        "mention_rate",
        "recommendation_rank",
        "sentiment_score",
        "citation_quality",
        "competitive_position",
    )

    mention_rate: DimensionScore  # Mention rate (25 points)
    recommendation_rank: DimensionScore  # Recommendation rank (25 points)
    sentiment_score: DimensionScore  # Sentiment (20 points)
    citation_quality: DimensionScore  # Citation quality (15 points)
    competitive_position: DimensionScore  # Competitive position (15 points)

    # Sum of the five dimension scores, clamped to 0-100 and rounded.
    overall_score: float = 0.0

    # One of: excellent / good / pass / danger.
    health_level: str = "danger"

    def __post_init__(self):
        """Derive the overall score and the health level from the dimensions."""
        running_total = 0
        for key in self._DIMENSION_KEYS:
            running_total += getattr(self, key).score
        bounded = min(100.0, max(0.0, running_total))
        self.overall_score = round(bounded, 2)

        # First threshold reached wins; below every threshold means "danger".
        for floor, label in ((80, "excellent"), (60, "good"), (40, "pass")):
            if self.overall_score >= floor:
                self.health_level = label
                break
        else:
            self.health_level = "danger"

    def to_dict(self) -> dict:
        """Return the result as a plain dictionary.

        Dimension scores and percentages are rounded to two decimals; the
        ``detail`` dictionaries are passed through by reference.
        """
        breakdown = {}
        for key in self._DIMENSION_KEYS:
            dimension = getattr(self, key)
            breakdown[key] = {
                "name": dimension.name,
                "score": round(dimension.score, 2),
                "max_score": dimension.max_score,
                "percentage": round(dimension.percentage, 2),
                "detail": dimension.detail,
            }
        return {
            "overall_score": self.overall_score,
            "health_level": self.health_level,
            "dimensions": breakdown,
        }
|
||
|
||
|
||
# ============================================================
# V2 scoring functions
# ============================================================
|
||
|
||
def calculate_mention_rate_v2(
    mentioned_count: int,
    total_queries: int,
) -> DimensionScore:
    """Compute the mention-rate dimension (maximum 25 points).

    Formula: ``clamp(mentioned_count / total_queries, 0, 1) * 25``.

    Args:
        mentioned_count: Number of times the brand was mentioned.
        total_queries: Total number of queries.

    Returns:
        DimensionScore: The mention-rate dimension. The score is ``0.0``
        when ``total_queries`` is zero or negative. ``detail`` echoes the
        raw inputs and, when a rate could be computed, the clamped rate.
    """
    max_score = 25.0

    if total_queries <= 0:
        return DimensionScore(
            name="提及率",
            score=0.0,
            max_score=max_score,
            percentage=0.0,
            # Echo the actual inputs so inconsistent upstream data stays visible.
            detail={
                "mentioned_count": mentioned_count,
                "total_queries": total_queries,
            },
        )

    # Clamp so that a count larger than the query total (or a negative
    # count) cannot push the score outside 0..max_score or the percentage
    # outside 0..100.
    rate = min(1.0, max(0.0, mentioned_count / total_queries))
    score = rate * max_score

    return DimensionScore(
        name="提及率",
        score=score,
        max_score=max_score,
        percentage=round(rate * 100, 2),
        detail={
            "mentioned_count": mentioned_count,
            "total_queries": total_queries,
            "rate": round(rate, 4),
        },
    )
|
||
|
||
|
||
def calculate_recommendation_rank_v2(
    positions: list[int | None],
) -> DimensionScore:
    """Compute the recommendation-rank dimension (maximum 25 points).

    Each valid position earns a logarithmic discount ``1 / log2(position + 1)``
    (rank 1 -> 1.0, rank 2 -> ~0.63, rank 3 -> 0.5, rank 5 -> ~0.39,
    rank 10 -> ~0.29). The mean discount, capped at 1.0, is scaled to 25.

    Args:
        positions: 1-based rank of the brand for each query; ``None`` means
            the brand did not appear in the recommendation list.

    Returns:
        DimensionScore: The recommendation-rank dimension. The score is
        ``0.0`` when ``positions`` is empty or holds no valid rank.
    """
    max_score = 25.0

    if not positions:
        return DimensionScore(
            name="推荐排名",
            score=0.0,
            max_score=max_score,
            percentage=0.0,
            detail={"positions": [], "avg_position": None},
        )

    # Keep only real ranks. ``None`` and values below 1 are dropped, so they
    # do not lower the average.
    # NOTE(review): the previous comment said unranked entries are "treated
    # as low rank", but they are excluded from the mean - confirm intent.
    ranked = [p for p in positions if p is not None and p >= 1]

    if not ranked:
        return DimensionScore(
            name="推荐排名",
            score=0.0,
            max_score=max_score,
            percentage=0.0,
            detail={
                "positions": positions,
                "avg_position": None,
                "valid_count": 0,
            },
        )

    discounts = [1.0 / math.log2(p + 1) for p in ranked]
    mean_discount = sum(discounts) / len(discounts)

    # A single discount never exceeds 1.0; the cap is a safety net.
    normalized = min(1.0, mean_discount)
    mean_position = sum(ranked) / len(ranked)

    return DimensionScore(
        name="推荐排名",
        score=normalized * max_score,
        max_score=max_score,
        percentage=round(normalized * 100, 2),
        detail={
            "positions": ranked,
            "avg_position": round(mean_position, 2),
            "valid_count": len(ranked),
            "total_count": len(positions),
        },
    )
|
||
|
||
|
||
def calculate_sentiment_score_v2(
    sentiment_counts: dict[str, int],
) -> DimensionScore:
    """Compute the sentiment dimension (maximum 20 points).

    Formula: ``(positive * 1 + neutral * 0.5 + negative * 0) / total * 20``.

    Args:
        sentiment_counts: Counts keyed by ``"positive"``, ``"neutral"`` and
            ``"negative"``; missing keys count as 0.

    Returns:
        DimensionScore: The sentiment dimension. When the three counts sum
        to zero or less, the score is ``0.0`` and ``detail`` is the input
        mapping itself.
    """
    max_score = 20.0

    positive, neutral, negative = (
        sentiment_counts.get(label, 0)
        for label in ("positive", "neutral", "negative")
    )
    total = positive + neutral + negative

    if total <= 0:
        return DimensionScore(
            name="情感倾向",
            score=0.0,
            max_score=max_score,
            percentage=0.0,
            detail=sentiment_counts,
        )

    # Weighted share: positive counts fully, neutral half, negative nothing.
    sentiment_value = (positive * 1.0 + neutral * 0.5 + negative * 0.0) / total

    breakdown = {
        "positive": positive,
        "neutral": neutral,
        "negative": negative,
        # total is known to be positive here, so the divisions are safe.
        "positive_rate": round(positive / total, 4),
        "negative_rate": round(negative / total, 4),
    }
    return DimensionScore(
        name="情感倾向",
        score=sentiment_value * max_score,
        max_score=max_score,
        percentage=round(sentiment_value * 100, 2),
        detail=breakdown,
    )
|
||
|
||
|
||
def calculate_citation_quality_v2(
    citations: list[CitationResult],
) -> DimensionScore:
    """Compute the citation-quality dimension (maximum 15 points).

    Per cited entry: ``depth * sentiment_modifier`` where

    * depth = position coefficient * 0.4 + length coefficient * 0.3
      + confidence coefficient * 0.3
    * sentiment modifier = 1.0 (positive), 0.7 (neutral), 0.3 (negative);
      unknown or missing labels use 0.7.

    The mean over all cited entries is scaled to 15 points.

    Args:
        citations: Citation results; entries whose ``cited`` flag is falsy
            are ignored. ``None`` or an empty list yields a zero score.

    Returns:
        DimensionScore: The citation-quality dimension. Note that
        ``detail["avg_depth_score"]`` holds the mean of
        ``depth * sentiment_modifier``, not of the depth alone.
    """
    max_score = 15.0
    default_confidence = 0.5
    default_sentiment_mod = 0.7
    # Loop-invariant lookup table, built once.
    sentiment_mod_map = {
        "positive": 1.0,
        "neutral": 0.7,
        "negative": 0.3,
    }

    cited_items = [c for c in (citations or []) if c.cited]
    if not cited_items:
        return DimensionScore(
            name="引用质量",
            score=0.0,
            max_score=max_score,
            percentage=0.0,
            detail={"cited_count": 0},
        )

    total_quality = 0.0
    for citation in cited_items:
        position_coef = _get_position_coefficient(citation.position)
        length_coef = _get_length_coefficient(citation.citation_text)

        # Only a missing confidence falls back to the default. A genuine 0.0
        # must stay 0.0 rather than being promoted to 0.5 by a truthiness test.
        # Assumes confidence is on a 0-1 scale - TODO confirm against the
        # CitationResult schema. Out-of-range values are clamped so the
        # dimension cannot leave 0..max_score.
        confidence = citation.confidence
        if confidence is None:
            confidence_coef = default_confidence
        else:
            confidence_coef = min(1.0, max(0.0, confidence))

        depth_score = (
            position_coef * 0.4
            + length_coef * 0.3
            + confidence_coef * 0.3
        )

        # A missing sentiment label is treated like an unknown one.
        sentiment = citation.sentiment
        sentiment_key = sentiment.lower() if isinstance(sentiment, str) else None
        sentiment_mod = sentiment_mod_map.get(sentiment_key, default_sentiment_mod)

        total_quality += depth_score * sentiment_mod

    avg_quality = total_quality / len(cited_items)
    score = avg_quality * max_score

    return DimensionScore(
        name="引用质量",
        score=score,
        max_score=max_score,
        percentage=round(avg_quality * 100, 2),
        detail={
            "cited_count": len(cited_items),
            "avg_depth_score": round(avg_quality, 4),
        },
    )
|
||
|
||
|
||
def calculate_competitive_position_v2(
    brand_mentions: int,
    competitor_mentions: dict[str, int],
) -> DimensionScore:
    """Compute the competitive-position dimension (maximum 15 points).

    Formula: ``(ahead_ratio * 0.6 + sov * 0.4) * 15`` where

    * ahead_ratio = competitors with strictly fewer mentions than the brand,
      divided by the number of competitors;
    * sov = brand mentions divided by brand plus all competitor mentions.

    Args:
        brand_mentions: Number of times the brand was mentioned.
        competitor_mentions: Mapping of competitor name to mention count.

    Returns:
        DimensionScore: The competitive-position dimension. Without any
        competitor data a neutral midpoint (7.5 points, 50%) is returned.
        In ``detail``, ``behind_count`` includes ties.
    """
    max_score = 15.0

    if not competitor_mentions:
        # No competitor data: fall back to the middle of the scale.
        return DimensionScore(
            name="竞品对比",
            score=7.5,
            max_score=max_score,
            percentage=50.0,
            detail={
                "brand_mentions": brand_mentions,
                "competitor_count": 0,
                "ahead_count": 0,
                "behind_count": 0,
                "note": "无竞品数据,使用默认中间分",
            },
        )

    rival_counts = list(competitor_mentions.values())
    total_competitors = len(competitor_mentions)

    ahead_count = len([c for c in rival_counts if brand_mentions > c])
    behind_count = len([c for c in rival_counts if brand_mentions <= c])

    # The mapping is non-empty here, so the divisor is at least 1.
    ahead_ratio = ahead_count / total_competitors

    # Share of voice: the brand's portion of all counted mentions.
    total_mentions = brand_mentions + sum(rival_counts)
    sov = brand_mentions / total_mentions if total_mentions > 0 else 0.0

    # Blend: ahead ratio weighs 60%, share of voice 40%.
    combined = ahead_ratio * 0.6 + sov * 0.4

    return DimensionScore(
        name="竞品对比",
        score=combined * max_score,
        max_score=max_score,
        percentage=round(combined * 100, 2),
        detail={
            "brand_mentions": brand_mentions,
            "competitor_count": total_competitors,
            "ahead_count": ahead_count,
            "behind_count": behind_count,
            "sov": round(sov, 4),
            "ahead_ratio": round(ahead_ratio, 4),
        },
    )
|
||
|
||
|
||
def calculate_v2_score(
    mentioned_count: int,
    total_queries: int,
    positions: list[int | None],
    sentiment_counts: dict[str, int],
    citations: list[CitationResult],
    brand_mentions: int,
    competitor_mentions: dict[str, int],
) -> ScoringResultV2:
    """Compute the full V2 brand-visibility score.

    Evaluates the five dimensions in order (mention rate, recommendation
    rank, sentiment, citation quality, competitive position) and bundles
    them into a ``ScoringResultV2``.

    Args:
        mentioned_count: Number of times the brand was mentioned.
        total_queries: Total number of queries.
        positions: Rank of the brand for each query.
        sentiment_counts: Counts keyed by "positive", "neutral", "negative".
        citations: Citation results.
        brand_mentions: Brand mention count used for competitor comparison.
        competitor_mentions: Mapping of competitor name to mention count.

    Returns:
        ScoringResultV2: The combined V2 result.
    """
    return ScoringResultV2(
        # 1. Mention rate (25 points)
        mention_rate=calculate_mention_rate_v2(mentioned_count, total_queries),
        # 2. Recommendation rank (25 points)
        recommendation_rank=calculate_recommendation_rank_v2(positions),
        # 3. Sentiment (20 points)
        sentiment_score=calculate_sentiment_score_v2(sentiment_counts),
        # 4. Citation quality (15 points)
        citation_quality=calculate_citation_quality_v2(citations),
        # 5. Competitive position (15 points)
        competitive_position=calculate_competitive_position_v2(
            brand_mentions, competitor_mentions
        ),
    )
|
||
|
||
|
||
# ============================================================
# Health level helpers
# ============================================================
|
||
|
||
def get_health_level(score: float) -> str:
    """Map a score to its health level.

    80 and above -> excellent (green)
    60 up to 80  -> good (yellow)
    40 up to 60  -> pass (orange)
    below 40     -> danger (red)
    """
    # Ordered from highest to lowest; the first threshold reached wins.
    thresholds = ((80, "excellent"), (60, "good"), (40, "pass"))
    for floor, level in thresholds:
        if score >= floor:
            return level
    return "danger"
|
||
|
||
|
||
def get_health_level_label(level: str) -> str:
    """Return the Chinese display label for a health level.

    Unrecognised levels map to the "unknown" label.
    """
    translations = {
        "excellent": "优秀",
        "good": "良好",
        "pass": "及格",
        "danger": "危险",
    }
    if level in translations:
        return translations[level]
    return "未知"
|
||
|
||
|
||
# ============================================================
# ScoringService (V1-compatible interface + new V2 interface)
# ============================================================
|
||
|
||
class ScoringService:
    """Scoring service exposing the V1-compatible and the V2 interfaces.

    Every method delegates to the module-level function of the same
    purpose; the service itself holds no state.
    """

    # --- V1-compatible interface ---

    def calculate_mention_rate_score(self, brand_citations: int, total_queries: int) -> float:
        """V1 compatibility: delegate to the module-level mention-rate score."""
        result = calculate_mention_rate_score(brand_citations, total_queries)
        return result

    def calculate_sov_score(self, brand_citations: int, total_citations: int) -> float:
        """V1 compatibility: delegate to the module-level SOV score."""
        result = calculate_sov_score(brand_citations, total_citations)
        return result

    def calculate_quality_score(self, citations: list[CitationResult]) -> float:
        """V1 compatibility: delegate to the module-level citation-quality score."""
        result = calculate_quality_score(citations)
        return result

    def calculate_overall_score(
        self,
        mention_rate_score: float,
        sov_score: float,
        quality_score: float,
    ) -> float:
        """V1 compatibility: delegate to the module-level overall score."""
        result = calculate_overall_score(mention_rate_score, sov_score, quality_score)
        return result

    # --- V2 interface ---

    def calculate_v2(
        self,
        mentioned_count: int,
        total_queries: int,
        positions: list[int | None],
        sentiment_counts: dict[str, int],
        citations: list[CitationResult],
        brand_mentions: int,
        competitor_mentions: dict[str, int],
    ) -> ScoringResultV2:
        """Compute the V2 brand-visibility score via ``calculate_v2_score``."""
        forwarded = {
            "mentioned_count": mentioned_count,
            "total_queries": total_queries,
            "positions": positions,
            "sentiment_counts": sentiment_counts,
            "citations": citations,
            "brand_mentions": brand_mentions,
            "competitor_mentions": competitor_mentions,
        }
        return calculate_v2_score(**forwarded)
|