geo/backend/app/services/scoring_service.py

655 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
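Scoring service V2 - brand visibility scoring system.
Scoring dimensions (total: 100 points):
- Mention Rate: 25 points - how often the brand is mentioned in AI answers
- Recommendation Rank: 25 points - the brand's position in recommendation lists
- Sentiment Score: 20 points - the AI's sentiment toward the brand
- Citation Quality: 15 points - depth and positivity of the cited content
- Competitive Position: 15 points - performance relative to competitors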
"""
from __future__ import annotations
import logging
import math
from dataclasses import dataclass, field
from app.schemas.scoring import CitationResult
logger = logging.getLogger(__name__)
# ============================================================
# V1 compatibility functions (legacy interface kept for backward compatibility)
# ============================================================
def calculate_mention_rate_score(brand_citations: int, total_queries: int) -> float:
    """V1-compatible mention-rate score.

    Args:
        brand_citations: Number of queries in which the brand was cited.
        total_queries: Total number of queries issued.

    Returns:
        ``brand_citations / total_queries`` multiplied by 100, or 0.0 when
        ``total_queries`` is zero or negative. The result is not clamped.
    """
    return 0.0 if total_queries <= 0 else (brand_citations / total_queries) * 100
def calculate_sov_score(brand_citations: int, total_citations: int) -> float:
    """V1-compatible share-of-voice (SOV) score.

    Args:
        brand_citations: Number of citations attributed to the brand.
        total_citations: Total number of citations across all brands.

    Returns:
        ``brand_citations / total_citations`` multiplied by 100, or 0.0 when
        ``total_citations`` is zero or negative. The result is not clamped.
    """
    return 0.0 if total_citations <= 0 else (brand_citations / total_citations) * 100
def _get_sentiment_coefficient(sentiment: str) -> float:
"""V1兼容: 获取情感系数"""
sentiment_map = {
"positive": 1.0,
"neutral": 0.6,
"negative": 0.2,
}
return sentiment_map.get(sentiment.lower(), 0.6)
def _get_position_coefficient(position: int | None) -> float:
"""V1兼容: 获取位置系数"""
if position is None:
return 0.4
if position == 1:
return 1.0
if position <= 3:
return 0.8
if position <= 5:
return 0.6
return 0.4
def _get_length_coefficient(citation_text: str | None) -> float:
"""V1兼容: 获取长度系数"""
if not citation_text:
return 0.6
text_length = len(citation_text)
if text_length > 100:
return 1.0
if text_length >= 50:
return 0.8
return 0.6
def calculate_quality_score(citations: list[CitationResult]) -> float:
    """V1-compatible citation quality score.

    Only entries whose ``cited`` attribute is truthy contribute. Each one
    is scored as ``sentiment * 0.4 + position * 0.3 + length * 0.3`` using
    the V1 coefficient helpers, and the mean is scaled by 100.

    Args:
        citations: Citation results to evaluate.

    Returns:
        The mean per-citation quality multiplied by 100, or 0.0 when the
        input is empty or contains no cited entries.
    """
    if not citations:
        return 0.0

    cited_entries = [entry for entry in citations if entry.cited]
    if not cited_entries:
        return 0.0

    accumulated = 0.0
    for entry in cited_entries:
        accumulated += (
            _get_sentiment_coefficient(entry.sentiment) * 0.4
            + _get_position_coefficient(entry.position) * 0.3
            + _get_length_coefficient(entry.citation_text) * 0.3
        )
    return (accumulated / len(cited_entries)) * 100
def calculate_overall_score(
    mention_rate_score: float,
    sov_score: float,
    quality_score: float,
) -> float:
    """V1-compatible overall score.

    Args:
        mention_rate_score: Mention-rate score (weight 0.3).
        sov_score: Share-of-voice score (weight 0.4).
        quality_score: Citation quality score (weight 0.3).

    Returns:
        The weighted sum of the three inputs, clamped to the range
        0.0-100.0.
    """
    mention_part = mention_rate_score * 0.3
    sov_part = sov_score * 0.4
    quality_part = quality_score * 0.3
    blended = mention_part + sov_part + quality_part
    floored = max(0.0, blended)
    return min(100.0, floored)
# ============================================================
# V2 scoring data structures
# ============================================================
@dataclass
class DimensionScore:
    """Score details for a single scoring dimension.

    Attributes:
        name: Display name of the dimension.
        score: Points earned in this dimension (intended range
            0..max_score).
        max_score: Maximum points available for this dimension.
        percentage: Score rate on a 0-100 scale.
        detail: Free-form dictionary with scoring details; each instance
            gets its own empty dict when none is supplied.
    """

    name: str
    score: float
    max_score: float
    percentage: float
    detail: dict = field(default_factory=dict)
@dataclass
class ScoringResultV2:
    """Aggregated V2 scoring result across the five dimensions.

    ``overall_score`` and ``health_level`` are recomputed from the five
    dimension scores in ``__post_init__``, so any values passed for them
    to the constructor are overwritten.
    """

    # The five dimension scores.
    mention_rate: DimensionScore  # mention rate (25 points)
    recommendation_rank: DimensionScore  # recommendation rank (25 points)
    sentiment_score: DimensionScore  # sentiment (20 points)
    citation_quality: DimensionScore  # citation quality (15 points)
    competitive_position: DimensionScore  # competitive position (15 points)
    # Sum of the dimension scores, clamped to 0-100 and rounded to 2 places.
    overall_score: float = 0.0
    # One of: excellent / good / pass / danger.
    health_level: str = "danger"

    def __post_init__(self):
        """Compute the overall score and derive the health level from it."""
        raw_total = (
            self.mention_rate.score
            + self.recommendation_rank.score
            + self.sentiment_score.score
            + self.citation_quality.score
            + self.competitive_position.score
        )
        self.overall_score = round(min(100.0, max(0.0, raw_total)), 2)

        # Bands are checked from highest to lowest; the first match wins.
        bands = ((80, "excellent"), (60, "good"), (40, "pass"))
        self.health_level = next(
            (label for floor, label in bands if self.overall_score >= floor),
            "danger",
        )

    def to_dict(self) -> dict:
        """Return the result as a plain dictionary.

        Dimension ``score`` and ``percentage`` values are rounded to two
        decimal places; ``detail`` dicts are passed through by reference.
        """
        dimension_keys = (
            "mention_rate",
            "recommendation_rank",
            "sentiment_score",
            "citation_quality",
            "competitive_position",
        )
        dimensions = {}
        for key in dimension_keys:
            dim = getattr(self, key)
            dimensions[key] = {
                "name": dim.name,
                "score": round(dim.score, 2),
                "max_score": dim.max_score,
                "percentage": round(dim.percentage, 2),
                "detail": dim.detail,
            }
        return {
            "overall_score": self.overall_score,
            "health_level": self.health_level,
            "dimensions": dimensions,
        }
# ============================================================
# V2 scoring functions
# ============================================================
def calculate_mention_rate_v2(
    mentioned_count: int,
    total_queries: int,
) -> DimensionScore:
    """Compute the mention-rate dimension (max 25 points).

    Formula: ``(mentioned_count / total_queries) * 25``, with the rate
    clamped to the range 0-1 so the score always stays within
    0..max_score.

    Args:
        mentioned_count: Number of times the brand was mentioned.
        total_queries: Total number of queries issued.

    Returns:
        DimensionScore: The mention-rate dimension score. When
        ``total_queries`` is zero or negative the score is 0.0 and the
        detail reports zero counts.
    """
    max_score = 25.0
    if total_queries <= 0:
        return DimensionScore(
            name="提及率",
            score=0.0,
            max_score=max_score,
            percentage=0.0,
            detail={"mentioned_count": 0, "total_queries": 0},
        )
    # Clamp: a count above total_queries (or a negative count) must not push
    # this dimension past its 25-point cap or below zero.
    rate = min(1.0, max(0.0, mentioned_count / total_queries))
    score = rate * max_score
    return DimensionScore(
        name="提及率",
        score=score,
        max_score=max_score,
        percentage=round(rate * 100, 2),
        detail={
            # Raw inputs are reported as given; only the rate is clamped.
            "mentioned_count": mentioned_count,
            "total_queries": total_queries,
            "rate": round(rate, 4),
        },
    )
def calculate_recommendation_rank_v2(
    positions: list[int | None],
) -> DimensionScore:
    """Compute the recommendation-rank dimension (max 25 points).

    Each valid position contributes ``1 / log2(position + 1)``, so rank 1
    yields 1.0 and later ranks yield progressively less (rank 2 ~ 0.63,
    rank 3 = 0.5, rank 5 ~ 0.39, rank 10 ~ 0.29). The mean of these values,
    capped at 1.0, is multiplied by 25.

    Entries that are ``None`` or below 1 are dropped before averaging, so
    they do not lower the mean.

    Args:
        positions: 1-based rank of the brand in each query; ``None`` means
            the brand did not appear in the recommendation list.

    Returns:
        DimensionScore: The recommendation-rank dimension score; 0.0 when
        ``positions`` is empty or contains no valid rank.
    """
    max_score = 25.0
    dimension_name = "推荐排名"

    if not positions:
        return DimensionScore(
            name=dimension_name,
            score=0.0,
            max_score=max_score,
            percentage=0.0,
            detail={"positions": [], "avg_position": None},
        )

    ranked = [pos for pos in positions if pos is not None and pos >= 1]
    if not ranked:
        return DimensionScore(
            name=dimension_name,
            score=0.0,
            max_score=max_score,
            percentage=0.0,
            detail={
                "positions": positions,
                "avg_position": None,
                "valid_count": 0,
            },
        )

    # Logarithmic decay per rank.
    discounts = [1.0 / math.log2(pos + 1) for pos in ranked]
    mean_discount = sum(discounts) / len(discounts)
    # Each discount is at most 1.0; the cap keeps the value in 0-1.
    normalized = min(1.0, mean_discount)
    mean_position = sum(ranked) / len(ranked)

    return DimensionScore(
        name=dimension_name,
        score=normalized * max_score,
        max_score=max_score,
        percentage=round(normalized * 100, 2),
        detail={
            "positions": ranked,
            "avg_position": round(mean_position, 2),
            "valid_count": len(ranked),
            "total_count": len(positions),
        },
    )
def calculate_sentiment_score_v2(
    sentiment_counts: dict[str, int],
) -> DimensionScore:
    """Compute the sentiment dimension (max 20 points).

    Formula: ``(positive * 1 + neutral * 0.5 + negative * 0) / total * 20``
    where ``total`` is the sum of the three counts.

    Args:
        sentiment_counts: Mapping with optional "positive", "neutral" and
            "negative" counts; missing keys count as 0.

    Returns:
        DimensionScore: The sentiment dimension score. When the total is
        zero or negative the score is 0.0 and ``detail`` is a copy of the
        input mapping.
    """
    max_score = 20.0
    positive = sentiment_counts.get("positive", 0)
    neutral = sentiment_counts.get("neutral", 0)
    negative = sentiment_counts.get("negative", 0)
    total = positive + neutral + negative
    if total <= 0:
        return DimensionScore(
            name="情感倾向",
            score=0.0,
            max_score=max_score,
            percentage=0.0,
            # Copy so later changes to the result's detail (or to the
            # caller's dict) cannot leak into the other.
            detail=dict(sentiment_counts),
        )
    # Weighted sentiment value in the range 0-1 for non-negative counts.
    sentiment_value = (positive * 1.0 + neutral * 0.5 + negative * 0.0) / total
    score = sentiment_value * max_score
    return DimensionScore(
        name="情感倾向",
        score=score,
        max_score=max_score,
        percentage=round(sentiment_value * 100, 2),
        detail={
            "positive": positive,
            "neutral": neutral,
            "negative": negative,
            # total > 0 is guaranteed by the guard above.
            "positive_rate": round(positive / total, 4),
            "negative_rate": round(negative / total, 4),
        },
    )
def calculate_citation_quality_v2(
    citations: list[CitationResult],
) -> DimensionScore:
    """Compute the citation-quality dimension (max 15 points).

    For every entry whose ``cited`` attribute is truthy:

    * depth = position coef * 0.4 + length coef * 0.3 + confidence * 0.3
    * quality = depth * sentiment modifier
      (positive = 1.0, neutral = 0.7, negative = 0.3, anything else = 0.7)

    The mean quality is multiplied by 15.

    Args:
        citations: Citation results to evaluate; ``None`` or an empty list
            yields a zero score.

    Returns:
        DimensionScore: The citation-quality dimension score. The
        ``avg_depth_score`` detail holds the mean sentiment-adjusted
        quality.
    """
    max_score = 15.0
    default_confidence = 0.5
    default_sentiment_mod = 0.7
    # Built once; it does not depend on the individual citation.
    sentiment_mod_map = {
        "positive": 1.0,
        "neutral": default_sentiment_mod,
        "negative": 0.3,
    }

    cited_items = [c for c in (citations or []) if c.cited]
    if not cited_items:
        return DimensionScore(
            name="引用质量",
            score=0.0,
            max_score=max_score,
            percentage=0.0,
            detail={"cited_count": 0},
        )

    total_quality = 0.0
    for citation in cited_items:
        position_coef = _get_position_coefficient(citation.position)
        length_coef = _get_length_coefficient(citation.citation_text)
        # Only a missing confidence falls back to the default; a real 0.0
        # must stay 0.0 instead of being promoted to 0.5.
        confidence_coef = (
            default_confidence
            if citation.confidence is None
            else citation.confidence
        )
        depth_score = (
            position_coef * 0.4
            + length_coef * 0.3
            + confidence_coef * 0.3
        )
        # A missing or non-string sentiment uses the default modifier
        # rather than raising on ``.lower()``.
        sentiment = citation.sentiment
        sentiment_key = sentiment.lower() if isinstance(sentiment, str) else ""
        sentiment_mod = sentiment_mod_map.get(sentiment_key, default_sentiment_mod)
        total_quality += depth_score * sentiment_mod

    avg_quality = total_quality / len(cited_items)
    score = avg_quality * max_score
    return DimensionScore(
        name="引用质量",
        score=score,
        max_score=max_score,
        percentage=round(avg_quality * 100, 2),
        detail={
            "cited_count": len(cited_items),
            "avg_depth_score": round(avg_quality, 4),
        },
    )
def calculate_competitive_position_v2(
    brand_mentions: int,
    competitor_mentions: dict[str, int],
) -> DimensionScore:
    """Compute the competitive-position dimension (max 15 points).

    The score blends two factors:

    * ahead ratio (weight 0.6): share of competitors whose mention count is
      strictly lower than the brand's;
    * share of voice (weight 0.4): the brand's mentions divided by the sum
      of brand and competitor mentions.

    Competitors with a count equal to or above the brand's are reported
    under ``behind_count``.

    Args:
        brand_mentions: Number of times the brand was mentioned.
        competitor_mentions: Mapping of competitor name to mention count.

    Returns:
        DimensionScore: The competitive-position dimension score. Without
        competitor data a fixed midpoint of 7.5 (50%) is returned.
    """
    max_score = 15.0
    dimension_name = "竞品对比"

    if not competitor_mentions:
        # No competitor data: fall back to the midpoint score.
        return DimensionScore(
            name=dimension_name,
            score=7.5,
            max_score=max_score,
            percentage=50.0,
            detail={
                "brand_mentions": brand_mentions,
                "competitor_count": 0,
                "ahead_count": 0,
                "behind_count": 0,
                "note": "无竞品数据,使用默认中间分",
            },
        )

    competitor_total = len(competitor_mentions)
    ahead = 0
    behind = 0
    for rival_count in competitor_mentions.values():
        if brand_mentions > rival_count:
            ahead += 1
        elif brand_mentions <= rival_count:
            behind += 1

    # competitor_total is at least 1 here because the mapping is non-empty.
    lead_ratio = ahead / competitor_total

    # Share of voice: brand mentions as a fraction of all mentions.
    all_mentions = brand_mentions + sum(competitor_mentions.values())
    share_of_voice = brand_mentions / all_mentions if all_mentions > 0 else 0.0

    blended = lead_ratio * 0.6 + share_of_voice * 0.4
    return DimensionScore(
        name=dimension_name,
        score=blended * max_score,
        max_score=max_score,
        percentage=round(blended * 100, 2),
        detail={
            "brand_mentions": brand_mentions,
            "competitor_count": competitor_total,
            "ahead_count": ahead,
            "behind_count": behind,
            "sov": round(share_of_voice, 4),
            "ahead_ratio": round(lead_ratio, 4),
        },
    )
def calculate_v2_score(
    mentioned_count: int,
    total_queries: int,
    positions: list[int | None],
    sentiment_counts: dict[str, int],
    citations: list[CitationResult],
    brand_mentions: int,
    competitor_mentions: dict[str, int],
) -> ScoringResultV2:
    """Compute the full V2 brand-visibility score.

    Each of the five dimensions is computed by its dedicated function and
    the results are combined into a ``ScoringResultV2``.

    Args:
        mentioned_count: Number of times the brand was mentioned.
        total_queries: Total number of queries issued.
        positions: Rank of the brand in each query.
        sentiment_counts: Counts keyed by "positive", "neutral" and
            "negative".
        citations: Citation results to evaluate.
        brand_mentions: Brand mention count used for the competitor
            comparison.
        competitor_mentions: Mapping of competitor name to mention count.

    Returns:
        ScoringResultV2: The combined V2 scoring result.
    """
    return ScoringResultV2(
        # 1. Mention rate (25 points)
        mention_rate=calculate_mention_rate_v2(mentioned_count, total_queries),
        # 2. Recommendation rank (25 points)
        recommendation_rank=calculate_recommendation_rank_v2(positions),
        # 3. Sentiment (20 points)
        sentiment_score=calculate_sentiment_score_v2(sentiment_counts),
        # 4. Citation quality (15 points)
        citation_quality=calculate_citation_quality_v2(citations),
        # 5. Competitive position (15 points)
        competitive_position=calculate_competitive_position_v2(
            brand_mentions, competitor_mentions
        ),
    )
# ============================================================
# Health-level utility functions
# ============================================================
def get_health_level(score: float) -> str:
    """Map a score to its health level.

    80 and above -> "excellent" (green)
    60 to below 80 -> "good" (yellow)
    40 to below 60 -> "pass" (orange)
    below 40 -> "danger" (red)
    """
    # Bands are checked from highest to lowest; the first match wins.
    bands = ((80, "excellent"), (60, "good"), (40, "pass"))
    return next((label for floor, label in bands if score >= floor), "danger")
def get_health_level_label(level: str) -> str:
    """Return the Chinese display label for a health level.

    Unknown levels map to the "unknown" label.
    """
    labels = {
        "excellent": "优秀",
        "good": "良好",
        "pass": "及格",
        "danger": "危险",
    }
    try:
        return labels[level]
    except KeyError:
        return "未知"
# ============================================================
# ScoringService (V1-compatible interface + new V2 interface)
# ============================================================
class ScoringService:
    """Scoring service exposing both the V1 and V2 interfaces.

    Every method delegates to the module-level function of the same
    purpose; the class holds no state of its own.
    """

    # --- V1-compatible interface ---

    def calculate_mention_rate_score(self, brand_citations: int, total_queries: int) -> float:
        """V1-compatible: compute the mention-rate score."""
        return calculate_mention_rate_score(
            brand_citations=brand_citations,
            total_queries=total_queries,
        )

    def calculate_sov_score(self, brand_citations: int, total_citations: int) -> float:
        """V1-compatible: compute the share-of-voice score."""
        return calculate_sov_score(
            brand_citations=brand_citations,
            total_citations=total_citations,
        )

    def calculate_quality_score(self, citations: list[CitationResult]) -> float:
        """V1-compatible: compute the citation quality score."""
        return calculate_quality_score(citations=citations)

    def calculate_overall_score(
        self,
        mention_rate_score: float,
        sov_score: float,
        quality_score: float,
    ) -> float:
        """V1-compatible: compute the overall score."""
        return calculate_overall_score(
            mention_rate_score=mention_rate_score,
            sov_score=sov_score,
            quality_score=quality_score,
        )

    # --- V2 interface ---

    def calculate_v2(
        self,
        mentioned_count: int,
        total_queries: int,
        positions: list[int | None],
        sentiment_counts: dict[str, int],
        citations: list[CitationResult],
        brand_mentions: int,
        competitor_mentions: dict[str, int],
    ) -> ScoringResultV2:
        """Compute the V2 brand-visibility score."""
        return calculate_v2_score(
            mentioned_count,
            total_queries,
            positions,
            sentiment_counts,
            citations,
            brand_mentions,
            competitor_mentions,
        )