# geo/backend/app/services/scoring_service.py

"""
评分服务V2 - 品牌可见性评分系统

评分维度（总分100）：
- 提及率 (Mention Rate): 25分 - 品牌在AI回答中被提及的频率
- 推荐排名 (Recommendation Rank): 25分 - 品牌在推荐列表中的位置
- 情感倾向 (Sentiment Score): 20分 - AI对品牌的情感倾向
- 引用质量 (Citation Quality): 15分 - 引用内容的深度和正面性
- 竞品对比 (Competitive Position): 15分 - 相对于竞品的表现
"""
from __future__ import annotations

import logging
import math
from dataclasses import dataclass, field

from app.schemas.scoring import CitationResult

logger = logging.getLogger(__name__)


# ============================================================
# V1 兼容函数（保留旧接口，供向后兼容）
# ============================================================

def calculate_mention_rate_score(brand_citations: int, total_queries: int) -> float:
    """V1-compatible mention-rate score.

    Expresses ``brand_citations`` as a percentage of ``total_queries``.
    The value is not clamped, so it exceeds 100 whenever the citation
    count is larger than the query count.

    Args:
        brand_citations: Number of queries in which the brand was cited.
        total_queries: Total number of queries issued.

    Returns:
        The percentage, or 0.0 when ``total_queries`` is zero or negative.
    """
    return 0.0 if total_queries <= 0 else (brand_citations / total_queries) * 100


def calculate_sov_score(brand_citations: int, total_citations: int) -> float:
    """V1-compatible share-of-voice (SOV) score.

    Args:
        brand_citations: Citations attributed to the brand.
        total_citations: Citations across all brands.

    Returns:
        The brand's citations as a percentage of all citations, or 0.0
        when ``total_citations`` is zero or negative. Not clamped.
    """
    if total_citations <= 0:
        # No citations at all: there is no share to compute.
        return 0.0

    share_of_voice = brand_citations / total_citations
    return share_of_voice * 100


def _get_sentiment_coefficient(sentiment: str | None) -> float:
    """V1-compatible: map a sentiment label to its quality coefficient.

    The lookup ignores case and surrounding whitespace. Anything that is
    not a recognised label (including ``None`` and non-string values)
    falls back to the neutral coefficient instead of raising, matching how
    the position and length helpers tolerate missing input.

    Args:
        sentiment: Sentiment label such as "positive", "neutral" or
            "negative"; may be ``None``.

    Returns:
        1.0 for positive, 0.6 for neutral, 0.2 for negative, and 0.6 for
        anything else.
    """
    default_coefficient = 0.6
    if not isinstance(sentiment, str):
        # A missing label is scored like an unknown one rather than crashing.
        return default_coefficient

    coefficients = {
        "positive": 1.0,
        "neutral": 0.6,
        "negative": 0.2,
    }
    return coefficients.get(sentiment.strip().lower(), default_coefficient)


def _get_position_coefficient(position: int | None) -> float:
    """V1-compatible: map a 1-based rank to its quality coefficient.

    Args:
        position: 1-based position of the brand, or ``None`` when the
            brand has no position.

    Returns:
        1.0 for position 1, 0.8 for positions 2-3, 0.6 for positions 4-5,
        and 0.4 for anything later. ``None`` and positions below 1 also
        yield 0.4.
    """
    # Positions are 1-based, so zero or a negative value is not a real rank.
    # Without this guard such values would satisfy ``position <= 3`` and be
    # rewarded like a top-3 placement.
    if position is None or position < 1:
        return 0.4
    if position == 1:
        return 1.0
    if position <= 3:
        return 0.8
    if position <= 5:
        return 0.6
    return 0.4


def _get_length_coefficient(citation_text: str | None) -> float:
    """V1-compatible: map the citation text length to a quality coefficient.

    Args:
        citation_text: The cited text; may be ``None`` or empty.

    Returns:
        1.0 for more than 100 characters, 0.8 for 50-100 characters, and
        0.6 for shorter, empty or missing text.
    """
    # Missing or empty text is handled as length zero, which lands in the
    # lowest tier.
    char_count = len(citation_text) if citation_text else 0
    if char_count > 100:
        return 1.0
    return 0.8 if char_count >= 50 else 0.6


def calculate_quality_score(citations: list[CitationResult]) -> float:
    """V1-compatible citation quality score.

    Every citation whose ``cited`` flag is truthy gets a quality value of
    ``sentiment * 0.4 + position * 0.3 + length * 0.3``, using the three
    coefficient helpers of this module. The values are averaged over the
    cited entries and multiplied by 100.

    Args:
        citations: Citation results to evaluate.

    Returns:
        The averaged quality times 100, or 0.0 when ``citations`` is empty
        or contains no cited entry.
    """
    if not citations:
        return 0.0

    quality_sum = 0.0
    cited_total = 0
    for item in citations:
        if item.cited:
            cited_total += 1
            quality_sum += (
                _get_sentiment_coefficient(item.sentiment) * 0.4
                + _get_position_coefficient(item.position) * 0.3
                + _get_length_coefficient(item.citation_text) * 0.3
            )

    return (quality_sum / cited_total) * 100 if cited_total else 0.0


def calculate_overall_score(
    mention_rate_score: float,
    sov_score: float,
    quality_score: float,
) -> float:
    """V1-compatible overall score.

    Weights: mention rate 30%, share of voice 40%, citation quality 30%.

    Args:
        mention_rate_score: Mention-rate score.
        sov_score: Share-of-voice score.
        quality_score: Citation quality score.

    Returns:
        The weighted sum, limited to the range 0.0-100.0.
    """
    weighted = mention_rate_score * 0.3
    weighted += sov_score * 0.4
    weighted += quality_score * 0.3

    floored = max(0.0, weighted)
    return min(100.0, floored)


# ============================================================
# V2 评分数据结构
# ============================================================

@dataclass
class DimensionScore:
    """Score breakdown for a single scoring dimension.

    The ranges noted on the fields are the intended ones; this class does
    not validate or clamp them.
    """
    name: str           # display name of the dimension
    score: float        # points earned in this dimension (intended 0..max_score)
    max_score: float    # maximum points available for this dimension
    percentage: float   # score rate (intended 0..100)
    detail: dict = field(default_factory=dict)  # supporting figures behind the score


@dataclass
class ScoringResultV2:
    """Aggregated V2 scoring result built from five dimension scores.

    The five dimension fields are supplied by the caller.
    ``overall_score`` and ``health_level`` are always recomputed in
    ``__post_init__``, so values passed for them at construction time are
    overwritten.
    """

    # Dimension scores; nominal maxima are 25 / 25 / 20 / 15 / 15 points.
    mention_rate: DimensionScore          # mention rate (25)
    recommendation_rank: DimensionScore   # recommendation rank (25)
    sentiment_score: DimensionScore       # sentiment (20)
    citation_quality: DimensionScore      # citation quality (15)
    competitive_position: DimensionScore  # competitive position (15)

    # Sum of the five dimension scores, clamped to 0..100, 2 decimals.
    overall_score: float = 0.0

    # One of: excellent / good / pass / danger.
    health_level: str = "danger"

    def __post_init__(self) -> None:
        """Derive ``overall_score`` and ``health_level`` from the dimensions."""
        raw_total = (
            self.mention_rate.score
            + self.recommendation_rank.score
            + self.sentiment_score.score
            + self.citation_quality.score
            + self.competitive_position.score
        )
        # Individual dimensions are not bounded here, so the sum is clamped
        # to 0..100 before rounding.
        self.overall_score = round(min(100.0, max(0.0, raw_total)), 2)

        # Thresholds: >=80 excellent, >=60 good, >=40 pass, otherwise danger.
        if self.overall_score >= 80:
            self.health_level = "excellent"
        elif self.overall_score >= 60:
            self.health_level = "good"
        elif self.overall_score >= 40:
            self.health_level = "pass"
        else:
            self.health_level = "danger"

    @staticmethod
    def _dimension_to_dict(dimension: DimensionScore) -> dict:
        """Serialize one dimension; score and percentage are rounded to 2 places."""
        return {
            "name": dimension.name,
            "score": round(dimension.score, 2),
            "max_score": dimension.max_score,
            "percentage": round(dimension.percentage, 2),
            "detail": dimension.detail,
        }

    def to_dict(self) -> dict:
        """Return the result as a plain dictionary.

        Returns:
            A dict with ``overall_score``, ``health_level`` and
            ``dimensions``. ``dimensions`` maps each of the five dimension
            field names to its serialized form, in declaration order. Each
            ``detail`` value is the dimension's own dict, not a copy.
        """
        dimension_keys = (
            "mention_rate",
            "recommendation_rank",
            "sentiment_score",
            "citation_quality",
            "competitive_position",
        )
        return {
            "overall_score": self.overall_score,
            "health_level": self.health_level,
            "dimensions": {
                key: self._dimension_to_dict(getattr(self, key))
                for key in dimension_keys
            },
        }


# ============================================================
# V2 评分计算函数
# ============================================================

def calculate_mention_rate_v2(
    mentioned_count: int,
    total_queries: int,
) -> DimensionScore:
    """
    Compute the mention-rate dimension (25 points maximum).

    Formula: clamp(mentioned_count / total_queries, 0, 1) * 25

    The rate is clamped so that inconsistent input (more mentions than
    queries, or a negative count) cannot push the dimension outside
    0..25 points or 0..100 percent. The raw counts are still reported in
    ``detail`` so such input stays visible.

    Args:
        mentioned_count: Number of times the brand was mentioned.
        total_queries: Total number of queries.

    Returns:
        DimensionScore: The mention-rate dimension. When ``total_queries``
        is zero or negative the score is 0 and ``detail`` reports both
        counts as 0.
    """
    max_score = 25.0
    if total_queries <= 0:
        return DimensionScore(
            name="提及率",
            score=0.0,
            max_score=max_score,
            percentage=0.0,
            detail={"mentioned_count": 0, "total_queries": 0},
        )

    # Bound the rate to [0, 1] so score and percentage stay in range.
    rate = min(1.0, max(0.0, mentioned_count / total_queries))
    score = rate * max_score

    return DimensionScore(
        name="提及率",
        score=score,
        max_score=max_score,
        percentage=round(rate * 100, 2),
        detail={
            "mentioned_count": mentioned_count,
            "total_queries": total_queries,
            "rate": round(rate, 4),
        },
    )


def calculate_recommendation_rank_v2(
    positions: list[int | None],
) -> DimensionScore:
    """
    Compute the recommendation-rank dimension (25 points maximum).

    Each usable position ``p`` contributes ``1 / log2(p + 1)``: 1.0 for
    rank 1, about 0.63 for rank 2, 0.5 for rank 3, about 0.39 for rank 5
    and about 0.29 for rank 10. The contributions are averaged and the
    average is scaled to 25 points.

    Entries that are ``None`` or below 1 are dropped before averaging, so
    they do not lower the average.

    Args:
        positions: 1-based rank of the brand for each query; ``None`` means
            the brand did not appear in the recommendation list.

    Returns:
        DimensionScore: The recommendation-rank dimension. The score is 0
        when ``positions`` is empty or contains no usable entry.
    """
    max_score = 25.0

    def build(score: float, percentage: float, detail: dict) -> DimensionScore:
        # Every return path shares the dimension name and maximum.
        return DimensionScore(
            name="推荐排名",
            score=score,
            max_score=max_score,
            percentage=percentage,
            detail=detail,
        )

    if not positions:
        return build(0.0, 0.0, {"positions": [], "avg_position": None})

    usable = [p for p in positions if p is not None and p >= 1]
    if not usable:
        return build(
            0.0,
            0.0,
            {
                "positions": positions,
                "avg_position": None,
                "valid_count": 0,
            },
        )

    usable_count = len(usable)
    discounted = [1.0 / math.log2(p + 1) for p in usable]
    # Each contribution is at most 1.0; the cap is kept as a safety net.
    normalized = min(1.0, sum(discounted) / usable_count)
    mean_position = sum(usable) / usable_count

    return build(
        normalized * max_score,
        round(normalized * 100, 2),
        {
            "positions": usable,
            "avg_position": round(mean_position, 2),
            "valid_count": usable_count,
            "total_count": len(positions),
        },
    )


def calculate_sentiment_score_v2(
    sentiment_counts: dict[str, int],
) -> DimensionScore:
    """
    Compute the sentiment dimension (20 points maximum).

    Formula: (positive share * 1 + neutral share * 0.5 + negative share * 0) * 20

    Missing keys and ``None`` values count as 0. The weighted value is
    clamped to 0..1 so malformed (negative) counts cannot push the score
    outside 0..20.

    Args:
        sentiment_counts: {"positive": int, "neutral": int, "negative": int}

    Returns:
        DimensionScore: The sentiment dimension. When the three counts sum
        to zero or less the score is 0 and ``detail`` is a copy of
        ``sentiment_counts``.
    """
    max_score = 20.0

    positive = sentiment_counts.get("positive") or 0
    neutral = sentiment_counts.get("neutral") or 0
    negative = sentiment_counts.get("negative") or 0
    total = positive + neutral + negative

    if total <= 0:
        return DimensionScore(
            name="情感倾向",
            score=0.0,
            max_score=max_score,
            percentage=0.0,
            # Copy so that later changes to the result's detail cannot
            # alter the caller's dict (and vice versa).
            detail=dict(sentiment_counts),
        )

    # Weighted sentiment value; total > 0 is guaranteed past the guard above.
    weighted = (positive * 1.0 + neutral * 0.5 + negative * 0.0) / total
    sentiment_value = min(1.0, max(0.0, weighted))
    score = sentiment_value * max_score

    return DimensionScore(
        name="情感倾向",
        score=score,
        max_score=max_score,
        percentage=round(sentiment_value * 100, 2),
        detail={
            "positive": positive,
            "neutral": neutral,
            "negative": negative,
            "positive_rate": round(positive / total, 4),
            "negative_rate": round(negative / total, 4),
        },
    )


def calculate_citation_quality_v2(
    citations: list[CitationResult],
) -> DimensionScore:
    """
    Compute the citation-quality dimension (15 points maximum).

    Formula: avg(depth score * sentiment modifier) * 15

    Depth score: position coefficient (0.4) + length coefficient (0.3)
    + confidence coefficient (0.3).
    Sentiment modifier: positive=1.0, neutral=0.7, negative=0.3; any other
    or missing sentiment uses 0.7.

    Only citations whose ``cited`` flag is truthy are evaluated.

    Args:
        citations: Citation results; ``None`` is treated as an empty list.

    Returns:
        DimensionScore: The citation-quality dimension. The score is 0 when
        nothing is cited. ``detail["avg_depth_score"]`` holds the averaged
        sentiment-adjusted quality (depth score times sentiment modifier).
    """
    max_score = 15.0

    cited_items = [c for c in (citations or []) if c.cited]
    if not cited_items:
        return DimensionScore(
            name="引用质量",
            score=0.0,
            max_score=max_score,
            percentage=0.0,
            detail={"cited_count": 0},
        )

    # Built once; the table does not depend on the citation.
    sentiment_mod_map = {
        "positive": 1.0,
        "neutral": 0.7,
        "negative": 0.3,
    }
    default_sentiment_mod = 0.7

    total_quality = 0.0
    for citation in cited_items:
        position_coef = _get_position_coefficient(citation.position)
        length_coef = _get_length_coefficient(citation.citation_text)

        # Confidence is used directly, bounded to 0..1 so an out-of-range
        # value cannot push the score past the dimension maximum.
        # NOTE(review): a falsy confidence (None or 0) falls back to 0.5, as
        # before. Confirm whether 0.0 is meant as "unknown" or as a real
        # zero-confidence value.
        raw_confidence = citation.confidence
        confidence_coef = (
            min(1.0, max(0.0, raw_confidence)) if raw_confidence else 0.5
        )

        depth_score = (
            position_coef * 0.4
            + length_coef * 0.3
            + confidence_coef * 0.3
        )

        # A missing or non-string sentiment gets the neutral modifier
        # instead of raising on ``.lower()``.
        sentiment = citation.sentiment
        sentiment_key = sentiment.lower() if isinstance(sentiment, str) else ""
        sentiment_mod = sentiment_mod_map.get(sentiment_key, default_sentiment_mod)

        total_quality += depth_score * sentiment_mod

    avg_quality = total_quality / len(cited_items)
    score = avg_quality * max_score

    return DimensionScore(
        name="引用质量",
        score=score,
        max_score=max_score,
        percentage=round(avg_quality * 100, 2),
        detail={
            "cited_count": len(cited_items),
            "avg_depth_score": round(avg_quality, 4),
        },
    )


def calculate_competitive_position_v2(
    brand_mentions: int,
    competitor_mentions: dict[str, int],
) -> DimensionScore:
    """
    Compute the competitive-position dimension (15 points maximum).

    Formula: (lead ratio * 0.6 + share of voice * 0.4) * 15

    - lead ratio: share of competitors whose mention count is strictly
      below the brand's.
    - share of voice: brand mentions divided by brand plus all competitor
      mentions (0 when that sum is not positive).

    Args:
        brand_mentions: Number of times the brand was mentioned.
        competitor_mentions: {competitor name: mention count}

    Returns:
        DimensionScore: The competitive-position dimension. With no
        competitor data a fixed midpoint of 7.5 points (50%) is returned.
    """
    max_score = 15.0

    if not competitor_mentions:
        # Nothing to compare against: use the neutral midpoint.
        midpoint_detail = {
            "brand_mentions": brand_mentions,
            "competitor_count": 0,
            "ahead_count": 0,
            "behind_count": 0,
            "note": "无竞品数据，使用默认中间分",
        }
        return DimensionScore(
            name="竞品对比",
            score=7.5,
            max_score=max_score,
            percentage=50.0,
            detail=midpoint_detail,
        )

    rival_total = len(competitor_mentions)

    led = 0
    trailed = 0
    for rival_count in competitor_mentions.values():
        if brand_mentions > rival_count:
            led += 1
        if brand_mentions <= rival_count:
            # A tie counts as not leading.
            trailed += 1

    # rival_total is at least 1 here because the mapping is non-empty.
    lead_ratio = led / rival_total

    mention_pool = brand_mentions + sum(competitor_mentions.values())
    share_of_voice = brand_mentions / mention_pool if mention_pool > 0 else 0.0

    blended = lead_ratio * 0.6 + share_of_voice * 0.4

    return DimensionScore(
        name="竞品对比",
        score=blended * max_score,
        max_score=max_score,
        percentage=round(blended * 100, 2),
        detail={
            "brand_mentions": brand_mentions,
            "competitor_count": rival_total,
            "ahead_count": led,
            "behind_count": trailed,
            "sov": round(share_of_voice, 4),
            "ahead_ratio": round(lead_ratio, 4),
        },
    )


def calculate_v2_score(
    mentioned_count: int,
    total_queries: int,
    positions: list[int | None],
    sentiment_counts: dict[str, int],
    citations: list[CitationResult],
    brand_mentions: int,
    competitor_mentions: dict[str, int],
) -> ScoringResultV2:
    """
    Compute the V2 brand-visibility score.

    Runs the five dimension calculators in a fixed order and bundles their
    results into a ``ScoringResultV2``.

    Args:
        mentioned_count: Number of times the brand was mentioned.
        total_queries: Total number of queries.
        positions: Rank of the brand for each query.
        sentiment_counts: {"positive": int, "neutral": int, "negative": int}
        citations: Citation results.
        brand_mentions: Brand mention count used for the competitor comparison.
        competitor_mentions: {competitor name: mention count}

    Returns:
        ScoringResultV2: The V2 scoring result.
    """
    # Nominal maxima: 25 / 25 / 20 / 15 / 15 points, in this order.
    dimensions = {
        "mention_rate": calculate_mention_rate_v2(mentioned_count, total_queries),
        "recommendation_rank": calculate_recommendation_rank_v2(positions),
        "sentiment_score": calculate_sentiment_score_v2(sentiment_counts),
        "citation_quality": calculate_citation_quality_v2(citations),
        "competitive_position": calculate_competitive_position_v2(
            brand_mentions, competitor_mentions
        ),
    }
    return ScoringResultV2(**dimensions)


# ============================================================
# 健康等级工具函数
# ============================================================

def get_health_level(score: float) -> str:
    """
    Map a score to its health level.

    80+   -> excellent (green)
    60-79 -> good (yellow)
    40-59 -> pass (orange)
    <40   -> danger (red)
    """
    # Ordered from the highest floor down; the first floor reached wins.
    tiers = (
        (80, "excellent"),
        (60, "good"),
        (40, "pass"),
    )
    for floor, level in tiers:
        if score >= floor:
            return level
    return "danger"


def get_health_level_label(level: str) -> str:
    """Return the Chinese display label for a health level.

    Unrecognised levels map to the "unknown" label.
    """
    known_labels = {
        "excellent": "优秀",
        "good": "良好",
        "pass": "及格",
        "danger": "危险",
    }
    try:
        return known_labels[level]
    except KeyError:
        return "未知"


# ============================================================
# ScoringService (兼容V1接口 + V2新接口)
# ============================================================

class ScoringService:
    """Scoring service exposing the V1-compatible and V2 calculations as methods.

    Every method forwards to the module-level function it is named after
    (``calculate_v2`` forwards to ``calculate_v2_score``); the class keeps
    no state.
    """

    # --- V1-compatible interface ---

    def calculate_mention_rate_score(self, brand_citations: int, total_queries: int) -> float:
        """V1-compatible: mention-rate score."""
        return calculate_mention_rate_score(
            brand_citations=brand_citations,
            total_queries=total_queries,
        )

    def calculate_sov_score(self, brand_citations: int, total_citations: int) -> float:
        """V1-compatible: share-of-voice score."""
        return calculate_sov_score(
            brand_citations=brand_citations,
            total_citations=total_citations,
        )

    def calculate_quality_score(self, citations: list[CitationResult]) -> float:
        """V1-compatible: citation quality score."""
        return calculate_quality_score(citations=citations)

    def calculate_overall_score(
        self,
        mention_rate_score: float,
        sov_score: float,
        quality_score: float,
    ) -> float:
        """V1-compatible: weighted overall score."""
        return calculate_overall_score(
            mention_rate_score=mention_rate_score,
            sov_score=sov_score,
            quality_score=quality_score,
        )

    # --- V2 interface ---

    def calculate_v2(
        self,
        mentioned_count: int,
        total_queries: int,
        positions: list[int | None],
        sentiment_counts: dict[str, int],
        citations: list[CitationResult],
        brand_mentions: int,
        competitor_mentions: dict[str, int],
    ) -> ScoringResultV2:
        """Compute the V2 brand-visibility score."""
        return calculate_v2_score(
            mentioned_count,
            total_queries,
            positions,
            sentiment_counts,
            citations,
            brand_mentions,
            competitor_mentions,
        )