# geo/backend/app/services/advisor/optimization_advisor.py

"""
优化建议引擎 - 基于品牌数据差距提供可操作的优化建议

建议类型:
- content_optimization: 内容优化建议（如何让AI更容易引用你的内容）
- platform_targeting: 平台定向建议（哪些平台需要重点优化）
- competitor_gap: 竞品差距建议（如何缩小与竞品的差距）
- query_expansion: 查询词扩展建议（应该添加哪些查询词）
- citation_improvement: 引用改善建议（如何增加被引用的概率）

生成逻辑:
1. 基于评分V2的五维度数据，找出最弱的维度
2. 基于竞品对比数据，找出差距最大的方面
3. 基于情感分析数据，找出负面情感的来源
4. 使用DeepSeek API生成个性化建议（ENABLE_LLM控制开关）
5. 无LLM时使用基于规则的模板建议
"""
from __future__ import annotations

import asyncio
import json
import logging
import uuid
from dataclasses import dataclass, field
from typing import Any

from app.config import settings
from app.services.scoring.scoring_service import ScoringResultV2
from app.utils.json_extractor import extract_json

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


# ============================================================
# 建议数据结构
# ============================================================

@dataclass
class SuggestionItem:
    """A single optimization suggestion.

    Attributes:
        type: Suggestion category. The values used in this module are
            content_optimization, platform_targeting, competitor_gap,
            query_expansion and citation_improvement.
        priority: Urgency label: high, medium or low.
        title: Short headline of the suggestion.
        description: Detailed explanation.
        action: Concrete steps to carry out.
        expected_impact: Expected effect of taking the action.
        difficulty: Effort label: easy, medium or hard.
    """

    type: str
    priority: str
    title: str
    description: str
    action: str
    expected_impact: str
    difficulty: str


@dataclass
class BrandAnalysisContext:
    """Input data used to generate suggestions for one brand.

    Each of the five scoring dimensions carries three values: the score, the
    maximum achievable score, and a percentage. The rule functions in this
    module compare the percentages against thresholds such as 40, 50 and 75.
    """

    brand_name: str
    overall_score: float

    # Dimension: mention rate
    mention_rate_score: float = 0.0
    mention_rate_max: float = 25.0
    mention_rate_percentage: float = 0.0

    # Dimension: recommendation rank
    rank_score: float = 0.0
    rank_max: float = 25.0
    rank_percentage: float = 0.0

    # Dimension: sentiment
    sentiment_score: float = 0.0
    sentiment_max: float = 20.0
    sentiment_percentage: float = 0.0

    # Dimension: citation quality
    citation_score: float = 0.0
    citation_max: float = 15.0
    citation_percentage: float = 0.0

    # Dimension: competitive position
    competitive_score: float = 0.0
    competitive_max: float = 15.0
    competitive_percentage: float = 0.0

    # Competitor comparison data; this module reads the "brand_mentions" and
    # "competitor_mentions" keys from it.
    competitor_data: dict[str, Any] = field(default_factory=dict)

    # Sentiment counts per label; this module reads the "negative" key and
    # the sum of all values.
    sentiment_data: dict[str, int] = field(default_factory=dict)

    # Score per platform name.
    platform_scores: dict[str, float] = field(default_factory=dict)

    # Query statistics.
    total_queries: int = 0
    mentioned_count: int = 0


# ============================================================
# 基于规则的建议生成器
# ============================================================

def _get_weakest_dimensions(ctx: BrandAnalysisContext) -> list[tuple[str, float, float]]:
    """
    Rank the five scoring dimensions from weakest to strongest.

    Returns:
        A list of (dimension label, percentage, score) tuples sorted by
        ascending percentage. The sort is stable, so dimensions with equal
        percentages keep the order in which they are listed below.
    """
    # (display label, attribute prefix on the context object)
    labelled_prefixes = (
        ("提及率", "mention_rate"),
        ("推荐排名", "rank"),
        ("情感倾向", "sentiment"),
        ("引用质量", "citation"),
        ("竞品对比", "competitive"),
    )
    ranked = [
        (label, getattr(ctx, f"{prefix}_percentage"), getattr(ctx, f"{prefix}_score"))
        for label, prefix in labelled_prefixes
    ]
    ranked.sort(key=lambda entry: entry[1])
    return ranked


def _generate_content_optimization_suggestions(
    ctx: BrandAnalysisContext,
) -> list[SuggestionItem]:
    """Build content-optimization advice from the mention-rate percentage.

    A percentage below 50 yields one high-priority item; a percentage from 50
    up to (but not including) 75 yields one medium-priority item; anything
    else yields an empty list.
    """
    rate = ctx.mention_rate_percentage

    # Low mention rate: structural content work is the top priority.
    if rate < 50:
        low_rate_steps = (
            "1. 在官网和核心页面添加FAQ结构化数据(Schema.org FAQPage)",
            "2. 创建行业白皮书和深度分析文章，使用清晰的标题层级",
            "3. 确保品牌在维基百科、行业百科等权威来源有准确条目",
            "4. 优化内容中的品牌名称一致性，避免过多别名导致AI无法识别",
        )
        low_rate_item = SuggestionItem(
            type="content_optimization",
            priority="high",
            title="提升品牌内容在AI平台的可见性",
            description=(
                f"当前提及率仅{rate:.0f}%，"
                "品牌在AI回答中被提及的频率较低。"
                "AI平台倾向于引用结构化、权威性强的内容。"
            ),
            action="\n".join(low_rate_steps),
            expected_impact="预计可将提及率提升15-25个百分点",
            difficulty="medium",
        )
        return [low_rate_item]

    # Middling mention rate: keep refining the content.
    if rate < 75:
        mid_rate_steps = (
            "1. 增加品牌相关的高质量长文内容（2000字以上）",
            "2. 在内容中增加数据支撑和案例引用",
            "3. 定期更新内容以保持时效性",
        )
        mid_rate_item = SuggestionItem(
            type="content_optimization",
            priority="medium",
            title="持续优化品牌内容结构",
            description=(
                f"当前提及率为{rate:.0f}%，仍有提升空间。"
                "建议进一步优化内容结构以提高AI引用概率。"
            ),
            action="\n".join(mid_rate_steps),
            expected_impact="预计可将提及率提升5-15个百分点",
            difficulty="easy",
        )
        return [mid_rate_item]

    return []


def _generate_platform_targeting_suggestions(
    ctx: BrandAnalysisContext,
) -> list[SuggestionItem]:
    """Build platform-targeting advice from the per-platform scores.

    Two mutually exclusive cases are handled:

    * No usable data (``platform_scores`` is empty or every score is 0):
      return a single "start monitoring" suggestion. Nothing else is emitted
      in this case, because advising to optimize specific platforms for
      "scoring below 40" would contradict the statement that no data exists.
    * Otherwise: take the three lowest-scoring platforms and, if any of them
      scores below 40, return one suggestion that names those platforms.

    Returns:
        A list holding zero or one SuggestionItem.
    """
    scores = ctx.platform_scores

    # Empty mapping or all-zero scores are both treated as "no data yet".
    if not scores or all(value == 0 for value in scores.values()):
        return [SuggestionItem(
            type="platform_targeting",
            priority="high",
            title="启动全平台品牌监控",
            description=(
                "当前没有任何平台的引用数据。"
                "需要先在各AI平台建立品牌存在感。"
            ),
            action=(
                "1. 确保已添加足够的查询词覆盖核心业务关键词\n"
                "2. 等待系统完成首轮数据采集（通常需要1-2天）\n"
                "3. 采集完成后查看各平台评分，确定优先优化方向"
            ),
            expected_impact="获取全平台基准数据，为后续优化提供方向",
            difficulty="easy",
        )]

    # Among the three lowest-scoring platforms, keep those under 40 points.
    lowest_three = sorted(scores.items(), key=lambda pair: pair[1])[:3]
    weak_platform_str = "、".join(name for name, value in lowest_three if value < 40)
    if not weak_platform_str:
        return []

    return [SuggestionItem(
        type="platform_targeting",
        priority="high",
        title=f"重点优化{weak_platform_str}平台表现",
        description=(
            "在这些平台上品牌评分低于40分，"
            "AI引用率极低。不同AI平台有不同的内容偏好，"
            "需要针对性优化。"
        ),
        action=(
            f"1. 分析{weak_platform_str}平台的内容偏好和引用模式\n"
            "2. 针对各平台优化内容格式和表达方式\n"
            "3. 增加在这些平台关联的内容源数量\n"
            "4. 关注平台算法更新，及时调整优化策略"
        ),
        expected_impact="预计可将弱平台评分提升20-30分",
        difficulty="hard",
    )]


def _generate_competitor_gap_suggestions(
    ctx: BrandAnalysisContext,
) -> list[SuggestionItem]:
    """Build competitor-gap advice from the competitive-position percentage.

    * Below 40: one high-priority suggestion whose title names up to three
      competitors that are mentioned more often than the brand, ordered from
      the largest mention count downwards. When no such competitor is known,
      the generic word "竞品" is used instead.
    * From 40 up to (but not including) 70: one medium-priority suggestion.
    * Otherwise: no suggestion.

    The competitor figures are read from ``ctx.competitor_data`` under the
    keys "brand_mentions" (a number) and "competitor_mentions" (a mapping of
    competitor name to mention count). Missing or ``None`` values are treated
    as 0 / empty, and entries whose count is not a number are ignored.

    Returns:
        A list holding zero or one SuggestionItem.
    """
    suggestions: list[SuggestionItem] = []

    if ctx.competitive_percentage < 40:
        data = ctx.competitor_data or {}
        brand_mentions = data.get("brand_mentions") or 0
        competitor_mentions = data.get("competitor_mentions") or {}

        # Competitors ahead of the brand, biggest lead first. sorted() is
        # stable, so equal counts keep the mapping's original order.
        ahead_competitors = sorted(
            (
                (name, count)
                for name, count in competitor_mentions.items()
                if isinstance(count, (int, float)) and count > brand_mentions
            ),
            key=lambda pair: pair[1],
            reverse=True,
        )
        ahead_str = "、".join(name for name, _ in ahead_competitors[:3]) or "竞品"

        suggestions.append(SuggestionItem(
            type="competitor_gap",
            priority="high",
            title=f"缩小与{ahead_str}的差距",
            description=(
                f"当前竞品对比得分率仅{ctx.competitive_percentage:.0f}%，"
                "品牌在AI引用中落后于主要竞品。"
                "需要分析竞品的优势领域并制定追赶策略。"
            ),
            action=(
                "1. 分析竞品在AI平台被引用的内容类型和话题\n"
                "2. 找出竞品有而品牌缺失的内容领域\n"
                "3. 针对性创建竞品优势领域的优质内容\n"
                "4. 加强品牌差异化定位，突出独特价值主张"
            ),
            expected_impact="预计3-6个月内可将竞品对比得分率提升15-25个百分点",
            difficulty="hard",
        ))
    elif ctx.competitive_percentage < 70:
        suggestions.append(SuggestionItem(
            type="competitor_gap",
            priority="medium",
            title="持续巩固竞争优势",
            description=(
                f"当前竞品对比得分率为{ctx.competitive_percentage:.0f}%，"
                "品牌处于中等水平，需要持续巩固并扩大优势。"
            ),
            action=(
                "1. 定期监控竞品动态和内容更新\n"
                "2. 在品牌优势领域持续输出高质量内容\n"
                "3. 关注新兴话题和趋势，抢占先机"
            ),
            expected_impact="预计可将竞品对比得分率提升10-15个百分点",
            difficulty="medium",
        ))

    return suggestions


def _generate_query_expansion_suggestions(
    ctx: BrandAnalysisContext,
) -> list[SuggestionItem]:
    """Build query-expansion advice from the query count and mention rate.

    Fewer than 10 queries yields an "add more queries" item (high priority
    when there are fewer than 3, medium otherwise). With 10 or more queries
    and a mention-rate percentage below 50, a medium-priority "improve query
    quality" item is returned instead. Any other input yields an empty list.
    """
    query_count = ctx.total_queries

    # Too few queries to give a reliable picture.
    if query_count < 10:
        urgency = "high" if query_count < 3 else "medium"
        coverage_steps = (
            "1. 添加行业核心关键词（如：'XX行业推荐'、'XX解决方案'）",
            "2. 添加品牌相关长尾词（如：'XX品牌怎么样'、'XX vs 竞品'）",
            "3. 添加场景化查询词（如：'XX场景下选什么产品'）",
            "4. 建议至少添加10-20个查询词以获得可靠的分析结果",
        )
        coverage_item = SuggestionItem(
            type="query_expansion",
            priority=urgency,
            title="扩展查询词覆盖范围",
            description=(
                f"当前仅有{query_count}个查询词，"
                "覆盖范围不足，无法全面反映品牌在AI搜索中的表现。"
                "更多查询词意味着更全面的品牌认知度画像。"
            ),
            action="\n".join(coverage_steps),
            expected_impact="更多查询词可提升评分准确度，发现更多优化机会",
            difficulty="easy",
        )
        return [coverage_item]

    # Plenty of queries but a low mention rate: query relevance is suspect.
    if query_count >= 10 and ctx.mention_rate_percentage < 50:
        quality_steps = (
            "1. 检查现有查询词是否覆盖品牌核心业务场景",
            "2. 添加品牌擅长的专业领域相关查询词",
            "3. 删除与品牌无关的宽泛查询词",
            "4. 增加品牌独特卖点和差异化优势相关的查询词",
        )
        quality_item = SuggestionItem(
            type="query_expansion",
            priority="medium",
            title="优化查询词质量和相关性",
            description=(
                f"已有{query_count}个查询词，但提及率仅{ctx.mention_rate_percentage:.0f}%。"
                "可能是查询词与品牌核心业务关联度不够。"
            ),
            action="\n".join(quality_steps),
            expected_impact="优化查询词后可提升提及率10-20个百分点",
            difficulty="easy",
        )
        return [quality_item]

    return []


def _generate_citation_improvement_suggestions(
    ctx: BrandAnalysisContext,
) -> list[SuggestionItem]:
    """Build citation-improvement advice from three independent checks.

    Each check that fires appends one high-priority item, in this order:
    citation-quality percentage below 40, recommendation-rank percentage
    below 40, and a negative share of the sentiment counts above 0.3.
    """
    found: list[SuggestionItem] = []

    # Check 1: weak citation quality.
    if ctx.citation_percentage < 40:
        quality_steps = (
            "1. 创建详细的产品对比页面，包含数据表格和评测结论",
            "2. 发布客户案例和成功故事，提供具体数据支撑",
            "3. 在内容中增加可引用的数据点和统计信息",
            "4. 优化内容结构，使用AI易于提取的格式（列表、表格、要点）",
        )
        found.append(SuggestionItem(
            type="citation_improvement",
            priority="high",
            title="提升AI引用内容的质量和深度",
            description=(
                f"当前引用质量得分率仅{ctx.citation_percentage:.0f}%，"
                "AI对品牌的引用多为浅层提及，缺乏深度正面描述。"
                "高质量引用能显著影响用户决策。"
            ),
            action="\n".join(quality_steps),
            expected_impact="预计可将引用质量得分率提升15-25个百分点",
            difficulty="medium",
        ))

    # Check 2: weak recommendation rank.
    if ctx.rank_percentage < 40:
        rank_steps = (
            "1. 增加品牌在权威第三方平台的正面评价和推荐",
            "2. 优化品牌在行业榜单和评测中的排名",
            "3. 创建'最佳XX'、'XX推荐'类内容，增加被推荐概率",
            "4. 提升品牌官网的SEO表现，增加AI爬取到的概率",
        )
        found.append(SuggestionItem(
            type="citation_improvement",
            priority="high",
            title="提升品牌在AI推荐中的排名位置",
            description=(
                f"当前推荐排名得分率仅{ctx.rank_percentage:.0f}%，"
                "品牌在AI推荐列表中排名靠后，用户看到概率低。"
                "排名越靠前，被用户选择的可能性越大。"
            ),
            action="\n".join(rank_steps),
            expected_impact="预计可将推荐排名提升2-3位",
            difficulty="medium",
        ))

    # Check 3: share of "negative" among all sentiment counts (0.0 when the
    # counts sum to zero or less).
    sentiment_total = sum(ctx.sentiment_data.values())
    if sentiment_total > 0:
        negative_rate = ctx.sentiment_data.get("negative", 0) / sentiment_total
    else:
        negative_rate = 0.0

    if negative_rate > 0.3:
        sentiment_steps = (
            "1. 分析负面引用的具体内容，找出主要批评点",
            "2. 针对性改进产品或服务，解决用户痛点",
            "3. 主动发布正面内容，稀释负面信息的影响",
            "4. 在官方渠道积极回应用户反馈和投诉",
        )
        found.append(SuggestionItem(
            type="citation_improvement",
            priority="high",
            title="改善AI平台对品牌的负面评价",
            description=(
                f"当前负面评价占比{negative_rate:.0%}，"
                "AI在引用品牌时倾向使用负面表述。"
                "负面引用会严重影响用户对品牌的印象。"
            ),
            action="\n".join(sentiment_steps),
            expected_impact="减少负面引用比例10-20个百分点",
            difficulty="hard",
        ))

    return found


def generate_rule_based_suggestions(
    ctx: BrandAnalysisContext,
) -> list[SuggestionItem]:
    """
    Produce suggestions from fixed rules, without calling an LLM.

    Runs every rule-based generator in a fixed order, orders the combined
    result by priority (high, then medium, then low; unknown priorities are
    ranked like medium) and returns at most five items.
    """
    generators = (
        _generate_content_optimization_suggestions,
        _generate_platform_targeting_suggestions,
        _generate_competitor_gap_suggestions,
        _generate_query_expansion_suggestions,
        _generate_citation_improvement_suggestions,
    )
    collected = [item for generate in generators for item in generate(ctx)]

    # Stable sort: items of equal priority stay in generator order.
    rank_of = {"high": 0, "medium": 1, "low": 2}
    ordered = sorted(collected, key=lambda item: rank_of.get(item.priority, 1))

    # Cap the result at five suggestions.
    return ordered[:5]


# ============================================================
# LLM 建议生成器
# ============================================================

# Prompt template that generate_llm_suggestions fills in with str.format().
# Single-brace fields are replaced with brand data; the doubled braces
# ({{ and }}) are str.format escapes that render as literal JSON braces in the
# example response the model is asked to follow.
OPTIMIZATION_PROMPT = """你是一个GEO（生成式引擎优化）专家。基于以下品牌数据，提供3-5条可操作的优化建议。

品牌: {brand_name}
当前评分: {overall_score}/100
评分维度:
- 提及率: {mention_rate_score}/{mention_rate_max} ({mention_rate_percentage}%)
- 推荐排名: {rank_score}/{rank_max} ({rank_percentage}%)
- 情感倾向: {sentiment_score}/{sentiment_max} ({sentiment_percentage}%)
- 引用质量: {citation_score}/{citation_max} ({citation_percentage}%)
- 竞品对比: {competitive_score}/{competitive_max} ({competitive_percentage}%)

竞品对比数据:
{competitor_data_str}

情感分析:
{sentiment_data_str}

平台评分:
{platform_data_str}

请返回JSON格式:
{{
  "suggestions": [
    {{
      "type": "content_optimization" | "platform_targeting" | "competitor_gap" | "query_expansion" | "citation_improvement",
      "priority": "high" | "medium" | "low",
      "title": "建议标题",
      "description": "详细描述",
      "action": "具体操作步骤",
      "expected_impact": "预期效果",
      "difficulty": "easy" | "medium" | "hard"
    }}
  ]
}}

要求:
1. 每条建议必须基于数据，指出具体的差距和改进方向
2. 优先关注最弱的维度
3. 建议必须可操作，包含具体步骤
4. 预期效果要量化
5. 返回3-5条建议，按优先级从高到低排列
"""


# Allowed values for the enumerated fields of an LLM-produced suggestion.
# Built once at import time instead of on every parsed item.
_VALID_SUGGESTION_TYPES = frozenset({
    "content_optimization", "platform_targeting",
    "competitor_gap", "query_expansion", "citation_improvement",
})
_VALID_PRIORITIES = frozenset({"high", "medium", "low"})
_VALID_DIFFICULTIES = frozenset({"easy", "medium", "hard"})


def _coerce_choice(value: Any, allowed: frozenset[str], default: str) -> str:
    """Return ``value`` if it is one of the ``allowed`` strings, else ``default``."""
    if isinstance(value, str) and value in allowed:
        return value
    return default


def _coerce_text(value: Any, default: str = "") -> str:
    """Normalize a free-text field from the LLM payload to a plain string.

    ``None`` becomes ``default``, a list or tuple (for example action steps
    returned as a JSON array) is joined with newlines, and any other
    non-string value is passed through ``str()``.
    """
    if value is None:
        return default
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        return "\n".join(str(part) for part in value)
    return str(value)


def _parse_llm_suggestions(payload: Any) -> list[SuggestionItem]:
    """Convert the decoded LLM JSON payload into SuggestionItem objects.

    Accepts either ``{"suggestions": [...]}`` or a bare JSON array of
    suggestion objects. Entries that are not JSON objects are skipped, so one
    malformed entry does not discard the valid ones.
    """
    if isinstance(payload, dict):
        raw_items = payload.get("suggestions") or []
    elif isinstance(payload, list):
        raw_items = payload
    else:
        return []
    if not isinstance(raw_items, list):
        return []

    parsed: list[SuggestionItem] = []
    for item in raw_items:
        if not isinstance(item, dict):
            continue
        parsed.append(SuggestionItem(
            type=_coerce_choice(
                item.get("type"), _VALID_SUGGESTION_TYPES, "content_optimization"
            ),
            priority=_coerce_choice(item.get("priority"), _VALID_PRIORITIES, "medium"),
            title=_coerce_text(item.get("title"), "优化建议"),
            description=_coerce_text(item.get("description")),
            action=_coerce_text(item.get("action")),
            expected_impact=_coerce_text(item.get("expected_impact")),
            difficulty=_coerce_choice(
                item.get("difficulty"), _VALID_DIFFICULTIES, "medium"
            ),
        ))
    return parsed


async def generate_llm_suggestions(
    ctx: BrandAnalysisContext,
) -> list[SuggestionItem]:
    """
    Generate personalized suggestions with the DeepSeek chat API.

    Falls back to generate_rule_based_suggestions() when the LLM is disabled
    or unconfigured, when the call or the response parsing raises, or when
    the response contains no usable suggestion.

    Args:
        ctx: Brand analysis data that is rendered into OPTIMIZATION_PROMPT.

    Returns:
        At most five suggestions, in the order the LLM returned them, or the
        rule-based result on fallback.
    """
    if not settings.ENABLE_LLM or not settings.DEEPSEEK_API_KEY:
        logger.info("LLM未启用或API Key未配置，使用规则生成建议")
        return generate_rule_based_suggestions(ctx)

    try:
        # Render the prompt from the context values.
        competitor_data_str = json.dumps(ctx.competitor_data, ensure_ascii=False, indent=2)
        sentiment_data_str = json.dumps(ctx.sentiment_data, ensure_ascii=False, indent=2)
        platform_data_str = json.dumps(ctx.platform_scores, ensure_ascii=False, indent=2)

        prompt = OPTIMIZATION_PROMPT.format(
            brand_name=ctx.brand_name,
            overall_score=ctx.overall_score,
            mention_rate_score=round(ctx.mention_rate_score, 2),
            mention_rate_max=ctx.mention_rate_max,
            mention_rate_percentage=round(ctx.mention_rate_percentage, 1),
            rank_score=round(ctx.rank_score, 2),
            rank_max=ctx.rank_max,
            rank_percentage=round(ctx.rank_percentage, 1),
            sentiment_score=round(ctx.sentiment_score, 2),
            sentiment_max=ctx.sentiment_max,
            sentiment_percentage=round(ctx.sentiment_percentage, 1),
            citation_score=round(ctx.citation_score, 2),
            citation_max=ctx.citation_max,
            citation_percentage=round(ctx.citation_percentage, 1),
            competitive_score=round(ctx.competitive_score, 2),
            competitive_max=ctx.competitive_max,
            competitive_percentage=round(ctx.competitive_percentage, 1),
            competitor_data_str=competitor_data_str,
            sentiment_data_str=sentiment_data_str,
            platform_data_str=platform_data_str,
        )

        # Imported inside the try block so a missing openai package also
        # results in the rule-based fallback.
        from openai import OpenAI

        client = OpenAI(
            api_key=settings.DEEPSEEK_API_KEY,
            base_url="https://api.deepseek.com",
        )

        # The client call is synchronous; run it in a worker thread so the
        # event loop is not blocked while waiting for the response.
        response = await asyncio.to_thread(
            client.chat.completions.create,
            model="deepseek-chat",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
            max_tokens=2000,
        )

        content = response.choices[0].message.content
        if not content:
            raise ValueError("LLM返回空响应")

        # Extract and decode the JSON payload, then validate each entry.
        json_str = extract_json(content)
        suggestions = _parse_llm_suggestions(json.loads(json_str))

        if not suggestions:
            logger.warning("LLM未返回有效建议，回退到规则生成")
            return generate_rule_based_suggestions(ctx)

        return suggestions[:5]

    except Exception as e:
        # Deliberate best-effort boundary: any failure degrades to the
        # rule-based generator. The traceback is logged for diagnosis.
        logger.error("LLM生成建议失败: %s，回退到规则生成", e, exc_info=True)
        return generate_rule_based_suggestions(ctx)


# ============================================================
# 主入口：生成优化建议
# ============================================================

async def generate_suggestions(
    ctx: BrandAnalysisContext,
) -> list[SuggestionItem]:
    """
    Main entry point for producing optimization suggestions.

    Delegates to generate_llm_suggestions() when settings.ENABLE_LLM is
    truthy and settings.DEEPSEEK_API_KEY is set; otherwise returns the
    rule-based suggestions.
    """
    llm_configured = settings.ENABLE_LLM and settings.DEEPSEEK_API_KEY
    if not llm_configured:
        return generate_rule_based_suggestions(ctx)
    return await generate_llm_suggestions(ctx)


def build_context_from_scoring_result(
    brand_name: str,
    scoring_result: ScoringResultV2,
    competitor_data: dict[str, Any] | None = None,
    sentiment_data: dict[str, int] | None = None,
    platform_scores: dict[str, float] | None = None,
    total_queries: int = 0,
    mentioned_count: int = 0,
) -> BrandAnalysisContext:
    """
    Build a BrandAnalysisContext from a ScoringResultV2.

    Copies ``score``, ``max_score`` and ``percentage`` from each of the five
    dimension results on ``scoring_result`` into the matching context fields.

    Args:
        brand_name: Brand name.
        scoring_result: V2 scoring result to read the dimensions from.
        competitor_data: Competitor comparison data; falsy values become {}.
        sentiment_data: Sentiment analysis data; falsy values become {}.
        platform_scores: Per-platform scores; falsy values become {}.
        total_queries: Total number of queries.
        mentioned_count: Number of times the brand was mentioned.

    Returns:
        BrandAnalysisContext: The populated analysis context.
    """
    mention = scoring_result.mention_rate
    rank = scoring_result.recommendation_rank
    sentiment = scoring_result.sentiment_score
    citation = scoring_result.citation_quality
    competitive = scoring_result.competitive_position

    return BrandAnalysisContext(
        brand_name=brand_name,
        overall_score=scoring_result.overall_score,
        mention_rate_score=mention.score,
        mention_rate_max=mention.max_score,
        mention_rate_percentage=mention.percentage,
        rank_score=rank.score,
        rank_max=rank.max_score,
        rank_percentage=rank.percentage,
        sentiment_score=sentiment.score,
        sentiment_max=sentiment.max_score,
        sentiment_percentage=sentiment.percentage,
        citation_score=citation.score,
        citation_max=citation.max_score,
        citation_percentage=citation.percentage,
        competitive_score=competitive.score,
        competitive_max=competitive.max_score,
        competitive_percentage=competitive.percentage,
        competitor_data=competitor_data if competitor_data else {},
        sentiment_data=sentiment_data if sentiment_data else {},
        platform_scores=platform_scores if platform_scores else {},
        total_queries=total_queries,
        mentioned_count=mentioned_count,
    )