geo/backend/app/services/advisor/optimization_advisor.py

643 lines
25 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
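Optimization suggestion engine - actionable optimization suggestions based on gaps in brand data.

Suggestion types:
- content_optimization: content optimization (how to make your content easier for AI to cite)
- platform_targeting: platform targeting (which platforms need focused optimization)
- competitor_gap: competitor gap (how to narrow the gap with competitors)
- query_expansion: query expansion (which query terms should be added)
- citation_improvement: citation improvement (how to increase the probability of being cited)

Generation logic:
1. Based on the five-dimension data of scoring V2, find the weakest dimensions
2. Based on competitor comparison data, find the aspects with the largest gap
3. Based on sentiment analysis data, find the sources of negative sentiment
4. Use the DeepSeek API to generate personalized suggestions (toggled by ENABLE_LLM)
5. Without an LLM, use rule-based template suggestions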
"""
from __future__ import annotations
import asyncio
import json
import logging
import uuid
from dataclasses import dataclass, field
from typing import Any
from app.config import settings
from app.services.scoring.scoring_service import ScoringResultV2
from app.utils.json_extractor import extract_json
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# ============================================================
# 建议数据结构
# ============================================================
@dataclass
class SuggestionItem:
    """A single optimization suggestion.

    Attributes:
        type: Suggestion category; expected to be one of
            ``content_optimization``, ``platform_targeting``,
            ``competitor_gap``, ``query_expansion`` or
            ``citation_improvement``.
        priority: Expected to be ``high``, ``medium`` or ``low``.
        title: Headline of the suggestion.
        description: Detailed description.
        action: Concrete steps to carry out.
        expected_impact: Expected effect of following the suggestion.
        difficulty: Expected to be ``easy``, ``medium`` or ``hard``.

    None of the expected values are enforced by this class.
    """

    type: str
    priority: str
    title: str
    description: str
    action: str
    expected_impact: str
    difficulty: str
@dataclass
class BrandAnalysisContext:
    """Input data used to generate suggestions for one brand.

    Each of the five scoring dimensions carries three numbers: the score
    obtained (``*_score``), the maximum for the dimension (``*_max``) and
    the score rate (``*_percentage``).
    """

    brand_name: str
    overall_score: float

    # --- Five scoring dimensions -------------------------------------
    # Mention rate
    mention_rate_score: float = 0.0
    mention_rate_max: float = 25.0
    mention_rate_percentage: float = 0.0
    # Recommendation rank
    rank_score: float = 0.0
    rank_max: float = 25.0
    rank_percentage: float = 0.0
    # Sentiment
    sentiment_score: float = 0.0
    sentiment_max: float = 20.0
    sentiment_percentage: float = 0.0
    # Citation quality
    citation_score: float = 0.0
    citation_max: float = 15.0
    citation_percentage: float = 0.0
    # Competitive position
    competitive_score: float = 0.0
    competitive_max: float = 15.0
    competitive_percentage: float = 0.0

    # --- Supporting data (each instance gets its own fresh dict) -----
    # Competitor comparison data
    competitor_data: dict[str, Any] = field(default_factory=dict)
    # Sentiment analysis data
    sentiment_data: dict[str, int] = field(default_factory=dict)
    # Per-platform scores
    platform_scores: dict[str, float] = field(default_factory=dict)

    # --- Query statistics --------------------------------------------
    total_queries: int = 0
    mentioned_count: int = 0
# ============================================================
# 基于规则的建议生成器
# ============================================================
def _get_weakest_dimensions(ctx: BrandAnalysisContext) -> list[tuple[str, float, float]]:
    """Rank the five scoring dimensions from weakest to strongest.

    Args:
        ctx: Brand analysis context supplying each dimension's percentage
            and score.

    Returns:
        ``(dimension label, percentage, score)`` tuples ordered by ascending
        percentage. Dimensions with equal percentages keep the order in
        which they are listed below.
    """
    ranked = [
        ("提及率", ctx.mention_rate_percentage, ctx.mention_rate_score),
        ("推荐排名", ctx.rank_percentage, ctx.rank_score),
        ("情感倾向", ctx.sentiment_percentage, ctx.sentiment_score),
        ("引用质量", ctx.citation_percentage, ctx.citation_score),
        ("竞品对比", ctx.competitive_percentage, ctx.competitive_score),
    ]
    # list.sort() is stable, so ties keep their listing order.
    ranked.sort(key=lambda entry: entry[1])
    return ranked
def _generate_content_optimization_suggestions(
    ctx: BrandAnalysisContext,
) -> list[SuggestionItem]:
    """Build content-optimization suggestions from the mention-rate percentage.

    Args:
        ctx: Brand analysis context; only ``mention_rate_percentage`` is read.

    Returns:
        One high-priority suggestion when the percentage is below 50, one
        medium-priority suggestion when it is below 75, otherwise an empty
        list.
    """
    rate = ctx.mention_rate_percentage

    # Low mention rate: structural content work is needed.
    if rate < 50:
        low_rate_summary = (
            f"当前提及率仅{rate:.0f}%"
            "品牌在AI回答中被提及的频率较低。"
            "AI平台倾向于引用结构化、权威性强的内容。"
        )
        low_rate_steps = (
            "1. 在官网和核心页面添加FAQ结构化数据(Schema.org FAQPage)\n"
            "2. 创建行业白皮书和深度分析文章,使用清晰的标题层级\n"
            "3. 确保品牌在维基百科、行业百科等权威来源有准确条目\n"
            "4. 优化内容中的品牌名称一致性避免过多别名导致AI无法识别"
        )
        return [
            SuggestionItem(
                type="content_optimization",
                priority="high",
                title="提升品牌内容在AI平台的可见性",
                description=low_rate_summary,
                action=low_rate_steps,
                expected_impact="预计可将提及率提升15-25个百分点",
                difficulty="medium",
            )
        ]

    # Moderate mention rate: incremental improvements.
    if rate < 75:
        mid_rate_summary = (
            f"当前提及率为{rate:.0f}%,仍有提升空间。"
            "建议进一步优化内容结构以提高AI引用概率。"
        )
        mid_rate_steps = (
            "1. 增加品牌相关的高质量长文内容2000字以上\n"
            "2. 在内容中增加数据支撑和案例引用\n"
            "3. 定期更新内容以保持时效性"
        )
        return [
            SuggestionItem(
                type="content_optimization",
                priority="medium",
                title="持续优化品牌内容结构",
                description=mid_rate_summary,
                action=mid_rate_steps,
                expected_impact="预计可将提及率提升5-15个百分点",
                difficulty="easy",
            )
        ]

    return []
def _generate_platform_targeting_suggestions(
    ctx: BrandAnalysisContext,
) -> list[SuggestionItem]:
    """Build platform-targeting suggestions from the per-platform scores.

    Two independent rules are evaluated, in this order:

    * Among the three lowest-scoring platforms, those scoring below 40 are
      named in a high-priority "focus on these platforms" suggestion.
    * When there are no platform scores at all, or every score equals 0, a
      high-priority "start monitoring" suggestion is added.

    Args:
        ctx: Brand analysis context; only ``platform_scores`` is read.

    Returns:
        Zero, one or two suggestions, in the order of the rules above.
    """
    suggestions: list[SuggestionItem] = []
    platform_scores = ctx.platform_scores

    # Rule 1: name the weakest platforms (at most three, each below 40).
    if platform_scores:
        lowest_three = sorted(platform_scores.items(), key=lambda x: x[1])[:3]
        weak_platform_names = [name for name, score in lowest_three if score < 40]
        if weak_platform_names:
            # Join with the enumeration comma so the individual platform
            # names stay readable instead of running together.
            weak_platform_str = "、".join(weak_platform_names)
            suggestions.append(SuggestionItem(
                type="platform_targeting",
                priority="high",
                title=f"重点优化{weak_platform_str}平台表现",
                description=(
                    "在这些平台上品牌评分低于40分"
                    "AI引用率极低。不同AI平台有不同的内容偏好"
                    "需要针对性优化。"
                ),
                action=(
                    f"1. 分析{weak_platform_str}平台的内容偏好和引用模式\n"
                    "2. 针对各平台优化内容格式和表达方式\n"
                    "3. 增加在这些平台关联的内容源数量\n"
                    "4. 关注平台算法更新,及时调整优化策略"
                ),
                expected_impact="预计可将弱平台评分提升20-30分",
                difficulty="hard",
            ))

    # Rule 2: no platform data yet (missing, or every score is 0).
    if not platform_scores or all(v == 0 for v in platform_scores.values()):
        suggestions.append(SuggestionItem(
            type="platform_targeting",
            priority="high",
            title="启动全平台品牌监控",
            description=(
                "当前没有任何平台的引用数据。"
                "需要先在各AI平台建立品牌存在感。"
            ),
            action=(
                "1. 确保已添加足够的查询词覆盖核心业务关键词\n"
                "2. 等待系统完成首轮数据采集通常需要1-2天\n"
                "3. 采集完成后查看各平台评分,确定优先优化方向"
            ),
            expected_impact="获取全平台基准数据,为后续优化提供方向",
            difficulty="easy",
        ))

    return suggestions
def _generate_competitor_gap_suggestions(
    ctx: BrandAnalysisContext,
) -> list[SuggestionItem]:
    """Build competitor-gap suggestions from the competitive percentage.

    Args:
        ctx: Brand analysis context. ``competitive_percentage`` selects the
            rule; ``competitor_data`` is read for the ``brand_mentions``
            count and the ``competitor_mentions`` name-to-count mapping.

    Returns:
        One high-priority suggestion when the percentage is below 40 (its
        title names up to three competitors with more mentions than the
        brand), one medium-priority suggestion when it is below 70,
        otherwise an empty list.
    """
    suggestions: list[SuggestionItem] = []

    # Weak competitive position.
    if ctx.competitive_percentage < 40:
        ahead_competitors: list[tuple[str, Any]] = []
        if ctx.competitor_data:
            # Treat missing/None entries as "no data" instead of failing.
            brand_mentions = ctx.competitor_data.get("brand_mentions", 0) or 0
            competitor_mentions = ctx.competitor_data.get("competitor_mentions", {}) or {}
            ahead_competitors = [
                (name, count)
                for name, count in competitor_mentions.items()
                if count > brand_mentions
            ]
            # Largest lead first, so the three names shown are the biggest
            # leaders rather than whichever happen to come first in the dict.
            ahead_competitors.sort(key=lambda pair: pair[1], reverse=True)

        if ahead_competitors:
            # Enumeration comma keeps the competitor names separated.
            ahead_str = "、".join(name for name, _ in ahead_competitors[:3])
        else:
            ahead_str = "竞品"

        suggestions.append(SuggestionItem(
            type="competitor_gap",
            priority="high",
            title=f"缩小与{ahead_str}的差距",
            description=(
                f"当前竞品对比得分率仅{ctx.competitive_percentage:.0f}%"
                "品牌在AI引用中落后于主要竞品。"
                "需要分析竞品的优势领域并制定追赶策略。"
            ),
            action=(
                "1. 分析竞品在AI平台被引用的内容类型和话题\n"
                "2. 找出竞品有而品牌缺失的内容领域\n"
                "3. 针对性创建竞品优势领域的优质内容\n"
                "4. 加强品牌差异化定位,突出独特价值主张"
            ),
            expected_impact="预计3-6个月内可将竞品对比得分率提升15-25个百分点",
            difficulty="hard",
        ))
    elif ctx.competitive_percentage < 70:
        suggestions.append(SuggestionItem(
            type="competitor_gap",
            priority="medium",
            title="持续巩固竞争优势",
            description=(
                f"当前竞品对比得分率为{ctx.competitive_percentage:.0f}%"
                "品牌处于中等水平,需要持续巩固并扩大优势。"
            ),
            action=(
                "1. 定期监控竞品动态和内容更新\n"
                "2. 在品牌优势领域持续输出高质量内容\n"
                "3. 关注新兴话题和趋势,抢占先机"
            ),
            expected_impact="预计可将竞品对比得分率提升10-15个百分点",
            difficulty="medium",
        ))

    return suggestions
def _generate_query_expansion_suggestions(
    ctx: BrandAnalysisContext,
) -> list[SuggestionItem]:
    """Build query-expansion suggestions from query count and mention rate.

    Args:
        ctx: Brand analysis context; ``total_queries`` and
            ``mention_rate_percentage`` are read.

    Returns:
        With fewer than 10 queries: one "add more queries" suggestion
        (high priority below 3 queries, medium otherwise). With 10 or more
        queries and a mention rate below 50: one medium-priority "improve
        query quality" suggestion. Otherwise an empty list.
    """
    query_count = ctx.total_queries

    # Too few query terms to draw conclusions from.
    if query_count < 10:
        urgency = "high" if query_count < 3 else "medium"
        coverage_summary = (
            f"当前仅有{query_count}个查询词,"
            "覆盖范围不足无法全面反映品牌在AI搜索中的表现。"
            "更多查询词意味着更全面的品牌认知度画像。"
        )
        coverage_steps = (
            "1. 添加行业核心关键词(如:'XX行业推荐''XX解决方案'\n"
            "2. 添加品牌相关长尾词(如:'XX品牌怎么样''XX vs 竞品'\n"
            "3. 添加场景化查询词(如:'XX场景下选什么产品'\n"
            "4. 建议至少添加10-20个查询词以获得可靠的分析结果"
        )
        return [
            SuggestionItem(
                type="query_expansion",
                priority=urgency,
                title="扩展查询词覆盖范围",
                description=coverage_summary,
                action=coverage_steps,
                expected_impact="更多查询词可提升评分准确度,发现更多优化机会",
                difficulty="easy",
            )
        ]

    # Enough query terms but a low mention rate: the terms may be off-target.
    if query_count >= 10 and ctx.mention_rate_percentage < 50:
        quality_summary = (
            f"已有{query_count}个查询词,但提及率仅{ctx.mention_rate_percentage:.0f}%。"
            "可能是查询词与品牌核心业务关联度不够。"
        )
        quality_steps = (
            "1. 检查现有查询词是否覆盖品牌核心业务场景\n"
            "2. 添加品牌擅长的专业领域相关查询词\n"
            "3. 删除与品牌无关的宽泛查询词\n"
            "4. 增加品牌独特卖点和差异化优势相关的查询词"
        )
        return [
            SuggestionItem(
                type="query_expansion",
                priority="medium",
                title="优化查询词质量和相关性",
                description=quality_summary,
                action=quality_steps,
                expected_impact="优化查询词后可提升提及率10-20个百分点",
                difficulty="easy",
            )
        ]

    return []
def _generate_citation_improvement_suggestions(
    ctx: BrandAnalysisContext,
) -> list[SuggestionItem]:
    """Build citation-improvement suggestions.

    Three independent checks are made, in this order, each adding one
    high-priority suggestion when it fires:

    1. citation-quality percentage below 40;
    2. recommendation-rank percentage below 40;
    3. share of ``"negative"`` entries in ``sentiment_data`` above 0.3.

    Args:
        ctx: Brand analysis context; ``citation_percentage``,
            ``rank_percentage`` and ``sentiment_data`` are read.

    Returns:
        Between zero and three suggestions, in the order of the checks.
    """
    collected: list[SuggestionItem] = []

    # 1. Low citation quality.
    if ctx.citation_percentage < 40:
        citation_summary = (
            f"当前引用质量得分率仅{ctx.citation_percentage:.0f}%"
            "AI对品牌的引用多为浅层提及缺乏深度正面描述。"
            "高质量引用能显著影响用户决策。"
        )
        citation_steps = (
            "1. 创建详细的产品对比页面,包含数据表格和评测结论\n"
            "2. 发布客户案例和成功故事,提供具体数据支撑\n"
            "3. 在内容中增加可引用的数据点和统计信息\n"
            "4. 优化内容结构使用AI易于提取的格式列表、表格、要点"
        )
        collected.append(
            SuggestionItem(
                type="citation_improvement",
                priority="high",
                title="提升AI引用内容的质量和深度",
                description=citation_summary,
                action=citation_steps,
                expected_impact="预计可将引用质量得分率提升15-25个百分点",
                difficulty="medium",
            )
        )

    # 2. Low recommendation rank.
    if ctx.rank_percentage < 40:
        rank_summary = (
            f"当前推荐排名得分率仅{ctx.rank_percentage:.0f}%"
            "品牌在AI推荐列表中排名靠后用户看到概率低。"
            "排名越靠前,被用户选择的可能性越大。"
        )
        rank_steps = (
            "1. 增加品牌在权威第三方平台的正面评价和推荐\n"
            "2. 优化品牌在行业榜单和评测中的排名\n"
            "3. 创建'最佳XX''XX推荐'类内容,增加被推荐概率\n"
            "4. 提升品牌官网的SEO表现增加AI爬取到的概率"
        )
        collected.append(
            SuggestionItem(
                type="citation_improvement",
                priority="high",
                title="提升品牌在AI推荐中的排名位置",
                description=rank_summary,
                action=rank_steps,
                expected_impact="预计可将推荐排名提升2-3位",
                difficulty="medium",
            )
        )

    # 3. High share of negative sentiment (0.0 when there are no counts).
    sentiment_counts = ctx.sentiment_data
    sample_size = sum(sentiment_counts.values())
    negative_share = (
        sentiment_counts.get("negative", 0) / sample_size if sample_size > 0 else 0.0
    )
    if negative_share > 0.3:
        sentiment_summary = (
            f"当前负面评价占比{negative_share:.0%}"
            "AI在引用品牌时倾向使用负面表述。"
            "负面引用会严重影响用户对品牌的印象。"
        )
        sentiment_steps = (
            "1. 分析负面引用的具体内容,找出主要批评点\n"
            "2. 针对性改进产品或服务,解决用户痛点\n"
            "3. 主动发布正面内容,稀释负面信息的影响\n"
            "4. 在官方渠道积极回应用户反馈和投诉"
        )
        collected.append(
            SuggestionItem(
                type="citation_improvement",
                priority="high",
                title="改善AI平台对品牌的负面评价",
                description=sentiment_summary,
                action=sentiment_steps,
                expected_impact="减少负面引用比例10-20个百分点",
                difficulty="hard",
            )
        )

    return collected
def generate_rule_based_suggestions(
    ctx: BrandAnalysisContext,
) -> list[SuggestionItem]:
    """Generate optimization suggestions from fixed rules, without an LLM.

    Every rule-based generator in this module is run against ``ctx``; the
    combined results are ordered by priority and truncated.

    Args:
        ctx: Brand analysis context passed to each generator.

    Returns:
        At most five suggestions ordered high, then medium, then low. A
        priority outside those three values sorts together with medium.
        Suggestions of equal priority keep generator order.
    """
    generators = (
        _generate_content_optimization_suggestions,
        _generate_platform_targeting_suggestions,
        _generate_competitor_gap_suggestions,
        _generate_query_expansion_suggestions,
        _generate_citation_improvement_suggestions,
    )
    collected: list[SuggestionItem] = []
    for make_suggestions in generators:
        collected += make_suggestions(ctx)

    # Stable sort: high > medium > low.
    rank_of = {"high": 0, "medium": 1, "low": 2}
    ordered = sorted(collected, key=lambda item: rank_of.get(item.priority, 1))

    # Cap the result at five suggestions.
    return ordered[:5]
# ============================================================
# LLM 建议生成器
# ============================================================
# Prompt template used by generate_llm_suggestions(). It is filled in with
# str.format(), so the single-brace names ({brand_name}, {overall_score}, ...)
# are placeholders, while the literal braces of the JSON example are doubled
# ({{ and }}) so that they survive formatting as plain braces.
OPTIMIZATION_PROMPT = """你是一个GEO生成式引擎优化专家。基于以下品牌数据提供3-5条可操作的优化建议。
品牌: {brand_name}
当前评分: {overall_score}/100
评分维度:
- 提及率: {mention_rate_score}/{mention_rate_max} ({mention_rate_percentage}%)
- 推荐排名: {rank_score}/{rank_max} ({rank_percentage}%)
- 情感倾向: {sentiment_score}/{sentiment_max} ({sentiment_percentage}%)
- 引用质量: {citation_score}/{citation_max} ({citation_percentage}%)
- 竞品对比: {competitive_score}/{competitive_max} ({competitive_percentage}%)
竞品对比数据:
{competitor_data_str}
情感分析:
{sentiment_data_str}
平台评分:
{platform_data_str}
请返回JSON格式:
{{
"suggestions": [
{{
"type": "content_optimization" | "platform_targeting" | "competitor_gap" | "query_expansion" | "citation_improvement",
"priority": "high" | "medium" | "low",
"title": "建议标题",
"description": "详细描述",
"action": "具体操作步骤",
"expected_impact": "预期效果",
"difficulty": "easy" | "medium" | "hard"
}}
]
}}
要求:
1. 每条建议必须基于数据,指出具体的差距和改进方向
2. 优先关注最弱的维度
3. 建议必须可操作,包含具体步骤
4. 预期效果要量化
5. 返回3-5条建议按优先级从高到低排列
"""
# Allowed values for the enumerated fields of an LLM-produced suggestion.
# Built once at import time instead of on every parsed item.
_VALID_SUGGESTION_TYPES = frozenset({
    "content_optimization",
    "platform_targeting",
    "competitor_gap",
    "query_expansion",
    "citation_improvement",
})
_VALID_PRIORITIES = frozenset({"high", "medium", "low"})
_VALID_DIFFICULTIES = frozenset({"easy", "medium", "hard"})


def _pick_allowed(value: Any, allowed: frozenset[str], default: str) -> str:
    """Return *value* when it is a string contained in *allowed*, else *default*."""
    if isinstance(value, str) and value in allowed:
        return value
    return default


def _as_text(value: Any, default: str) -> str:
    """Coerce a free-text field of an LLM item to ``str``.

    ``None`` becomes *default*, a list/tuple (e.g. steps returned as an
    array) is joined one element per line, anything else goes through
    ``str()``.
    """
    if value is None:
        return default
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        return "\n".join(str(part) for part in value)
    return str(value)


def _parse_llm_suggestions(payload: Any) -> list[SuggestionItem]:
    """Turn the decoded LLM JSON into suggestion items, skipping malformed entries."""
    if not isinstance(payload, dict):
        return []
    raw_items = payload.get("suggestions")
    if not isinstance(raw_items, list):
        return []

    parsed: list[SuggestionItem] = []
    for item in raw_items:
        # One malformed entry must not discard the remaining valid ones.
        if not isinstance(item, dict):
            continue
        parsed.append(SuggestionItem(
            type=_pick_allowed(
                item.get("type"), _VALID_SUGGESTION_TYPES, "content_optimization"
            ),
            priority=_pick_allowed(item.get("priority"), _VALID_PRIORITIES, "medium"),
            title=_as_text(item.get("title"), "优化建议"),
            description=_as_text(item.get("description"), ""),
            action=_as_text(item.get("action"), ""),
            expected_impact=_as_text(item.get("expected_impact"), ""),
            difficulty=_pick_allowed(
                item.get("difficulty"), _VALID_DIFFICULTIES, "medium"
            ),
        ))
    return parsed


async def generate_llm_suggestions(
    ctx: BrandAnalysisContext,
) -> list[SuggestionItem]:
    """Generate personalized optimization suggestions with the DeepSeek LLM.

    Falls back to ``generate_rule_based_suggestions`` when the LLM is
    disabled, the API key is missing, the call or the response parsing
    raises, or the response contains no usable suggestion.

    Args:
        ctx: Brand analysis context rendered into ``OPTIMIZATION_PROMPT``.

    Returns:
        At most five suggestions, in the order returned by the LLM, or the
        rule-based suggestions on fallback.
    """
    if not settings.ENABLE_LLM or not settings.DEEPSEEK_API_KEY:
        logger.info("LLM未启用或API Key未配置使用规则生成建议")
        return generate_rule_based_suggestions(ctx)

    try:
        # Build the prompt.
        prompt = OPTIMIZATION_PROMPT.format(
            brand_name=ctx.brand_name,
            overall_score=ctx.overall_score,
            mention_rate_score=round(ctx.mention_rate_score, 2),
            mention_rate_max=ctx.mention_rate_max,
            mention_rate_percentage=round(ctx.mention_rate_percentage, 1),
            rank_score=round(ctx.rank_score, 2),
            rank_max=ctx.rank_max,
            rank_percentage=round(ctx.rank_percentage, 1),
            sentiment_score=round(ctx.sentiment_score, 2),
            sentiment_max=ctx.sentiment_max,
            sentiment_percentage=round(ctx.sentiment_percentage, 1),
            citation_score=round(ctx.citation_score, 2),
            citation_max=ctx.citation_max,
            citation_percentage=round(ctx.citation_percentage, 1),
            competitive_score=round(ctx.competitive_score, 2),
            competitive_max=ctx.competitive_max,
            competitive_percentage=round(ctx.competitive_percentage, 1),
            competitor_data_str=json.dumps(ctx.competitor_data, ensure_ascii=False, indent=2),
            sentiment_data_str=json.dumps(ctx.sentiment_data, ensure_ascii=False, indent=2),
            platform_data_str=json.dumps(ctx.platform_scores, ensure_ascii=False, indent=2),
        )

        # Imported lazily so the module can be loaded without the openai
        # package when the LLM path is never taken.
        from openai import OpenAI

        client = OpenAI(
            api_key=settings.DEEPSEEK_API_KEY,
            base_url="https://api.deepseek.com",
        )
        # The client call is synchronous; run it in a worker thread so the
        # event loop is not blocked while waiting for the response.
        response = await asyncio.to_thread(
            client.chat.completions.create,
            model="deepseek-chat",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
            max_tokens=2000,
        )
        content = response.choices[0].message.content
        if not content:
            raise ValueError("LLM返回空响应")

        # Extract and decode the JSON payload, then validate each item.
        suggestions = _parse_llm_suggestions(json.loads(extract_json(content)))
    except Exception as e:
        # Best-effort boundary: any failure falls back to the rules, but the
        # traceback is kept in the log for diagnosis.
        logger.error("LLM生成建议失败: %s,回退到规则生成", e, exc_info=True)
        return generate_rule_based_suggestions(ctx)

    if not suggestions:
        logger.warning("LLM未返回有效建议回退到规则生成")
        return generate_rule_based_suggestions(ctx)
    return suggestions[:5]
# ============================================================
# 主入口:生成优化建议
# ============================================================
async def generate_suggestions(
    ctx: BrandAnalysisContext,
) -> list[SuggestionItem]:
    """Main entry point: generate optimization suggestions for a brand.

    Args:
        ctx: Brand analysis context.

    Returns:
        LLM-generated suggestions when ``settings.ENABLE_LLM`` and
        ``settings.DEEPSEEK_API_KEY`` are both truthy; otherwise the
        rule-based template suggestions.
    """
    llm_ready = settings.ENABLE_LLM and settings.DEEPSEEK_API_KEY
    if not llm_ready:
        return generate_rule_based_suggestions(ctx)
    return await generate_llm_suggestions(ctx)
def build_context_from_scoring_result(
    brand_name: str,
    scoring_result: ScoringResultV2,
    competitor_data: dict[str, Any] | None = None,
    sentiment_data: dict[str, int] | None = None,
    platform_scores: dict[str, float] | None = None,
    total_queries: int = 0,
    mentioned_count: int = 0,
) -> BrandAnalysisContext:
    """Build a ``BrandAnalysisContext`` from a V2 scoring result.

    Args:
        brand_name: Brand name.
        scoring_result: V2 scoring result; its ``overall_score`` and the
            ``score`` / ``max_score`` / ``percentage`` of each of its five
            dimensions are copied into the context.
        competitor_data: Competitor comparison data; a falsy value becomes ``{}``.
        sentiment_data: Sentiment analysis data; a falsy value becomes ``{}``.
        platform_scores: Per-platform scores; a falsy value becomes ``{}``.
        total_queries: Total number of queries.
        mentioned_count: Number of times the brand was mentioned.

    Returns:
        The populated brand analysis context.
    """
    overall = scoring_result.overall_score
    mention = scoring_result.mention_rate
    rank = scoring_result.recommendation_rank
    sentiment = scoring_result.sentiment_score
    citation = scoring_result.citation_quality
    competitive = scoring_result.competitive_position

    return BrandAnalysisContext(
        brand_name=brand_name,
        overall_score=overall,
        # Mention rate
        mention_rate_score=mention.score,
        mention_rate_max=mention.max_score,
        mention_rate_percentage=mention.percentage,
        # Recommendation rank
        rank_score=rank.score,
        rank_max=rank.max_score,
        rank_percentage=rank.percentage,
        # Sentiment
        sentiment_score=sentiment.score,
        sentiment_max=sentiment.max_score,
        sentiment_percentage=sentiment.percentage,
        # Citation quality
        citation_score=citation.score,
        citation_max=citation.max_score,
        citation_percentage=citation.percentage,
        # Competitive position
        competitive_score=competitive.score,
        competitive_max=competitive.max_score,
        competitive_percentage=competitive.percentage,
        # Supporting data: replace missing inputs with fresh empty dicts.
        competitor_data=competitor_data or {},
        sentiment_data=sentiment_data or {},
        platform_scores=platform_scores or {},
        total_queries=total_queries,
        mentioned_count=mentioned_count,
    )