geo/backend/app/services/geo_diagnosis.py

1079 lines
37 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
GEO诊断服务 - 6大维度检测系统
诊断维度总分100
- 内容可提取性 (Content Extractability): 20分 - AI能否轻松提取和理解内容
- 实体清晰度 (Entity Clarity): 15分 - AI能否理解品牌是什么
- E-E-A-T信号 (E-E-A-T Signals): 20分 - 经验、专业性、权威性、可信度
- Schema标记 (Schema Markup): 15分 - 结构化数据完整性
- 主题权威 (Topic Authority): 15分 - 品牌在特定领域的权威性
- 引用就绪度 (Citation Readiness): 15分 - 品牌在AI回答中被引用的可能性
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
logger = logging.getLogger(__name__)
# ============================================================
# 诊断数据结构
# ============================================================
@dataclass
class DiagnosisItem:
    """One individual check performed inside a diagnosis dimension."""

    # Human-readable name of the check.
    name: str
    # Outcome of the check: "pass", "warning" or "fail".
    status: str
    # Explanation of what the check examines.
    description: str
    # Advice on how to improve the result.
    suggestion: str
    # Points earned by this check.
    score: float = 0.0
    # Points available for this check.
    max_score: float = 0.0
@dataclass
class GEODimensionScore:
    """Scoring details for one diagnosis dimension."""

    # Dimension name.
    name: str
    # Points earned in this dimension (0 .. max_score).
    score: float
    # Points available in this dimension.
    max_score: float
    # Individual checks that make up the dimension.
    items: list[DiagnosisItem] = field(default_factory=list)
    # Dimension-level outcome: "pass", "warning" or "fail".
    status: str = "pass"
    # Score expressed as a percentage (0-100).
    percentage: float = 0.0
    # Echo of the raw inputs the dimension was computed from.
    detail: dict = field(default_factory=dict)
@dataclass
class GEORecommendation:
    """A single optimisation recommendation derived from a diagnosis."""

    # Priority bucket: "P0", "P1" or "P2".
    priority: str
    # Name of the dimension the recommendation belongs to.
    dimension: str
    # Short title of the recommendation.
    title: str
    # Full recommendation text.
    description: str
    # Expected impact: "high", "medium" or "low".
    impact: str
    # Implementation effort: "easy", "medium" or "hard".
    effort: str
@dataclass
class GEODiagnosisResult:
    """Aggregate outcome of a GEO diagnosis run."""

    # Combined score across all dimensions (0-100).
    overall_score: float = 0.0
    # Per-dimension results.
    dimensions: list[GEODimensionScore] = field(default_factory=list)
    # Optimisation recommendations.
    recommendations: list[GEORecommendation] = field(default_factory=list)
    # "excellent" / "good" / "pass" / "danger"; always recomputed in
    # __post_init__, so a value passed to the constructor is overwritten.
    health_level: str = "danger"

    def __post_init__(self):
        """Derive the health level from the overall score."""
        level = "danger"
        # Bands are checked from the highest floor down; first match wins.
        for floor, label in ((80, "excellent"), (60, "good"), (40, "pass")):
            if self.overall_score >= floor:
                level = label
                break
        self.health_level = level

    def to_dict(self) -> dict:
        """Return the result as a plain dictionary with rounded scores."""
        dimension_rows = []
        for dim in self.dimensions:
            item_rows = []
            for item in dim.items:
                item_rows.append({
                    "name": item.name,
                    "status": item.status,
                    "description": item.description,
                    "suggestion": item.suggestion,
                    "score": round(item.score, 2),
                    "max_score": item.max_score,
                })
            dimension_rows.append({
                "name": dim.name,
                "score": round(dim.score, 2),
                "max_score": dim.max_score,
                "percentage": round(dim.percentage, 2),
                "status": dim.status,
                "items": item_rows,
                "detail": dim.detail,
            })

        recommendation_rows = []
        for rec in self.recommendations:
            recommendation_rows.append({
                "priority": rec.priority,
                "dimension": rec.dimension,
                "title": rec.title,
                "description": rec.description,
                "impact": rec.impact,
                "effort": rec.effort,
            })

        return {
            "overall_score": round(self.overall_score, 2),
            "health_level": self.health_level,
            "health_level_label": get_health_level_label(self.health_level),
            "dimensions": dimension_rows,
            "recommendations": recommendation_rows,
        }
# ============================================================
# 维度1: 内容可提取性诊断 (满分20)
# ============================================================
def diagnose_content_extractability(
    has_direct_answer: bool = False,
    has_qa_headings: bool = False,
    has_structured_data: bool = False,
    has_internal_links: bool = False,
    has_freshness_info: bool = False,
    update_days_ago: int | None = None,
) -> GEODimensionScore:
    """
    Score the "content extractability" dimension (20 points).

    Args:
        has_direct_answer: whether the page opens with a concise answer block
        has_qa_headings: whether H2/H3 headings are phrased as questions
        has_structured_data: whether lists/tables are used
        has_internal_links: whether the page links to sub-intent pages
        has_freshness_info: whether update date / author info is shown
        update_days_ago: days since the last update, or None if unknown

    Returns:
        GEODimensionScore: the scored dimension with its five check items
    """
    dimension_max = 20.0

    def flag_item(label, present, points, missing_status, what, advice):
        # All-or-nothing check: full points when the feature is present.
        return DiagnosisItem(
            name=label,
            status="pass" if present else missing_status,
            description=what,
            suggestion=advice,
            score=points if present else 0.0,
            max_score=points,
        )

    items = [
        # 1. Direct answer block (P0, 6 points)
        flag_item(
            "直接回答块", has_direct_answer, 6.0, "fail",
            "页面首段是否包含简洁明确的答案便于AI直接提取",
            "在页面首段添加2-3句话的简洁答案直接回答用户核心问题",
        ),
        # 2. Question-style headings (P0, 5 points)
        flag_item(
            "问答式标题", has_qa_headings, 5.0, "fail",
            "H2/H3标题是否采用问题形式帮助AI理解内容结构",
            "将关键H2/H3标题改为问题形式'什么是X''如何使用Y'",
        ),
        # 3. Lists and tables (P0, 4 points)
        flag_item(
            "列表和表格", has_structured_data, 4.0, "fail",
            "是否使用列表、表格等结构化数据展示信息",
            "使用HTML列表(ul/ol)和表格(table)组织信息便于AI解析",
        ),
        # 4. Internal links to sub-intent pages (P1, 3 points)
        flag_item(
            "内链到子意图页", has_internal_links, 3.0, "warning",
            "是否链接到相关深度内容页面",
            "添加内链到相关子话题页面,形成内容网络",
        ),
    ]

    # 5. Content freshness (P1, 2 points): graded by age of last update.
    if not has_freshness_info:
        fresh_points, fresh_state = 0.0, "fail"
    elif update_days_ago is None:
        fresh_points, fresh_state = 1.0, "warning"
    elif update_days_ago <= 30:
        fresh_points, fresh_state = 2.0, "pass"
    elif update_days_ago <= 90:
        fresh_points, fresh_state = 1.5, "warning"
    else:
        fresh_points, fresh_state = 0.5, "warning"
    items.append(DiagnosisItem(
        name="内容新鲜度",
        status=fresh_state,
        description="是否有更新日期和作者信息,体现内容时效性",
        suggestion="在页面显眼位置展示最后更新日期和作者信息",
        score=fresh_points,
        max_score=2.0,
    ))

    earned = sum(entry.score for entry in items)
    # The dimension is only downgraded to "warning" when some item failed.
    nothing_failed = all(entry.status != "fail" for entry in items)
    return GEODimensionScore(
        name="内容可提取性",
        score=earned,
        max_score=dimension_max,
        items=items,
        status="pass" if nothing_failed else "warning",
        percentage=round((earned / dimension_max) * 100, 2),
        detail={
            "has_direct_answer": has_direct_answer,
            "has_qa_headings": has_qa_headings,
            "has_structured_data": has_structured_data,
            "has_internal_links": has_internal_links,
            "has_freshness_info": has_freshness_info,
            "update_days_ago": update_days_ago,
        },
    )
# ============================================================
# 维度2: 实体清晰度诊断 (满分15)
# ============================================================
def diagnose_entity_clarity(
    has_brand_definition: bool = False,
    has_target_audience: bool = False,
    has_unique_value: bool = False,
    has_industry_classification: bool = False,
) -> GEODimensionScore:
    """
    Score the "entity clarity" dimension (15 points).

    Args:
        has_brand_definition: whether the brand's business is clearly stated
        has_target_audience: whether the target audience is stated
        has_unique_value: whether a differentiating value proposition exists
        has_industry_classification: whether the industry is stated

    Returns:
        GEODimensionScore: the scored dimension with its four check items
    """
    dimension_max = 15.0

    # (name, present?, points, status when missing, description, suggestion)
    checks = (
        (
            "品牌定义", has_brand_definition, 5.0, "fail",
            "是否清晰说明品牌做什么AI理解准确率目标≥95%",
            "在首页和About页面添加清晰的品牌定义包含核心业务和价值主张",
        ),
        (
            "目标受众", has_target_audience, 4.0, "fail",
            "是否明确服务谁实体识别准确率目标≥90%",
            "明确描述目标用户群体,如'为中小企业提供XX服务'",
        ),
        (
            "差异化价值", has_unique_value, 3.0, "warning",
            "为什么选择这个品牌独特性评分目标≥80",
            "突出品牌独特优势,如技术领先、服务优质、价格合理等",
        ),
        (
            "行业分类", has_industry_classification, 3.0, "warning",
            "品牌属于什么行业分类准确率目标≥95%",
            "在页面中明确标注行业分类,如'SaaS''电子商务'",
        ),
    )
    items = [
        DiagnosisItem(
            name=label,
            status="pass" if present else missing_status,
            description=what,
            suggestion=advice,
            score=points if present else 0.0,
            max_score=points,
        )
        for label, present, points, missing_status, what, advice in checks
    ]

    earned = sum(entry.score for entry in items)
    # Only a failed item downgrades the dimension to "warning".
    nothing_failed = all(entry.status != "fail" for entry in items)
    return GEODimensionScore(
        name="实体清晰度",
        score=earned,
        max_score=dimension_max,
        items=items,
        status="pass" if nothing_failed else "warning",
        percentage=round((earned / dimension_max) * 100, 2),
        detail={
            "has_brand_definition": has_brand_definition,
            "has_target_audience": has_target_audience,
            "has_unique_value": has_unique_value,
            "has_industry_classification": has_industry_classification,
        },
    )
# ============================================================
# 维度3: E-E-A-T信号诊断 (满分20)
# ============================================================
def diagnose_eeat_signals(
    has_author_bio: bool = False,
    author_credentials_complete: float = 0.0,
    has_certifications: bool = False,
    certification_count: int = 0,
    has_data_sources: bool = False,
    authoritative_source_ratio: float = 0.0,
    has_expert_endorsements: bool = False,
    endorsement_count: int = 0,
) -> GEODimensionScore:
    """
    Score the "E-E-A-T signals" dimension (20 points).

    Ratio inputs are clamped to [0, 1] before scoring so that an
    out-of-range value can never push an item above its max_score or
    below zero. The ``detail`` dict still echoes the raw inputs.

    Args:
        has_author_bio: whether author credential information exists
        author_credentials_complete: completeness of the author bio (0-1)
        has_certifications: whether professional certifications exist
        certification_count: number of certifications / awards
        has_data_sources: whether data sources are cited
        authoritative_source_ratio: share of authoritative sources (0-1)
        has_expert_endorsements: whether expert endorsements exist
        endorsement_count: number of expert endorsements

    Returns:
        GEODimensionScore: the scored dimension with its four check items
    """
    max_score = 20.0
    items = []

    # Guard against callers passing ratios outside the documented 0-1 range.
    credentials_ratio = min(1.0, max(0.0, author_credentials_complete))
    source_ratio = min(1.0, max(0.0, authoritative_source_ratio))

    # 1. Author credentials (6 points), proportional to bio completeness.
    author_score = credentials_ratio * 6.0 if has_author_bio else 0.0
    author_status = "pass" if author_score >= 5.4 else ("warning" if author_score >= 3.0 else "fail")
    items.append(DiagnosisItem(
        name="作者资质",
        status=author_status,
        description="内容作者是否有专业背景作者简介完整度目标≥90%",
        suggestion="添加作者详细简介,包含教育背景、工作经验、专业领域",
        score=author_score,
        max_score=6.0,
    ))

    # 2. Professional certifications (5 points), tiered by count.
    cert_score = 0.0
    if has_certifications:
        if certification_count >= 5:
            cert_score = 5.0
        elif certification_count >= 3:
            cert_score = 4.0
        elif certification_count >= 1:
            cert_score = 2.5
    cert_status = "pass" if cert_score >= 4.0 else ("warning" if cert_score >= 2.0 else "fail")
    items.append(DiagnosisItem(
        name="专业认证",
        status=cert_status,
        description="是否有行业认证/奖项认证展示率目标≥80%",
        suggestion="展示行业认证、奖项、资质如ISO认证、行业奖项等",
        score=cert_score,
        max_score=5.0,
    ))

    # 3. Data sources (5 points), proportional to authoritative share.
    source_score = source_ratio * 5.0 if has_data_sources else 0.0
    source_status = "pass" if source_score >= 4.0 else ("warning" if source_score >= 2.0 else "fail")
    items.append(DiagnosisItem(
        name="数据来源",
        status=source_status,
        description="是否引用可靠数据引用权威源比例目标≥70%",
        suggestion="引用权威机构数据,如政府报告、学术研究、行业报告",
        score=source_score,
        max_score=5.0,
    ))

    # 4. Expert endorsements (4 points), tiered by count.
    endorsement_score = 0.0
    if has_expert_endorsements:
        if endorsement_count >= 5:
            endorsement_score = 4.0
        elif endorsement_count >= 3:
            endorsement_score = 3.0
        elif endorsement_count >= 1:
            endorsement_score = 1.5
    endorsement_status = "pass" if endorsement_score >= 3.0 else ("warning" if endorsement_score >= 1.5 else "fail")
    items.append(DiagnosisItem(
        name="专家背书",
        status=endorsement_status,
        description="是否有行业专家认可背书数量目标≥3",
        suggestion="获取行业专家推荐、用户评价、案例研究",
        score=endorsement_score,
        max_score=4.0,
    ))

    total_score = sum(item.score for item in items)
    percentage = (total_score / max_score) * 100
    # Only a failed item downgrades the dimension to "warning".
    has_fail = any(item.status == "fail" for item in items)
    status = "warning" if has_fail else "pass"
    return GEODimensionScore(
        name="E-E-A-T信号",
        score=total_score,
        max_score=max_score,
        items=items,
        status=status,
        percentage=round(percentage, 2),
        detail={
            "has_author_bio": has_author_bio,
            "author_credentials_complete": round(author_credentials_complete, 2),
            "has_certifications": has_certifications,
            "certification_count": certification_count,
            "has_data_sources": has_data_sources,
            "authoritative_source_ratio": round(authoritative_source_ratio, 2),
            "has_expert_endorsements": has_expert_endorsements,
            "endorsement_count": endorsement_count,
        },
    )
# ============================================================
# 维度4: Schema标记诊断 (满分15)
# ============================================================
def diagnose_schema_markup(
    has_organization: bool = False,
    has_product: bool = False,
    has_article: bool = False,
    has_faq: bool = False,
    has_howto: bool = False,
    has_breadcrumb: bool = False,
) -> GEODimensionScore:
    """
    Score the "Schema markup" dimension (15 points).

    Args:
        has_organization: whether Organization markup is present
        has_product: whether Product markup is present
        has_article: whether Article/BlogPosting markup is present
        has_faq: whether FAQPage markup is present
        has_howto: whether HowTo markup is present
        has_breadcrumb: whether BreadcrumbList markup is present

    Returns:
        GEODimensionScore: the scored dimension with its six check items
    """
    dimension_max = 15.0

    # (name, present?, points, status when missing, description, suggestion)
    # The first three are required (P0); the rest are recommended (P1).
    checks = (
        (
            "Organization", has_organization, 4.0, "fail",
            "企业主页的Organization标记包含名称、logo、联系方式",
            "添加Organization Schema包含@type: Organization、name、url、logo",
        ),
        (
            "Product", has_product, 3.0, "fail",
            "产品页的Product标记包含名称、描述、价格、评价",
            "为产品页添加Product Schema包含name、description、offers、aggregateRating",
        ),
        (
            "Article/BlogPosting", has_article, 3.0, "fail",
            "博客文章的Article标记包含作者、发布日期、摘要",
            "为文章添加Article或BlogPosting Schema包含author、datePublished、headline",
        ),
        (
            "FAQPage", has_faq, 2.0, "warning",
            "常见问题的FAQPage标记",
            "为FAQ页面添加FAQPage Schema包含问题和答案对",
        ),
        (
            "HowTo", has_howto, 2.0, "warning",
            "操作指南的HowTo标记",
            "为教程类内容添加HowTo Schema包含步骤列表",
        ),
        (
            "BreadcrumbList", has_breadcrumb, 1.0, "warning",
            "导航结构的BreadcrumbList标记",
            "添加BreadcrumbList Schema帮助AI理解页面层级关系",
        ),
    )
    items = [
        DiagnosisItem(
            name=label,
            status="pass" if present else missing_status,
            description=what,
            suggestion=advice,
            score=points if present else 0.0,
            max_score=points,
        )
        for label, present, points, missing_status, what, advice in checks
    ]

    earned = sum(entry.score for entry in items)
    # Only a failed item downgrades the dimension to "warning".
    nothing_failed = all(entry.status != "fail" for entry in items)
    # Number of schema types present (sum over the raw flag values).
    present_total = sum(
        [
            has_organization, has_product, has_article,
            has_faq, has_howto, has_breadcrumb,
        ]
    )
    return GEODimensionScore(
        name="Schema标记",
        score=earned,
        max_score=dimension_max,
        items=items,
        status="pass" if nothing_failed else "warning",
        percentage=round((earned / dimension_max) * 100, 2),
        detail={
            "has_organization": has_organization,
            "has_product": has_product,
            "has_article": has_article,
            "has_faq": has_faq,
            "has_howto": has_howto,
            "has_breadcrumb": has_breadcrumb,
            "schema_count": present_total,
        },
    )
# ============================================================
# 维度5: 主题权威诊断 (满分15)
# ============================================================
def diagnose_topic_authority(
    content_depth_score: float = 0.0,
    topic_coverage_ratio: float = 0.0,
    entity_consistency_score: float = 0.0,
    cluster_completeness: float = 0.0,
    total_content_count: int = 0,
    topic_cluster_count: int = 0,
) -> GEODimensionScore:
    """
    Score the "topic authority" dimension (15 points).

    Each of the four ratio inputs is clamped to [0, 1] before scoring so
    that an out-of-range value can never push an item above its max_score
    or below zero. The ``detail`` dict still echoes the raw inputs.

    Args:
        content_depth_score: content depth score (0-1)
        topic_coverage_ratio: topic coverage (0-1)
        entity_consistency_score: entity signal consistency (0-1)
        cluster_completeness: internal-link cluster completeness (0-1)
        total_content_count: total number of content pieces (echoed only)
        topic_cluster_count: number of topic clusters (echoed only)

    Returns:
        GEODimensionScore: the scored dimension with its four check items
    """
    max_score = 15.0
    items = []

    def _unit(value):
        # Clamp a ratio to the documented 0-1 range.
        return min(1.0, max(0.0, value))

    depth = _unit(content_depth_score)
    coverage = _unit(topic_coverage_ratio)
    consistency = _unit(entity_consistency_score)
    completeness = _unit(cluster_completeness)

    # 1. Content depth (5 points)
    depth_status = "pass" if depth >= 0.8 else ("warning" if depth >= 0.5 else "fail")
    items.append(DiagnosisItem(
        name="内容深度",
        status=depth_status,
        description="是否全面覆盖主题内容质量QScore目标≥4.6/5",
        suggestion="增加内容深度,包含详细解释、案例分析、数据支撑",
        score=depth * 5.0,
        max_score=5.0,
    ))

    # 2. Topic coverage (4 points)
    coverage_status = "pass" if coverage >= 0.8 else ("warning" if coverage >= 0.5 else "fail")
    items.append(DiagnosisItem(
        name="话题覆盖度",
        status=coverage_status,
        description="是否覆盖相关子话题话题覆盖率目标≥80%",
        suggestion="创建覆盖核心话题及其子话题的内容矩阵",
        score=coverage * 4.0,
        max_score=4.0,
    ))

    # 3. Entity signal consistency (3 points)
    consistency_status = "pass" if consistency >= 0.85 else ("warning" if consistency >= 0.6 else "fail")
    items.append(DiagnosisItem(
        name="实体信号一致性",
        status=consistency_status,
        description="各页面实体信号是否一致一致性评分目标≥85%",
        suggestion="确保各页面使用一致的品牌名称、描述、行业分类",
        score=consistency * 3.0,
        max_score=3.0,
    ))

    # 4. Internal link network (3 points)
    network_status = "pass" if completeness >= 0.7 else ("warning" if completeness >= 0.4 else "fail")
    items.append(DiagnosisItem(
        name="内链网络",
        status=network_status,
        description="是否形成主题内容集群集群完整度目标≥70%",
        suggestion="建立主题集群,通过内链将相关内容连接成网络",
        score=completeness * 3.0,
        max_score=3.0,
    ))

    total_score = sum(item.score for item in items)
    percentage = (total_score / max_score) * 100
    # Only a failed item downgrades the dimension to "warning".
    has_fail = any(item.status == "fail" for item in items)
    status = "warning" if has_fail else "pass"
    return GEODimensionScore(
        name="主题权威",
        score=total_score,
        max_score=max_score,
        items=items,
        status=status,
        percentage=round(percentage, 2),
        detail={
            "content_depth_score": round(content_depth_score, 2),
            "topic_coverage_ratio": round(topic_coverage_ratio, 2),
            "entity_consistency_score": round(entity_consistency_score, 2),
            "cluster_completeness": round(cluster_completeness, 2),
            "total_content_count": total_content_count,
            "topic_cluster_count": topic_cluster_count,
        },
    )
# ============================================================
# 维度6: 引用就绪度诊断 (满分15)
# ============================================================
def diagnose_citation_readiness(
    answer_ownership_rate: float = 0.0,
    citation_accuracy: float = 0.0,
    ai_sov: float = 0.0,
    competitor_gap: float = 0.0,
    total_ai_responses: int = 0,
    brand_mention_count: int = 0,
    accurate_citation_count: int = 0,
) -> GEODimensionScore:
    """
    Score the "citation readiness" dimension (15 points).

    Item scores are kept within [0, max_score] even for out-of-range
    inputs, and the competitor-gap score never increases as the gap grows.
    The ``detail`` dict echoes the raw inputs.

    Args:
        answer_ownership_rate: AOR, Answer Ownership Rate (0-1)
        citation_accuracy: citation accuracy (0-1)
        ai_sov: AI Share of Voice (0-1)
        competitor_gap: gap to competitors; the thresholds treat it as a
            fraction (0.1 corresponds to the 10pp target)
        total_ai_responses: total number of AI responses (echoed only)
        brand_mention_count: number of brand mentions (echoed only)
        accurate_citation_count: number of accurate citations (echoed only)

    Returns:
        GEODimensionScore: the scored dimension with its four check items
    """
    max_score = 15.0
    items = []

    # 1. Citation frequency / AOR (5 points), tiered with a linear tail.
    if answer_ownership_rate >= 0.5:
        aor_score = 5.0
    elif answer_ownership_rate >= 0.3:
        aor_score = 3.5
    elif answer_ownership_rate >= 0.1:
        aor_score = 2.0
    else:
        # Floor at 0 so a negative rate cannot yield a negative score.
        aor_score = max(0.0, answer_ownership_rate * 10.0)
    aor_status = "pass" if answer_ownership_rate >= 0.5 else ("warning" if answer_ownership_rate >= 0.2 else "fail")
    items.append(DiagnosisItem(
        name="引用频率 (AOR)",
        status=aor_status,
        description="品牌在AI回答中被提及的频率AOR目标≥50%",
        suggestion="优化内容结构提高被AI引用的概率",
        score=aor_score,
        max_score=5.0,
    ))

    # 2. Citation quality (4 points), proportional and bounded to 0..4.
    accuracy_score = min(4.0, max(0.0, citation_accuracy * 4.0))
    accuracy_status = "pass" if citation_accuracy >= 0.9 else ("warning" if citation_accuracy >= 0.7 else "fail")
    items.append(DiagnosisItem(
        name="引用质量",
        status=accuracy_status,
        description="引用内容是否准确完整引用准确率目标≥90%",
        suggestion="确保内容准确无误,避免过时或错误信息",
        score=accuracy_score,
        max_score=4.0,
    ))

    # 3. AI share of voice (3 points), tiered with a linear tail.
    if ai_sov >= 0.3:
        sov_score = 3.0
    elif ai_sov >= 0.15:
        sov_score = 2.0
    elif ai_sov >= 0.05:
        sov_score = 1.0
    else:
        sov_score = max(0.0, ai_sov * 10.0)
    sov_status = "pass" if ai_sov >= 0.3 else ("warning" if ai_sov >= 0.1 else "fail")
    items.append(DiagnosisItem(
        name="AI声量占比",
        status=sov_status,
        description="品牌在AI回答中的占比AI SOV目标≥30%",
        suggestion="增加品牌曝光提高在AI回答中的出现频率",
        score=sov_score,
        max_score=3.0,
    ))

    # 4. Competitor comparison (3 points): smaller gap scores higher.
    if competitor_gap <= 0.1:
        gap_score = 3.0
    elif competitor_gap <= 0.2:
        gap_score = 2.0
    elif competitor_gap <= 0.3:
        gap_score = 1.0
    elif competitor_gap >= 0.6:
        gap_score = 0.0
    else:
        # The linear tail 3 - 5*gap exceeds 1.0 for gaps in (0.3, 0.4),
        # which would rank a larger gap above the 0.2-0.3 tier; cap it at
        # that tier's score so the curve is non-increasing.
        gap_score = min(1.0, max(0.0, 3.0 - competitor_gap * 5))
    gap_status = "pass" if competitor_gap <= 0.1 else ("warning" if competitor_gap <= 0.25 else "fail")
    items.append(DiagnosisItem(
        name="竞品对比",
        status=gap_status,
        description="与竞品在AI回答中的表现差距差距目标≤10pp",
        suggestion="分析竞品优势,针对性优化内容策略",
        score=gap_score,
        max_score=3.0,
    ))

    total_score = sum(item.score for item in items)
    percentage = (total_score / max_score) * 100
    # Only a failed item downgrades the dimension to "warning".
    has_fail = any(item.status == "fail" for item in items)
    status = "warning" if has_fail else "pass"
    return GEODimensionScore(
        name="引用就绪度",
        score=total_score,
        max_score=max_score,
        items=items,
        status=status,
        percentage=round(percentage, 2),
        detail={
            "answer_ownership_rate": round(answer_ownership_rate, 2),
            "citation_accuracy": round(citation_accuracy, 2),
            "ai_sov": round(ai_sov, 2),
            "competitor_gap": round(competitor_gap, 2),
            "total_ai_responses": total_ai_responses,
            "brand_mention_count": brand_mention_count,
            "accurate_citation_count": accurate_citation_count,
        },
    )
# ============================================================
# 推荐生成
# ============================================================
def generate_recommendations(dimensions: list[GEODimensionScore]) -> list[GEORecommendation]:
    """
    Build optimisation recommendations from the diagnosis results.

    Every item with status "fail" becomes a P0/high-impact recommendation
    and every "warning" item a P1/medium-impact one; passing items are
    skipped. The result is ordered by priority, keeping the input order
    within the same priority.

    Args:
        dimensions: per-dimension diagnosis results

    Returns:
        list[GEORecommendation]: recommendations sorted by priority
    """
    # status -> (priority, impact); any other status yields no recommendation.
    status_map = {"fail": ("P0", "high"), "warning": ("P1", "medium")}

    recommendations = []
    for dim in dimensions:
        # Schema items are named after the schema type ("Organization",
        # "Product", ...), so the markers must also be looked up in the
        # dimension name ("Schema标记") for the "easy" rule to apply.
        schema_dimension = "Schema" in dim.name or "标记" in dim.name
        for item in dim.items:
            mapped = status_map.get(item.status)
            if mapped is None:
                continue
            priority, impact = mapped

            # Estimate implementation effort from the kind of check.
            if schema_dimension or "Schema" in item.name or "标记" in item.name:
                effort = "easy"
            elif "内容深度" in item.name or "话题覆盖" in item.name:
                effort = "hard"
            else:
                effort = "medium"

            recommendations.append(GEORecommendation(
                priority=priority,
                dimension=dim.name,
                title=f"优化: {item.name}",
                description=item.suggestion,
                impact=impact,
                effort=effort,
            ))

    # list.sort is stable, so items keep their order within a priority.
    priority_order = {"P0": 0, "P1": 1, "P2": 2}
    recommendations.sort(key=lambda r: priority_order.get(r.priority, 3))
    return recommendations
# ============================================================
# 工具函数
# ============================================================
def get_health_level(score: float) -> str:
    """
    Map a score to its health level.

    80+   -> excellent
    60-79 -> good
    40-59 -> pass
    <40   -> danger
    """
    # Bands are checked from the highest floor down; first match wins.
    bands = ((80, "excellent"), (60, "good"), (40, "pass"))
    for floor, level in bands:
        if score >= floor:
            return level
    return "danger"
def get_health_level_label(level: str) -> str:
    """Return the Chinese display label for a health level key."""
    known = dict(
        excellent="优秀",
        good="良好",
        danger="危险",
    )
    # "pass" is a Python keyword, so it cannot be given as a keyword argument.
    known["pass"] = "及格"
    try:
        return known[level]
    except KeyError:
        # Unknown level keys fall back to a generic label.
        return "未知"
# ============================================================
# GEODiagnosisService 服务类
# ============================================================
@dataclass
class GEODiagnosisInput:
    """Input parameters for a GEO diagnosis, grouped by dimension."""

    # --- Content extractability ---
    has_direct_answer: bool = False
    has_qa_headings: bool = False
    has_structured_data: bool = False
    has_internal_links: bool = False
    has_freshness_info: bool = False
    update_days_ago: int | None = None

    # --- Entity clarity ---
    has_brand_definition: bool = False
    has_target_audience: bool = False
    has_unique_value: bool = False
    has_industry_classification: bool = False

    # --- E-E-A-T signals ---
    has_author_bio: bool = False
    author_credentials_complete: float = 0.0
    has_certifications: bool = False
    certification_count: int = 0
    has_data_sources: bool = False
    authoritative_source_ratio: float = 0.0
    has_expert_endorsements: bool = False
    endorsement_count: int = 0

    # --- Schema markup ---
    has_organization: bool = False
    has_product: bool = False
    has_article: bool = False
    has_faq: bool = False
    has_howto: bool = False
    has_breadcrumb: bool = False

    # --- Topic authority ---
    content_depth_score: float = 0.0
    topic_coverage_ratio: float = 0.0
    entity_consistency_score: float = 0.0
    cluster_completeness: float = 0.0
    total_content_count: int = 0
    topic_cluster_count: int = 0

    # --- Citation readiness ---
    answer_ownership_rate: float = 0.0
    citation_accuracy: float = 0.0
    ai_sov: float = 0.0
    competitor_gap: float = 0.0
    total_ai_responses: int = 0
    brand_mention_count: int = 0
    accurate_citation_count: int = 0
class GEODiagnosisService:
    """Service that runs the six-dimension GEO diagnosis."""

    def diagnose(self, input_data: GEODiagnosisInput) -> GEODiagnosisResult:
        """
        Run the GEO diagnosis.

        Args:
            input_data: diagnosis input parameters

        Returns:
            GEODiagnosisResult: overall score, dimension results and
            recommendations
        """

        def take(*field_names):
            # Collect the named input attributes as keyword arguments.
            return {key: getattr(input_data, key) for key in field_names}

        dimensions = [
            # 1. Content extractability (20 points)
            diagnose_content_extractability(**take(
                "has_direct_answer",
                "has_qa_headings",
                "has_structured_data",
                "has_internal_links",
                "has_freshness_info",
                "update_days_ago",
            )),
            # 2. Entity clarity (15 points)
            diagnose_entity_clarity(**take(
                "has_brand_definition",
                "has_target_audience",
                "has_unique_value",
                "has_industry_classification",
            )),
            # 3. E-E-A-T signals (20 points)
            diagnose_eeat_signals(**take(
                "has_author_bio",
                "author_credentials_complete",
                "has_certifications",
                "certification_count",
                "has_data_sources",
                "authoritative_source_ratio",
                "has_expert_endorsements",
                "endorsement_count",
            )),
            # 4. Schema markup (15 points)
            diagnose_schema_markup(**take(
                "has_organization",
                "has_product",
                "has_article",
                "has_faq",
                "has_howto",
                "has_breadcrumb",
            )),
            # 5. Topic authority (15 points)
            diagnose_topic_authority(**take(
                "content_depth_score",
                "topic_coverage_ratio",
                "entity_consistency_score",
                "cluster_completeness",
                "total_content_count",
                "topic_cluster_count",
            )),
            # 6. Citation readiness (15 points)
            diagnose_citation_readiness(**take(
                "answer_ownership_rate",
                "citation_accuracy",
                "ai_sov",
                "competitor_gap",
                "total_ai_responses",
                "brand_mention_count",
                "accurate_citation_count",
            )),
        ]

        # Overall score is the sum of dimension scores, bounded to 0-100.
        raw_total = sum(dim.score for dim in dimensions)
        bounded_total = min(100.0, max(0.0, raw_total))

        return GEODiagnosisResult(
            overall_score=round(bounded_total, 2),
            dimensions=dimensions,
            recommendations=generate_recommendations(dimensions),
        )

    def diagnose_from_dict(self, data: dict) -> GEODiagnosisResult:
        """
        Run the GEO diagnosis from a plain dictionary (convenience method).

        Args:
            data: diagnosis parameters keyed by GEODiagnosisInput field name

        Returns:
            GEODiagnosisResult: the diagnosis result
        """
        return self.diagnose(GEODiagnosisInput(**data))