# Listing metadata from the source viewer: 1079 lines, 37 KiB, Python.
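"""
GEO diagnosis service: a six-dimension detection system (GEO诊断服务 - 6大维度检测系统).

Diagnosis dimensions (100 points in total):
- 内容可提取性 (Content Extractability): 20 points - can AI easily extract and understand the content
- 实体清晰度 (Entity Clarity): 15 points - can AI understand what the brand is
- E-E-A-T信号 (E-E-A-T Signals): 20 points - experience, expertise, authoritativeness, trustworthiness
- Schema标记 (Schema Markup): 15 points - completeness of structured data
- 主题权威 (Topic Authority): 15 points - the brand's authority in a specific field
- 引用就绪度 (Citation Readiness): 15 points - likelihood of the brand being cited in AI answers
"""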
|
||
from __future__ import annotations
|
||
|
||
import logging
|
||
from dataclasses import dataclass, field
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
# ============================================================
|
||
# 诊断数据结构
|
||
# ============================================================
|
||
|
||
@dataclass
class DiagnosisItem:
    """One individual check inside a diagnosis dimension.

    Attributes:
        name: Display name of the check.
        status: Outcome of the check: "pass", "warning" or "fail".
        description: Explanation of what the check evaluates.
        suggestion: Optimisation advice attached to the check.
        score: Points awarded for this check.
        max_score: Maximum points this check can award.
    """

    name: str
    status: str
    description: str
    suggestion: str
    score: float = 0.0
    max_score: float = 0.0
|
||
|
||
|
||
@dataclass
class GEODimensionScore:
    """Scoring details for a single diagnosis dimension.

    Attributes:
        name: Dimension name.
        score: Points earned in this dimension (0 to ``max_score``).
        max_score: Maximum points for this dimension.
        items: Individual checks that make up the dimension.
        status: Dimension status: "pass", "warning" or "fail".
        percentage: Score rate on a 0-100 scale.
        detail: Free-form dictionary of supporting data.
    """

    name: str
    score: float
    max_score: float
    items: list[DiagnosisItem] = field(default_factory=list)
    status: str = "pass"
    percentage: float = 0.0
    detail: dict = field(default_factory=dict)
|
||
|
||
|
||
@dataclass
class GEORecommendation:
    """A single optimisation recommendation.

    Attributes:
        priority: Priority label: "P0", "P1" or "P2".
        dimension: Name of the dimension the recommendation belongs to.
        title: Short headline of the recommendation.
        description: Full text of the recommendation.
        impact: Expected impact: "high", "medium" or "low".
        effort: Implementation difficulty: "easy", "medium" or "hard".
    """

    priority: str
    dimension: str
    title: str
    description: str
    impact: str
    effort: str
|
||
|
||
|
||
@dataclass
class GEODiagnosisResult:
    """Overall result of a GEO diagnosis.

    Attributes:
        overall_score: Combined score on a 0-100 scale.
        dimensions: Per-dimension scoring details.
        recommendations: Optimisation recommendations.
        health_level: "excellent", "good", "pass" or "danger"; always
            recomputed from ``overall_score`` after initialisation.
    """

    overall_score: float = 0.0
    dimensions: list[GEODimensionScore] = field(default_factory=list)
    recommendations: list[GEORecommendation] = field(default_factory=list)
    health_level: str = "danger"

    def __post_init__(self):
        """Derive the health level from the overall score."""
        level = "danger"
        # Thresholds are checked from highest to lowest; the first match wins.
        for threshold, label in ((80, "excellent"), (60, "good"), (40, "pass")):
            if self.overall_score >= threshold:
                level = label
                break
        self.health_level = level

    def to_dict(self) -> dict:
        """Convert the result into a plain dictionary."""
        payload = {
            "overall_score": round(self.overall_score, 2),
            "health_level": self.health_level,
            "health_level_label": get_health_level_label(self.health_level),
        }

        dimension_rows = []
        for dim in self.dimensions:
            item_rows = []
            for item in dim.items:
                item_rows.append({
                    "name": item.name,
                    "status": item.status,
                    "description": item.description,
                    "suggestion": item.suggestion,
                    "score": round(item.score, 2),
                    "max_score": item.max_score,
                })
            dimension_rows.append({
                "name": dim.name,
                "score": round(dim.score, 2),
                "max_score": dim.max_score,
                "percentage": round(dim.percentage, 2),
                "status": dim.status,
                "items": item_rows,
                "detail": dim.detail,
            })
        payload["dimensions"] = dimension_rows

        recommendation_rows = []
        for rec in self.recommendations:
            recommendation_rows.append({
                "priority": rec.priority,
                "dimension": rec.dimension,
                "title": rec.title,
                "description": rec.description,
                "impact": rec.impact,
                "effort": rec.effort,
            })
        payload["recommendations"] = recommendation_rows

        return payload
|
||
|
||
|
||
# ============================================================
|
||
# 维度1: 内容可提取性诊断 (满分20)
|
||
# ============================================================
|
||
|
||
def diagnose_content_extractability(
    has_direct_answer: bool = False,
    has_qa_headings: bool = False,
    has_structured_data: bool = False,
    has_internal_links: bool = False,
    has_freshness_info: bool = False,
    update_days_ago: int | None = None,
) -> GEODimensionScore:
    """
    Diagnose content extractability (20 points maximum).

    Measures how easily AI can extract and understand the content.

    Args:
        has_direct_answer: Whether a direct-answer block exists (a concise
            answer in the first paragraph).
        has_qa_headings: Whether H2/H3 headings are phrased as questions.
        has_structured_data: Whether lists, tables or similar structures are used.
        has_internal_links: Whether the page links to sub-intent pages.
        has_freshness_info: Whether freshness info (update date and author) is shown.
        update_days_ago: Days since the content was last updated, if known.

    Returns:
        GEODimensionScore: Score for the content extractability dimension.
    """
    dimension_max = 20.0

    # Each row: name, flag, points, status when the flag is off, description, suggestion.
    flag_checks = (
        (
            "直接回答块", has_direct_answer, 6.0, "fail",
            "页面首段是否包含简洁明确的答案,便于AI直接提取",
            "在页面首段添加2-3句话的简洁答案,直接回答用户核心问题",
        ),
        (
            "问答式标题", has_qa_headings, 5.0, "fail",
            "H2/H3标题是否采用问题形式,帮助AI理解内容结构",
            "将关键H2/H3标题改为问题形式,如'什么是X'、'如何使用Y'",
        ),
        (
            "列表和表格", has_structured_data, 4.0, "fail",
            "是否使用列表、表格等结构化数据展示信息",
            "使用HTML列表(ul/ol)和表格(table)组织信息,便于AI解析",
        ),
        (
            "内链到子意图页", has_internal_links, 3.0, "warning",
            "是否链接到相关深度内容页面",
            "添加内链到相关子话题页面,形成内容网络",
        ),
    )
    checks = [
        DiagnosisItem(
            name=label,
            status="pass" if present else missing_status,
            description=explanation,
            suggestion=advice,
            score=points if present else 0.0,
            max_score=points,
        )
        for label, present, points, missing_status, explanation, advice in flag_checks
    ]

    # Freshness (2 points): graded by how recently the content was updated.
    if not has_freshness_info:
        fresh_points, fresh_status = 0.0, "fail"
    elif update_days_ago is None:
        fresh_points, fresh_status = 1.0, "warning"
    elif update_days_ago <= 30:
        fresh_points, fresh_status = 2.0, "pass"
    elif update_days_ago <= 90:
        fresh_points, fresh_status = 1.5, "warning"
    else:
        fresh_points, fresh_status = 0.5, "warning"

    checks.append(DiagnosisItem(
        name="内容新鲜度",
        status=fresh_status,
        description="是否有更新日期和作者信息,体现内容时效性",
        suggestion="在页面显眼位置展示最后更新日期和作者信息",
        score=fresh_points,
        max_score=2.0,
    ))

    earned = sum(check.score for check in checks)
    rate = (earned / dimension_max) * 100

    # The dimension is "warning" when any check failed, otherwise "pass".
    failed = any(check.status == "fail" for check in checks)

    return GEODimensionScore(
        name="内容可提取性",
        score=earned,
        max_score=dimension_max,
        items=checks,
        status="warning" if failed else "pass",
        percentage=round(rate, 2),
        detail={
            "has_direct_answer": has_direct_answer,
            "has_qa_headings": has_qa_headings,
            "has_structured_data": has_structured_data,
            "has_internal_links": has_internal_links,
            "has_freshness_info": has_freshness_info,
            "update_days_ago": update_days_ago,
        },
    )
|
||
|
||
|
||
# ============================================================
|
||
# 维度2: 实体清晰度诊断 (满分15)
|
||
# ============================================================
|
||
|
||
def diagnose_entity_clarity(
    has_brand_definition: bool = False,
    has_target_audience: bool = False,
    has_unique_value: bool = False,
    has_industry_classification: bool = False,
) -> GEODimensionScore:
    """
    Diagnose entity clarity (15 points maximum).

    Measures whether AI can understand what the brand is.

    Args:
        has_brand_definition: Whether the brand clearly states what it does.
        has_target_audience: Whether it is clear whom the brand serves.
        has_unique_value: Whether a differentiated value proposition exists.
        has_industry_classification: Whether industry classification info exists.

    Returns:
        GEODimensionScore: Score for the entity clarity dimension.
    """
    dimension_max = 15.0

    # Each row: name, flag, points, status when the flag is off, description, suggestion.
    flag_checks = (
        (
            "品牌定义", has_brand_definition, 5.0, "fail",
            "是否清晰说明品牌做什么,AI理解准确率目标≥95%",
            "在首页和About页面添加清晰的品牌定义,包含核心业务和价值主张",
        ),
        (
            "目标受众", has_target_audience, 4.0, "fail",
            "是否明确服务谁,实体识别准确率目标≥90%",
            "明确描述目标用户群体,如'为中小企业提供XX服务'",
        ),
        (
            "差异化价值", has_unique_value, 3.0, "warning",
            "为什么选择这个品牌,独特性评分目标≥80",
            "突出品牌独特优势,如技术领先、服务优质、价格合理等",
        ),
        (
            "行业分类", has_industry_classification, 3.0, "warning",
            "品牌属于什么行业,分类准确率目标≥95%",
            "在页面中明确标注行业分类,如'SaaS'、'电子商务'等",
        ),
    )
    checks = [
        DiagnosisItem(
            name=label,
            status="pass" if present else missing_status,
            description=explanation,
            suggestion=advice,
            score=points if present else 0.0,
            max_score=points,
        )
        for label, present, points, missing_status, explanation, advice in flag_checks
    ]

    earned = sum(check.score for check in checks)
    rate = (earned / dimension_max) * 100
    failed = any(check.status == "fail" for check in checks)

    return GEODimensionScore(
        name="实体清晰度",
        score=earned,
        max_score=dimension_max,
        items=checks,
        status="warning" if failed else "pass",
        percentage=round(rate, 2),
        detail={
            "has_brand_definition": has_brand_definition,
            "has_target_audience": has_target_audience,
            "has_unique_value": has_unique_value,
            "has_industry_classification": has_industry_classification,
        },
    )
|
||
|
||
|
||
# ============================================================
|
||
# 维度3: E-E-A-T信号诊断 (满分20)
|
||
# ============================================================
|
||
|
||
def diagnose_eeat_signals(
    has_author_bio: bool = False,
    author_credentials_complete: float = 0.0,
    has_certifications: bool = False,
    certification_count: int = 0,
    has_data_sources: bool = False,
    authoritative_source_ratio: float = 0.0,
    has_expert_endorsements: bool = False,
    endorsement_count: int = 0,
) -> GEODimensionScore:
    """
    Diagnose E-E-A-T signals (20 points maximum).

    Measures the signals AI uses to verify brand credibility (experience,
    expertise, authoritativeness, trustworthiness).

    The two ratio inputs are clamped to [0, 1] before scoring so that an
    out-of-range value can never push an item above its ``max_score`` or
    below zero. The ``detail`` dictionary still echoes the raw inputs.

    Args:
        has_author_bio: Whether author credential information exists.
        author_credentials_complete: Completeness of the author bio (0-1).
        has_certifications: Whether professional certifications exist.
        certification_count: Number of certifications/awards.
        has_data_sources: Whether data sources are cited.
        authoritative_source_ratio: Share of citations from authoritative sources (0-1).
        has_expert_endorsements: Whether expert endorsements exist.
        endorsement_count: Number of expert endorsements.

    Returns:
        GEODimensionScore: Score for the E-E-A-T signals dimension.
    """
    max_score = 20.0
    items = []

    # 1. Author credentials (6 points), proportional to bio completeness.
    bio_completeness = min(1.0, max(0.0, author_credentials_complete))
    author_score = bio_completeness * 6.0 if has_author_bio else 0.0
    author_status = "pass" if author_score >= 5.4 else ("warning" if author_score >= 3.0 else "fail")
    items.append(DiagnosisItem(
        name="作者资质",
        status=author_status,
        description="内容作者是否有专业背景,作者简介完整度目标≥90%",
        suggestion="添加作者详细简介,包含教育背景、工作经验、专业领域",
        score=author_score,
        max_score=6.0,
    ))

    # 2. Professional certifications (5 points), tiered by count.
    cert_score = 0.0
    if has_certifications:
        if certification_count >= 5:
            cert_score = 5.0
        elif certification_count >= 3:
            cert_score = 4.0
        elif certification_count >= 1:
            cert_score = 2.5
    cert_status = "pass" if cert_score >= 4.0 else ("warning" if cert_score >= 2.0 else "fail")
    items.append(DiagnosisItem(
        name="专业认证",
        status=cert_status,
        description="是否有行业认证/奖项,认证展示率目标≥80%",
        suggestion="展示行业认证、奖项、资质,如ISO认证、行业奖项等",
        score=cert_score,
        max_score=5.0,
    ))

    # 3. Data sources (5 points), proportional to the authoritative-source share.
    source_ratio = min(1.0, max(0.0, authoritative_source_ratio))
    source_score = source_ratio * 5.0 if has_data_sources else 0.0
    source_status = "pass" if source_score >= 4.0 else ("warning" if source_score >= 2.0 else "fail")
    items.append(DiagnosisItem(
        name="数据来源",
        status=source_status,
        description="是否引用可靠数据,引用权威源比例目标≥70%",
        suggestion="引用权威机构数据,如政府报告、学术研究、行业报告",
        score=source_score,
        max_score=5.0,
    ))

    # 4. Expert endorsements (4 points), tiered by count.
    endorsement_score = 0.0
    if has_expert_endorsements:
        if endorsement_count >= 5:
            endorsement_score = 4.0
        elif endorsement_count >= 3:
            endorsement_score = 3.0
        elif endorsement_count >= 1:
            endorsement_score = 1.5
    endorsement_status = (
        "pass" if endorsement_score >= 3.0
        else ("warning" if endorsement_score >= 1.5 else "fail")
    )
    items.append(DiagnosisItem(
        name="专家背书",
        status=endorsement_status,
        description="是否有行业专家认可,背书数量目标≥3",
        suggestion="获取行业专家推荐、用户评价、案例研究",
        score=endorsement_score,
        max_score=4.0,
    ))

    total_score = sum(item.score for item in items)
    percentage = (total_score / max_score) * 100

    # The dimension is "warning" when any item failed, otherwise "pass".
    has_fail = any(item.status == "fail" for item in items)
    status = "warning" if has_fail else "pass"

    return GEODimensionScore(
        name="E-E-A-T信号",
        score=total_score,
        max_score=max_score,
        items=items,
        status=status,
        percentage=round(percentage, 2),
        detail={
            "has_author_bio": has_author_bio,
            "author_credentials_complete": round(author_credentials_complete, 2),
            "has_certifications": has_certifications,
            "certification_count": certification_count,
            "has_data_sources": has_data_sources,
            "authoritative_source_ratio": round(authoritative_source_ratio, 2),
            "has_expert_endorsements": has_expert_endorsements,
            "endorsement_count": endorsement_count,
        },
    )
|
||
|
||
|
||
# ============================================================
|
||
# 维度4: Schema标记诊断 (满分15)
|
||
# ============================================================
|
||
|
||
def diagnose_schema_markup(
    has_organization: bool = False,
    has_product: bool = False,
    has_article: bool = False,
    has_faq: bool = False,
    has_howto: bool = False,
    has_breadcrumb: bool = False,
) -> GEODimensionScore:
    """
    Diagnose Schema markup completeness (15 points maximum).

    Structured data helps AI understand the content.

    Args:
        has_organization: Whether Organization markup exists (company home page).
        has_product: Whether Product markup exists (product pages).
        has_article: Whether Article/BlogPosting markup exists (blog posts).
        has_faq: Whether FAQPage markup exists (frequently asked questions).
        has_howto: Whether HowTo markup exists (how-to guides).
        has_breadcrumb: Whether BreadcrumbList markup exists (navigation structure).

    Returns:
        GEODimensionScore: Score for the Schema markup dimension.
    """
    dimension_max = 15.0

    # Each row: name, flag, points, status when the flag is off, description, suggestion.
    # The first three types fail when missing; the last three only warn.
    flag_checks = (
        (
            "Organization", has_organization, 4.0, "fail",
            "企业主页的Organization标记,包含名称、logo、联系方式",
            "添加Organization Schema,包含@type: Organization、name、url、logo",
        ),
        (
            "Product", has_product, 3.0, "fail",
            "产品页的Product标记,包含名称、描述、价格、评价",
            "为产品页添加Product Schema,包含name、description、offers、aggregateRating",
        ),
        (
            "Article/BlogPosting", has_article, 3.0, "fail",
            "博客文章的Article标记,包含作者、发布日期、摘要",
            "为文章添加Article或BlogPosting Schema,包含author、datePublished、headline",
        ),
        (
            "FAQPage", has_faq, 2.0, "warning",
            "常见问题的FAQPage标记",
            "为FAQ页面添加FAQPage Schema,包含问题和答案对",
        ),
        (
            "HowTo", has_howto, 2.0, "warning",
            "操作指南的HowTo标记",
            "为教程类内容添加HowTo Schema,包含步骤列表",
        ),
        (
            "BreadcrumbList", has_breadcrumb, 1.0, "warning",
            "导航结构的BreadcrumbList标记",
            "添加BreadcrumbList Schema,帮助AI理解页面层级关系",
        ),
    )
    checks = [
        DiagnosisItem(
            name=label,
            status="pass" if present else missing_status,
            description=explanation,
            suggestion=advice,
            score=points if present else 0.0,
            max_score=points,
        )
        for label, present, points, missing_status, explanation, advice in flag_checks
    ]

    earned = sum(check.score for check in checks)
    rate = (earned / dimension_max) * 100
    failed = any(check.status == "fail" for check in checks)

    # Number of schema types present, counted by summing the flags.
    present_count = sum((
        has_organization, has_product, has_article,
        has_faq, has_howto, has_breadcrumb,
    ))

    return GEODimensionScore(
        name="Schema标记",
        score=earned,
        max_score=dimension_max,
        items=checks,
        status="warning" if failed else "pass",
        percentage=round(rate, 2),
        detail={
            "has_organization": has_organization,
            "has_product": has_product,
            "has_article": has_article,
            "has_faq": has_faq,
            "has_howto": has_howto,
            "has_breadcrumb": has_breadcrumb,
            "schema_count": present_count,
        },
    )
|
||
|
||
|
||
# ============================================================
|
||
# 维度5: 主题权威诊断 (满分15)
|
||
# ============================================================
|
||
|
||
def diagnose_topic_authority(
    content_depth_score: float = 0.0,
    topic_coverage_ratio: float = 0.0,
    entity_consistency_score: float = 0.0,
    cluster_completeness: float = 0.0,
    total_content_count: int = 0,
    topic_cluster_count: int = 0,
) -> GEODimensionScore:
    """
    Diagnose topic authority (15 points maximum).

    Measures the brand's authority in a specific field.

    Every ratio input is clamped to [0, 1] before scoring so that an
    out-of-range value can never push an item above its ``max_score`` or
    below zero. The ``detail`` dictionary still echoes the raw inputs.

    Args:
        content_depth_score: Content depth score (0-1); target >= 4.6/5, i.e. 0.92.
        topic_coverage_ratio: Topic coverage (0-1); target >= 80%.
        entity_consistency_score: Entity signal consistency (0-1); target >= 85%.
        cluster_completeness: Internal-link cluster completeness (0-1); target >= 70%.
        total_content_count: Total number of content pieces (reported only).
        topic_cluster_count: Number of topic clusters (reported only).

    Returns:
        GEODimensionScore: Score for the topic authority dimension.
    """
    max_score = 15.0

    # Each row: name, raw ratio, points, pass threshold, warning threshold,
    # description, suggestion.
    ratio_checks = (
        (
            "内容深度", content_depth_score, 5.0, 0.8, 0.5,
            "是否全面覆盖主题,内容质量QScore目标≥4.6/5",
            "增加内容深度,包含详细解释、案例分析、数据支撑",
        ),
        (
            "话题覆盖度", topic_coverage_ratio, 4.0, 0.8, 0.5,
            "是否覆盖相关子话题,话题覆盖率目标≥80%",
            "创建覆盖核心话题及其子话题的内容矩阵",
        ),
        (
            "实体信号一致性", entity_consistency_score, 3.0, 0.85, 0.6,
            "各页面实体信号是否一致,一致性评分目标≥85%",
            "确保各页面使用一致的品牌名称、描述、行业分类",
        ),
        (
            "内链网络", cluster_completeness, 3.0, 0.7, 0.4,
            "是否形成主题内容集群,集群完整度目标≥70%",
            "建立主题集群,通过内链将相关内容连接成网络",
        ),
    )

    items = []
    for label, raw_ratio, points, pass_at, warn_at, explanation, advice in ratio_checks:
        # Clamp so that score stays within [0, points].
        ratio = min(1.0, max(0.0, raw_ratio))
        if ratio >= pass_at:
            item_status = "pass"
        elif ratio >= warn_at:
            item_status = "warning"
        else:
            item_status = "fail"
        items.append(DiagnosisItem(
            name=label,
            status=item_status,
            description=explanation,
            suggestion=advice,
            score=ratio * points,
            max_score=points,
        ))

    total_score = sum(item.score for item in items)
    percentage = (total_score / max_score) * 100

    # The dimension is "warning" when any item failed, otherwise "pass".
    has_fail = any(item.status == "fail" for item in items)
    status = "warning" if has_fail else "pass"

    return GEODimensionScore(
        name="主题权威",
        score=total_score,
        max_score=max_score,
        items=items,
        status=status,
        percentage=round(percentage, 2),
        detail={
            "content_depth_score": round(content_depth_score, 2),
            "topic_coverage_ratio": round(topic_coverage_ratio, 2),
            "entity_consistency_score": round(entity_consistency_score, 2),
            "cluster_completeness": round(cluster_completeness, 2),
            "total_content_count": total_content_count,
            "topic_cluster_count": topic_cluster_count,
        },
    )
|
||
|
||
|
||
# ============================================================
|
||
# 维度6: 引用就绪度诊断 (满分15)
|
||
# ============================================================
|
||
|
||
def diagnose_citation_readiness(
    answer_ownership_rate: float = 0.0,
    citation_accuracy: float = 0.0,
    ai_sov: float = 0.0,
    competitor_gap: float = 0.0,
    total_ai_responses: int = 0,
    brand_mention_count: int = 0,
    accurate_citation_count: int = 0,
) -> GEODimensionScore:
    """
    Diagnose citation readiness (15 points maximum).

    Estimates how likely the brand is to be cited in AI answers.

    Item scores are kept within [0, max_score]: negative rates score 0, and
    the competitor-gap score never increases as the gap grows.

    Args:
        answer_ownership_rate: AOR - Answer Ownership Rate (0-1); target >= 50%.
        citation_accuracy: Citation accuracy (0-1); target >= 90%.
        ai_sov: AI Share of Voice (0-1); target >= 30%.
        competitor_gap: Gap to competitors, compared against fractional
            thresholds (0.1 corresponds to the 10pp target).
        total_ai_responses: Total number of AI answers (reported only).
        brand_mention_count: Number of brand mentions (reported only).
        accurate_citation_count: Number of accurate citations (reported only).

    Returns:
        GEODimensionScore: Score for the citation readiness dimension.
    """
    max_score = 15.0
    items = []

    # 1. Citation frequency, AOR (5 points): tiered, proportional below 10%.
    if answer_ownership_rate >= 0.5:
        aor_score = 5.0
    elif answer_ownership_rate >= 0.3:
        aor_score = 3.5
    elif answer_ownership_rate >= 0.1:
        aor_score = 2.0
    else:
        # Floor at 0 so a negative rate cannot yield a negative score.
        aor_score = max(0.0, answer_ownership_rate * 10.0)
    aor_status = (
        "pass" if answer_ownership_rate >= 0.5
        else ("warning" if answer_ownership_rate >= 0.2 else "fail")
    )
    items.append(DiagnosisItem(
        name="引用频率 (AOR)",
        status=aor_status,
        description="品牌在AI回答中被提及的频率,AOR目标≥50%",
        suggestion="优化内容结构,提高被AI引用的概率",
        score=aor_score,
        max_score=5.0,
    ))

    # 2. Citation quality (4 points): proportional to accuracy, clamped to [0, 1].
    accuracy_score = min(1.0, max(0.0, citation_accuracy)) * 4.0
    accuracy_status = (
        "pass" if citation_accuracy >= 0.9
        else ("warning" if citation_accuracy >= 0.7 else "fail")
    )
    items.append(DiagnosisItem(
        name="引用质量",
        status=accuracy_status,
        description="引用内容是否准确完整,引用准确率目标≥90%",
        suggestion="确保内容准确无误,避免过时或错误信息",
        score=accuracy_score,
        max_score=4.0,
    ))

    # 3. AI share of voice (3 points): tiered, proportional below 5%.
    if ai_sov >= 0.3:
        sov_score = 3.0
    elif ai_sov >= 0.15:
        sov_score = 2.0
    elif ai_sov >= 0.05:
        sov_score = 1.0
    else:
        # Floor at 0 so a negative share cannot yield a negative score.
        sov_score = max(0.0, ai_sov * 10.0)
    sov_status = "pass" if ai_sov >= 0.3 else ("warning" if ai_sov >= 0.1 else "fail")
    items.append(DiagnosisItem(
        name="AI声量占比",
        status=sov_status,
        description="品牌在AI回答中的占比,AI SOV目标≥30%",
        suggestion="增加品牌曝光,提高在AI回答中的出现频率",
        score=sov_score,
        max_score=3.0,
    ))

    # 4. Competitor comparison (3 points): smaller gap scores higher.
    if competitor_gap <= 0.1:
        gap_score = 3.0
    elif competitor_gap <= 0.2:
        gap_score = 2.0
    elif competitor_gap <= 0.3:
        gap_score = 1.0
    elif competitor_gap < 0.6:
        # Linear tail towards 0 at a gap of 0.6. Capped at 1.0 because the
        # raw formula exceeds the previous tier for gaps between 0.3 and 0.4,
        # which would reward a larger gap with a higher score.
        gap_score = min(1.0, max(0.0, 3.0 - competitor_gap * 5))
    else:
        gap_score = 0.0
    gap_status = (
        "pass" if competitor_gap <= 0.1
        else ("warning" if competitor_gap <= 0.25 else "fail")
    )
    items.append(DiagnosisItem(
        name="竞品对比",
        status=gap_status,
        description="与竞品在AI回答中的表现差距,差距目标≤10pp",
        suggestion="分析竞品优势,针对性优化内容策略",
        score=gap_score,
        max_score=3.0,
    ))

    total_score = sum(item.score for item in items)
    percentage = (total_score / max_score) * 100

    # The dimension is "warning" when any item failed, otherwise "pass".
    has_fail = any(item.status == "fail" for item in items)
    status = "warning" if has_fail else "pass"

    return GEODimensionScore(
        name="引用就绪度",
        score=total_score,
        max_score=max_score,
        items=items,
        status=status,
        percentage=round(percentage, 2),
        detail={
            "answer_ownership_rate": round(answer_ownership_rate, 2),
            "citation_accuracy": round(citation_accuracy, 2),
            "ai_sov": round(ai_sov, 2),
            "competitor_gap": round(competitor_gap, 2),
            "total_ai_responses": total_ai_responses,
            "brand_mention_count": brand_mention_count,
            "accurate_citation_count": accurate_citation_count,
        },
    )
|
||
|
||
|
||
# ============================================================
|
||
# 推荐生成
|
||
# ============================================================
|
||
|
||
def generate_recommendations(dimensions: list[GEODimensionScore]) -> list[GEORecommendation]:
    """
    Build optimisation recommendations from the diagnosis results.

    Every item whose status is "fail" becomes a P0/high-impact recommendation
    and every "warning" item a P1/medium-impact one; other items are skipped.
    The result is ordered by priority, keeping the original order within a
    priority level.

    Args:
        dimensions: Diagnosis results for each dimension.

    Returns:
        list[GEORecommendation]: Recommendations sorted by priority.
    """
    # Item status -> (priority, expected impact).
    severity = {
        "fail": ("P0", "high"),
        "warning": ("P1", "medium"),
    }

    recommendations = []
    for dim in dimensions:
        for item in dim.items:
            if item.status not in severity:
                continue
            priority, impact = severity[item.status]

            # Estimate effort. Schema items are named after the schema type
            # (e.g. "Organization"), so the dimension name has to be checked
            # as well; looking at the item name alone never matches.
            labels = (dim.name, item.name)
            if any("Schema" in label or "标记" in label for label in labels):
                effort = "easy"
            elif "内容深度" in item.name or "话题覆盖" in item.name:
                effort = "hard"
            else:
                effort = "medium"

            recommendations.append(GEORecommendation(
                priority=priority,
                dimension=dim.name,
                title=f"优化: {item.name}",
                description=item.suggestion,
                impact=impact,
                effort=effort,
            ))

    # list.sort is stable, so items keep their relative order within a priority.
    priority_order = {"P0": 0, "P1": 1, "P2": 2}
    recommendations.sort(key=lambda rec: priority_order.get(rec.priority, 3))

    return recommendations
|
||
|
||
|
||
# ============================================================
|
||
# 工具函数
|
||
# ============================================================
|
||
|
||
def get_health_level(score: float) -> str:
    """
    Map a score to its health level.

    80+   -> excellent (green)
    60-79 -> good (yellow)
    40-59 -> pass (orange)
    <40   -> danger (red)
    """
    # Bands are listed from highest to lowest; the first one reached wins.
    bands = ((80, "excellent"), (60, "good"), (40, "pass"))
    for lower_bound, level in bands:
        if score >= lower_bound:
            return level
    return "danger"
|
||
|
||
|
||
def get_health_level_label(level: str) -> str:
    """Return the Chinese display label for a health level ("未知" if unrecognised)."""
    known_levels = ("excellent", "good", "pass", "danger")
    display_names = ("优秀", "良好", "及格", "危险")
    lookup = dict(zip(known_levels, display_names))
    if level in lookup:
        return lookup[level]
    return "未知"
|
||
|
||
|
||
# ============================================================
|
||
# GEODiagnosisService 服务类
|
||
# ============================================================
|
||
|
||
@dataclass
class GEODiagnosisInput:
    """Input parameters for a GEO diagnosis, grouped by dimension.

    Every field has a default, so an instance can be created from any subset
    of values. Field names match the keyword arguments of the per-dimension
    ``diagnose_*`` functions.
    """

    # --- Content extractability ---
    has_direct_answer: bool = False
    has_qa_headings: bool = False
    has_structured_data: bool = False
    has_internal_links: bool = False
    has_freshness_info: bool = False
    update_days_ago: int | None = None

    # --- Entity clarity ---
    has_brand_definition: bool = False
    has_target_audience: bool = False
    has_unique_value: bool = False
    has_industry_classification: bool = False

    # --- E-E-A-T signals ---
    has_author_bio: bool = False
    author_credentials_complete: float = 0.0
    has_certifications: bool = False
    certification_count: int = 0
    has_data_sources: bool = False
    authoritative_source_ratio: float = 0.0
    has_expert_endorsements: bool = False
    endorsement_count: int = 0

    # --- Schema markup ---
    has_organization: bool = False
    has_product: bool = False
    has_article: bool = False
    has_faq: bool = False
    has_howto: bool = False
    has_breadcrumb: bool = False

    # --- Topic authority ---
    content_depth_score: float = 0.0
    topic_coverage_ratio: float = 0.0
    entity_consistency_score: float = 0.0
    cluster_completeness: float = 0.0
    total_content_count: int = 0
    topic_cluster_count: int = 0

    # --- Citation readiness ---
    answer_ownership_rate: float = 0.0
    citation_accuracy: float = 0.0
    ai_sov: float = 0.0
    competitor_gap: float = 0.0
    total_ai_responses: int = 0
    brand_mention_count: int = 0
    accurate_citation_count: int = 0
|
||
|
||
|
||
class GEODiagnosisService:
    """GEO diagnosis service: runs all six dimension diagnoses and combines them."""

    def diagnose(self, input_data: GEODiagnosisInput) -> GEODiagnosisResult:
        """
        Run the GEO diagnosis.

        Args:
            input_data: Diagnosis input parameters.

        Returns:
            GEODiagnosisResult: Diagnosis result with the overall score (sum of
            the dimension scores, limited to 0-100 and rounded to two decimals),
            the dimension details and the generated recommendations.
        """
        src = input_data

        # Dimensions are evaluated in their fixed reporting order.
        dimensions = [
            # 1. Content extractability (20 points)
            diagnose_content_extractability(
                has_direct_answer=src.has_direct_answer,
                has_qa_headings=src.has_qa_headings,
                has_structured_data=src.has_structured_data,
                has_internal_links=src.has_internal_links,
                has_freshness_info=src.has_freshness_info,
                update_days_ago=src.update_days_ago,
            ),
            # 2. Entity clarity (15 points)
            diagnose_entity_clarity(
                has_brand_definition=src.has_brand_definition,
                has_target_audience=src.has_target_audience,
                has_unique_value=src.has_unique_value,
                has_industry_classification=src.has_industry_classification,
            ),
            # 3. E-E-A-T signals (20 points)
            diagnose_eeat_signals(
                has_author_bio=src.has_author_bio,
                author_credentials_complete=src.author_credentials_complete,
                has_certifications=src.has_certifications,
                certification_count=src.certification_count,
                has_data_sources=src.has_data_sources,
                authoritative_source_ratio=src.authoritative_source_ratio,
                has_expert_endorsements=src.has_expert_endorsements,
                endorsement_count=src.endorsement_count,
            ),
            # 4. Schema markup (15 points)
            diagnose_schema_markup(
                has_organization=src.has_organization,
                has_product=src.has_product,
                has_article=src.has_article,
                has_faq=src.has_faq,
                has_howto=src.has_howto,
                has_breadcrumb=src.has_breadcrumb,
            ),
            # 5. Topic authority (15 points)
            diagnose_topic_authority(
                content_depth_score=src.content_depth_score,
                topic_coverage_ratio=src.topic_coverage_ratio,
                entity_consistency_score=src.entity_consistency_score,
                cluster_completeness=src.cluster_completeness,
                total_content_count=src.total_content_count,
                topic_cluster_count=src.topic_cluster_count,
            ),
            # 6. Citation readiness (15 points)
            diagnose_citation_readiness(
                answer_ownership_rate=src.answer_ownership_rate,
                citation_accuracy=src.citation_accuracy,
                ai_sov=src.ai_sov,
                competitor_gap=src.competitor_gap,
                total_ai_responses=src.total_ai_responses,
                brand_mention_count=src.brand_mention_count,
                accurate_citation_count=src.accurate_citation_count,
            ),
        ]

        # Overall score: sum of the dimension scores, kept within 0-100.
        raw_total = sum(dim.score for dim in dimensions)
        bounded_total = min(100.0, max(0.0, raw_total))

        return GEODiagnosisResult(
            overall_score=round(bounded_total, 2),
            dimensions=dimensions,
            recommendations=generate_recommendations(dimensions),
        )

    def diagnose_from_dict(self, data: dict) -> GEODiagnosisResult:
        """
        Run the GEO diagnosis from a dictionary (convenience method).

        Args:
            data: Dictionary of diagnosis parameters; its keys are passed as
                keyword arguments to ``GEODiagnosisInput``.

        Returns:
            GEODiagnosisResult: Diagnosis result.
        """
        return self.diagnose(GEODiagnosisInput(**data))
|