geo/backend/app/services/seo_diagnosis.py

1490 lines
53 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
SEO诊断服务 - 5维度检测系统
诊断维度总分100
- 技术SEO (Technical SEO): 25分 - 索引、爬取、Core Web Vitals等
- 页面SEO (On-Page SEO): 20分 - Title/Meta、H标签、关键词等
- 内容质量 (Content Quality): 20分 - 可读性、E-E-A-T、新鲜度等
- 外链分析 (Backlink Analysis): 15分 - 反向链接质量、毒性信号等
- 用户体验 (User Experience): 20分 - 移动适配、页面速度、转化路径等
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from enum import Enum
from typing import Any
logger = logging.getLogger(__name__)
# ============================================================
# 枚举定义
# ============================================================
class DiagnosisStatus(str, Enum):
    """Outcome of a single check or of a whole dimension.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    PASS = "pass"
    WARNING = "warning"
    FAIL = "fail"
class DimensionName(str, Enum):
    """Display names of the five diagnosis dimensions.

    The values are the user-facing labels and are emitted as-is in results.
    """

    TECHNICAL_SEO = "技术SEO"
    ON_PAGE_SEO = "页面SEO"
    CONTENT_QUALITY = "内容质量"
    BACKLINK_ANALYSIS = "外链分析"
    USER_EXPERIENCE = "用户体验"
# ============================================================
# 数据结构
# ============================================================
@dataclass
class DiagnosisItem:
    """Result of one individual check inside a dimension."""

    # Name of the check.
    name: str
    # Outcome of the check.
    status: DiagnosisStatus
    # What was found.
    description: str
    # What to do about it.
    suggestion: str
    # Normalized score of this check (0-1).
    score: float = 0.0
    # Optional extra measurements; every instance gets its own dict.
    details: dict[str, Any] = field(default_factory=dict)
@dataclass
class SEODimensionScore:
    """Scored result of one diagnosis dimension.

    The ``status`` supplied to the constructor is only a placeholder:
    ``__post_init__`` always replaces it with a value derived from ``items``.
    """

    # Dimension label.
    name: str
    # Points earned in this dimension (0..max_score).
    score: float
    # Maximum points available in this dimension.
    max_score: float
    # Individual check results.
    items: list[DiagnosisItem]
    # Overall status; recomputed after construction.
    status: DiagnosisStatus

    @property
    def percentage(self) -> float:
        """Return the score as a percentage of ``max_score`` (2 decimals).

        A non-positive ``max_score`` yields 0.0 instead of dividing.
        """
        return 0.0 if self.max_score <= 0 else round((self.score / self.max_score) * 100, 2)

    def __post_init__(self):
        """Derive the overall status from the item statuses.

        - no items: WARNING
        - more than 30% of items FAIL: FAIL
        - any FAIL, or more than 30% WARNING: WARNING
        - otherwise: PASS
        """
        if not self.items:
            self.status = DiagnosisStatus.WARNING
            return

        statuses = [entry.status for entry in self.items]
        failures = statuses.count(DiagnosisStatus.FAIL)
        warnings = statuses.count(DiagnosisStatus.WARNING)
        threshold = len(self.items) * 0.3

        if failures > threshold:
            verdict = DiagnosisStatus.FAIL
        elif warnings > threshold or failures > 0:
            verdict = DiagnosisStatus.WARNING
        else:
            verdict = DiagnosisStatus.PASS
        self.status = verdict
@dataclass
class SEORecommendation:
    """One actionable optimization suggestion."""

    # Priority bucket: high / medium / low.
    priority: str
    # Dimension the suggestion belongs to.
    dimension: str
    # Name of the check that produced the suggestion.
    item_name: str
    # Text of the suggestion.
    description: str
    # Expected impact.
    impact: str
    # Implementation effort: easy / medium / hard.
    effort: str
@dataclass
class SEODiagnosisResult:
    """Complete result of an SEO diagnosis run.

    ``overall_score`` is clamped to 0-100 and rounded to two decimals on
    construction; ``health_level`` is then derived from the rounded score and
    overrides whatever value was passed in.
    """

    # Overall score, 0-100.
    overall_score: float
    # Per-dimension results.
    dimensions: list[SEODimensionScore]
    # Optimization suggestions.
    recommendations: list[SEORecommendation]
    # Health bucket: excellent / good / pass / danger (recomputed).
    health_level: str = "danger"

    def __post_init__(self):
        """Clamp and round the score, then derive the health level from it."""
        self.overall_score = round(min(100.0, max(0.0, self.overall_score)), 2)
        if self.overall_score >= 80:
            self.health_level = "excellent"
        elif self.overall_score >= 60:
            self.health_level = "good"
        elif self.overall_score >= 40:
            self.health_level = "pass"
        else:
            self.health_level = "danger"

    @staticmethod
    def _plain(value: Any) -> Any:
        """Return ``value.value`` for Enum members and ``value`` itself otherwise."""
        return value.value if isinstance(value, Enum) else value

    def to_dict(self) -> dict:
        """Convert the result into a dictionary of plain values.

        Dimension names, statuses and recommendation dimensions may be Enum
        members; they are reduced to their underlying values so the output
        does not depend on how a given Python version formats or serializes
        mixed-in enums. Statuses that are already plain strings are passed
        through unchanged.
        """
        plain = self._plain
        return {
            "overall_score": self.overall_score,
            "health_level": self.health_level,
            "health_level_label": self._get_health_label(),
            "dimensions": [
                {
                    "name": plain(dim.name),
                    "score": round(dim.score, 2),
                    "max_score": dim.max_score,
                    "percentage": dim.percentage,
                    "status": plain(dim.status),
                    "items": [
                        {
                            "name": item.name,
                            "status": plain(item.status),
                            "description": item.description,
                            "suggestion": item.suggestion,
                            "score": round(item.score, 2),
                            "details": item.details,
                        }
                        for item in dim.items
                    ],
                }
                for dim in self.dimensions
            ],
            "recommendations": [
                {
                    "priority": rec.priority,
                    "dimension": plain(rec.dimension),
                    "item_name": rec.item_name,
                    "description": rec.description,
                    "impact": rec.impact,
                    "effort": rec.effort,
                }
                for rec in self.recommendations
            ],
        }

    def _get_health_label(self) -> str:
        """Return the Chinese display label for ``health_level``."""
        labels = {
            "excellent": "优秀",
            "good": "良好",
            "pass": "及格",
            "danger": "危险",
        }
        return labels.get(self.health_level, "未知")
# ============================================================
# 诊断数据输入
# ============================================================
@dataclass
class TechnicalSEOData:
    """Input measurements for the technical-SEO checks."""

    # Whether the site is indexed.
    is_indexed: bool = True
    # Number of crawl errors.
    crawl_errors: int = 0
    # Number of redirect chains.
    redirect_chains: int = 0
    # Largest Contentful Paint, in seconds.
    lcp_seconds: float = 2.0
    # First Input Delay, in milliseconds.
    fid_ms: float = 50.0
    # Cumulative Layout Shift.
    cls_score: float = 0.05
    # Whether a robots.txt exists.
    has_robots_txt: bool = True
    # Whether robots.txt blocks important pages.
    robots_txt_blocks_important: bool = False
    # Whether a sitemap exists.
    has_sitemap: bool = True
    # Whether the sitemap is valid.
    sitemap_valid: bool = True
    # Whether the URL structure is normalized.
    url_structure_normalized: bool = True
@dataclass
class OnPageSEOData:
    """Input measurements for the on-page SEO checks."""

    # Whether a Title tag exists.
    has_title: bool = True
    # Title length.
    title_length: int = 50
    # Whether the Title is keyword-stuffed.
    title_keyword_stuffing: bool = False
    # Whether a Meta Description exists.
    has_meta_description: bool = True
    # Meta Description length.
    meta_description_length: int = 140
    # Number of H1 tags.
    h1_count: int = 1
    # Whether the heading hierarchy is sound.
    h_structure_valid: bool = True
    # Keyword density, in percent.
    keyword_density: float = 2.0
    # Number of internal links.
    internal_links: int = 10
    # Number of broken internal links.
    broken_internal_links: int = 0
    # Number of images lacking Alt text.
    images_without_alt: int = 0
    # Total number of images.
    total_images: int = 5
@dataclass
class ContentQualityData:
    """Input measurements for the content-quality checks."""

    # Readability score (0-100).
    readability_score: float = 70.0
    # Word count.
    word_count: int = 1500
    # Topic coverage (0-1).
    topic_coverage: float = 0.8
    # Whether author information is present.
    has_author_info: bool = True
    # Whether a publication date is present.
    has_publication_date: bool = True
    # Days since the last update.
    last_updated_days: int = 30
    # Whether citations/references are present.
    has_citations: bool = True
    # Authority of the cited sources (0-1).
    citation_authority: float = 0.8
    # Share of duplicate content (0-1).
    duplicate_content_ratio: float = 0.05
    # Whether the content was reviewed by an expert.
    has_expert_review: bool = False
@dataclass
class BacklinkData:
    """Input measurements for the backlink checks."""

    # Total number of backlinks.
    total_backlinks: int = 100
    # Number of referring domains.
    referring_domains: int = 20
    # Number of high-authority links.
    high_authority_links: int = 10
    # Number of toxic links.
    toxic_links: int = 2
    # Share of nofollow links.
    nofollow_ratio: float = 0.3
    # Anchor-text diversity (0-1).
    anchor_text_diversity: float = 0.8
    # Share of exact-match anchor texts.
    exact_match_anchor_ratio: float = 0.2
@dataclass
class UserExperienceData:
    """Input measurements for the user-experience checks."""

    # Whether the page is mobile friendly.
    is_mobile_friendly: bool = True
    # Whether a viewport is configured.
    mobile_viewport_set: bool = True
    # Page load time, in seconds.
    page_load_time: float = 2.5
    # Whether the site uses HTTPS.
    has_https: bool = True
    # Whether breadcrumb navigation is present.
    has_breadcrumbs: bool = True
    # Whether the conversion path is clear.
    conversion_path_clear: bool = True
    # Whether there is a clear call to action.
    has_cta: bool = True
    # Form usability (0-1).
    form_usability: float = 0.9
    # Whether on-site search is available.
    has_search: bool = True
# ============================================================
# 维度诊断函数
# ============================================================
def diagnose_technical_seo(data: TechnicalSEOData) -> SEODimensionScore:
    """Evaluate the technical-SEO dimension (25 points maximum).

    Point budget:
        - index status: 4
        - crawl errors: 4
        - Core Web Vitals (LCP, FID, CLS; 2 points each): 6
        - URL structure: 3
        - robots.txt: 4
        - sitemap: 4

    ``data.redirect_chains`` is not consulted by any check.

    Args:
        data: Technical measurements to grade.

    Returns:
        The dimension result with one ``DiagnosisItem`` per check (each Core
        Web Vital is its own item). The ``status`` passed to the constructor
        is a placeholder that ``SEODimensionScore`` recomputes.
    """
    passed = DiagnosisStatus.PASS
    warning = DiagnosisStatus.WARNING
    failed = DiagnosisStatus.FAIL
    findings: list[DiagnosisItem] = []

    def report(name, status, description, suggestion, score, details=None):
        """Append one finding to ``findings``."""
        findings.append(DiagnosisItem(
            name=name,
            status=status,
            description=description,
            suggestion=suggestion,
            score=score,
            details={} if details is None else details,
        ))

    points = 0.0

    # 1. Index status (4 points).
    if data.is_indexed:
        report("索引状态", passed, "网站已被搜索引擎正确索引", "保持当前索引状态", 1.0)
        points += 4.0
    else:
        report("索引状态", failed, "网站未被搜索引擎索引", "检查Search Console提交sitemap确保没有被noindex", 0.0)

    # 2. Crawl errors (4 points): none -> full, up to 5 -> half, more -> nothing.
    errors = data.crawl_errors
    if errors == 0:
        report("爬取错误", passed, "未发现爬取错误", "定期检查Search Console的爬取错误报告", 1.0)
        points += 4.0
    elif errors <= 5:
        report(
            "爬取错误", warning, f"发现{errors}个爬取错误",
            "修复404页面检查5xx服务器错误优化重定向链", 0.5, {"error_count": errors},
        )
        points += 2.0
    else:
        report(
            "爬取错误", failed, f"发现{errors}个爬取错误,数量过多",
            "立即修复所有爬取错误特别是5xx服务器错误", 0.0, {"error_count": errors},
        )

    # 3. Core Web Vitals (6 points, 2 per metric), summed separately first.
    vitals = 0.0

    lcp = data.lcp_seconds
    lcp_details = {"value": lcp, "threshold": 2.5}
    if lcp <= 2.5:
        vitals += 2.0
        report("LCP", passed, f"LCP为{lcp}s符合<2.5s标准", "保持当前性能水平", 1.0, lcp_details)
    elif lcp <= 4.0:
        vitals += 1.0
        report("LCP", warning, f"LCP为{lcp}s超过2.5s标准", "优化图片加载使用CDN减少服务器响应时间", 0.5, lcp_details)
    else:
        report("LCP", failed, f"LCP为{lcp}s严重超过标准", "立即优化页面加载性能", 0.0, lcp_details)

    fid = data.fid_ms
    fid_details = {"value": fid, "threshold": 100}
    if fid <= 100:
        vitals += 2.0
        report("FID", passed, f"FID为{fid}ms符合<100ms标准", "保持当前交互性能", 1.0, fid_details)
    elif fid <= 300:
        vitals += 1.0
        report("FID", warning, f"FID为{fid}ms超过100ms标准", "减少JavaScript执行时间优化主线程工作", 0.5, fid_details)
    else:
        report("FID", failed, f"FID为{fid}ms严重超过标准", "立即优化JavaScript减少主线程阻塞", 0.0, fid_details)

    cls = data.cls_score
    cls_details = {"value": cls, "threshold": 0.1}
    if cls <= 0.1:
        vitals += 2.0
        report("CLS", passed, f"CLS为{cls},符合<0.1标准", "保持当前视觉稳定性", 1.0, cls_details)
    elif cls <= 0.25:
        vitals += 1.0
        report("CLS", warning, f"CLS为{cls}超过0.1标准", "为图片和广告预留空间,避免动态插入内容", 0.5, cls_details)
    else:
        report("CLS", failed, f"CLS为{cls},严重超过标准", "立即修复布局偏移问题", 0.0, cls_details)

    points += vitals

    # 4. URL structure (3 points); a non-normalized structure still earns half.
    if data.url_structure_normalized:
        report("URL结构", passed, "URL结构规范无重复URL问题", "保持当前URL结构", 1.0)
        points += 3.0
    else:
        report("URL结构", warning, "URL结构存在问题可能有重复URL", "使用canonical标签统一URL格式", 0.5)
        points += 1.5

    # 5. robots.txt (4 points): missing -> half, blocking important pages -> nothing.
    if not data.has_robots_txt:
        report("robots.txt", warning, "未找到robots.txt文件", "创建robots.txt文件明确指定允许和禁止爬取的路径", 0.5)
        points += 2.0
    elif data.robots_txt_blocks_important:
        report("robots.txt", failed, "robots.txt阻止了重要页面", "检查并修改robots.txt确保重要页面可被爬取", 0.0)
    else:
        report("robots.txt", passed, "robots.txt配置正确未阻止重要页面", "定期检查robots.txt配置", 1.0)
        points += 4.0

    # 6. Sitemap (4 points): missing -> nothing, present but invalid -> half.
    if not data.has_sitemap:
        report("sitemap", failed, "未找到sitemap", "创建并提交sitemap到Search Console", 0.0)
    elif data.sitemap_valid:
        report("sitemap", passed, "sitemap存在且有效", "定期更新sitemap确保包含所有重要页面", 1.0)
        points += 4.0
    else:
        report("sitemap", warning, "sitemap存在但可能无效", "验证sitemap格式确保所有URL可访问", 0.5)
        points += 2.0

    return SEODimensionScore(
        name=DimensionName.TECHNICAL_SEO,
        score=points,
        max_score=25.0,
        items=findings,
        status=DiagnosisStatus.PASS,  # placeholder, recomputed in __post_init__
    )
def diagnose_on_page_seo(data: OnPageSEOData) -> SEODimensionScore:
    """Evaluate the on-page SEO dimension (20 points maximum).

    Point budget:
        - Title and Meta Description (2.5 each): 5
        - heading structure: 4
        - keyword density: 4
        - internal links: 4
        - image Alt text: 3

    Args:
        data: On-page measurements to grade.

    Returns:
        The dimension result with one ``DiagnosisItem`` per check. The
        ``status`` passed to the constructor is a placeholder that
        ``SEODimensionScore`` recomputes.
    """
    passed = DiagnosisStatus.PASS
    warning = DiagnosisStatus.WARNING
    failed = DiagnosisStatus.FAIL
    findings: list[DiagnosisItem] = []

    def report(name, status, description, suggestion, score, details=None):
        """Append one finding to ``findings``."""
        findings.append(DiagnosisItem(
            name=name,
            status=status,
            description=description,
            suggestion=suggestion,
            score=score,
            details={} if details is None else details,
        ))

    points = 0.0

    # 1a. Title (2.5 points). Keyword stuffing is only examined when the
    # length is already within 30-60 and then costs one point.
    if not data.has_title:
        title_points = 0.0
        report("Title标签", failed, "页面缺少Title标签", "为每个页面添加唯一且描述性的Title", 0.0)
    elif not (30 <= data.title_length <= 60):
        title_points = 1.0
        report(
            "Title标签", warning, f"Title长度{data.title_length}字符建议30-60字符",
            "调整Title长度到推荐范围", 0.5, {"length": data.title_length},
        )
    elif data.title_keyword_stuffing:
        title_points = 2.5 - 1.0
        report("Title标签", warning, "Title存在关键词堆砌问题", "简化Title自然使用关键词", 0.5)
    else:
        title_points = 2.5
        report(
            "Title标签", passed, f"Title长度{data.title_length}字符,格式规范",
            "保持当前Title优化", 1.0, {"length": data.title_length},
        )

    # 1b. Meta Description (2.5 points).
    if not data.has_meta_description:
        meta_points = 0.0
        report("Meta Description", failed, "页面缺少Meta Description", "为每个页面添加描述性的Meta Description", 0.0)
    elif 120 <= data.meta_description_length <= 160:
        meta_points = 2.5
        report(
            "Meta Description", passed, f"Meta Description长度{data.meta_description_length}字符,格式规范",
            "保持当前Meta Description优化", 1.0, {"length": data.meta_description_length},
        )
    else:
        meta_points = 1.0
        report(
            "Meta Description", warning, f"Meta Description长度{data.meta_description_length}字符建议120-160字符",
            "调整Meta Description长度", 0.5, {"length": data.meta_description_length},
        )

    points += title_points + meta_points

    # 2. Heading structure (4 points).
    h1_total = data.h1_count
    h1_details = {"h1_count": h1_total}
    if h1_total == 1 and data.h_structure_valid:
        report("H标签结构", passed, "H标签结构清晰有且仅有1个H1", "保持当前H标签结构", 1.0, h1_details)
        points += 4.0
    elif h1_total == 1:
        report("H标签结构", warning, "有1个H1但H标签层级可能不规范", "确保H标签层级正确不跳过级别", 0.5, h1_details)
        points += 2.0
    elif h1_total == 0:
        report("H标签结构", failed, "页面缺少H1标签", "添加唯一的H1标签包含主要关键词", 0.0, h1_details)
    else:
        report("H标签结构", warning, f"页面有{h1_total}个H1标签建议只有1个", "确保每个页面只有1个H1标签", 0.5, h1_details)
        points += 2.0

    # 3. Keyword density (4 points): 1-3% is ideal, 0.5-1% or 3-5% tolerated.
    density = data.keyword_density
    density_details = {"density": density}
    if 1.0 <= density <= 3.0:
        report("关键词密度", passed, f"关键词密度{density}%,在合理范围内", "保持当前关键词使用频率", 1.0, density_details)
        points += 4.0
    elif 0.5 <= density < 1.0 or 3.0 < density <= 5.0:
        report("关键词密度", warning, f"关键词密度{density}%建议1-3%", "调整关键词使用频率到推荐范围", 0.5, density_details)
        points += 2.0
    else:
        # Above 5% is a failure; anything else landing here is only a warning.
        severity = failed if density > 5.0 else warning
        report("关键词密度", severity, f"关键词密度{density}%,不合理", "优化关键词使用,避免堆砌或过少", 0.0, density_details)

    # 4. Internal links (4 points).
    if data.internal_links > 0:
        broken = data.broken_internal_links
        if broken == 0:
            report(
                "内链结构", passed, f"内链结构良好,共{data.internal_links}个内链,无死链",
                "保持内链更新,定期检查死链", 1.0, {"total": data.internal_links, "broken": 0},
            )
            points += 4.0
        elif broken <= 3:
            report(
                "内链结构", warning, f"发现{broken}个死链",
                "修复所有死链,更新或移除无效链接", 0.5, {"total": data.internal_links, "broken": broken},
            )
            points += 2.0
        else:
            report(
                "内链结构", failed, f"发现{broken}个死链,数量过多",
                "立即修复所有死链", 0.0, {"total": data.internal_links, "broken": broken},
            )
    else:
        report("内链结构", failed, "页面没有内链", "添加相关页面的内链,提升网站结构", 0.0)

    # 5. Image Alt text (3 points); a page without images gets full marks.
    images = data.total_images
    missing_alt = data.images_without_alt
    if images == 0:
        report("图片Alt文本", passed, "页面无图片", "考虑添加相关图片并设置Alt文本", 1.0)
        points += 3.0
    elif missing_alt == 0:
        report(
            "图片Alt文本", passed, f"所有{images}张图片都有Alt文本",
            "保持为所有图片添加描述性Alt文本", 1.0, {"total": images, "without_alt": 0},
        )
        points += 3.0
    elif missing_alt <= images * 0.3:
        report(
            "图片Alt文本", warning, f"{missing_alt}/{images}张图片缺少Alt文本",
            "为所有图片添加描述性Alt文本", 0.5, {"total": images, "without_alt": missing_alt},
        )
        points += 1.5
    else:
        report(
            "图片Alt文本", failed, f"{missing_alt}/{images}张图片缺少Alt文本",
            "立即为所有图片添加Alt文本", 0.0, {"total": images, "without_alt": missing_alt},
        )

    return SEODimensionScore(
        name=DimensionName.ON_PAGE_SEO,
        score=points,
        max_score=20.0,
        items=findings,
        status=DiagnosisStatus.PASS,  # placeholder, recomputed in __post_init__
    )
def diagnose_content_quality(data: ContentQualityData) -> SEODimensionScore:
    """Evaluate the content-quality dimension (20 points maximum).

    Point budget:
        - readability: 4
        - information depth: 4
        - E-E-A-T signals (author 1.5, expert review 1.5, citations 2): 5
        - freshness: 4
        - duplicate content: 3

    Args:
        data: Content measurements to grade.

    Returns:
        The dimension result with one ``DiagnosisItem`` per check. The
        ``status`` passed to the constructor is a placeholder that
        ``SEODimensionScore`` recomputes.
    """
    passed = DiagnosisStatus.PASS
    warning = DiagnosisStatus.WARNING
    failed = DiagnosisStatus.FAIL
    findings: list[DiagnosisItem] = []

    def report(name, status, description, suggestion, score, details=None):
        """Append one finding to ``findings``."""
        findings.append(DiagnosisItem(
            name=name,
            status=status,
            description=description,
            suggestion=suggestion,
            score=score,
            details={} if details is None else details,
        ))

    points = 0.0

    # 1. Readability (4 points).
    readability = data.readability_score
    readability_details = {"score": readability}
    if readability >= 70:
        report("可读性", passed, f"可读性评分{readability},内容易于理解", "保持当前内容质量", 1.0, readability_details)
        points += 4.0
    elif readability >= 50:
        report("可读性", warning, f"可读性评分{readability},内容较难理解", "简化语言,使用短句,增加段落分隔", 0.5, readability_details)
        points += 2.0
    else:
        report("可读性", failed, f"可读性评分{readability},内容难以理解", "大幅简化内容,使用更通俗的语言", 0.0, readability_details)

    # 2. Information depth (4 points): needs both enough words and coverage.
    words = data.word_count
    coverage = data.topic_coverage
    depth_tail = f"{words}字,主题覆盖率{coverage*100:.0f}%"
    depth_details = {"word_count": words, "coverage": coverage}
    if words >= 1500 and coverage >= 0.8:
        report("信息深度", passed, f"内容深度良好,{depth_tail}", "保持当前内容深度", 1.0, depth_details)
        points += 4.0
    elif words >= 800 and coverage >= 0.6:
        report("信息深度", warning, f"内容深度一般,{depth_tail}", "扩展内容深度,覆盖更多相关子话题", 0.5, depth_details)
        points += 2.0
    else:
        report("信息深度", failed, f"内容深度不足,{depth_tail}", "大幅扩展内容,全面覆盖主题", 0.0, depth_details)

    # 3. E-E-A-T signals (5 points), summed separately first.
    eeat = 0.0

    # Author information (1.5 points).
    if data.has_author_info:
        eeat += 1.5
        report("作者资质", passed, "内容包含作者信息", "保持展示作者资质", 1.0)
    else:
        report("作者资质", warning, "内容缺少作者信息", "添加作者信息和专业背景", 0.0)

    # Expert review (1.5 points).
    if data.has_expert_review:
        eeat += 1.5
        report("专家审核", passed, "内容经过专家审核", "保持专家审核流程", 1.0)
    else:
        report("专家审核", warning, "内容未经专家审核", "考虑邀请行业专家审核重要内容", 0.0)

    # Source authority (2 points).
    if data.has_citations and data.citation_authority >= 0.7:
        eeat += 2.0
        report(
            "数据来源", passed, f"引用权威数据源,权威性评分{data.citation_authority:.2f}",
            "保持引用高质量数据源", 1.0, {"authority": data.citation_authority},
        )
    elif data.has_citations:
        eeat += 1.0
        report(
            "数据来源", warning, f"有引用但数据源权威性一般,评分{data.citation_authority:.2f}",
            "引用更权威的数据源", 0.5, {"authority": data.citation_authority},
        )
    else:
        report("数据来源", failed, "内容未引用任何数据源", "引用权威数据支持内容观点", 0.0)

    points += eeat

    # 4. Freshness (4 points): a missing publication date always fails.
    age_days = data.last_updated_days
    age_details = {"last_updated_days": age_days}
    if data.has_publication_date and age_days <= 30:
        report("内容新鲜度", passed, f"内容更新于{age_days}天前", "保持定期更新内容", 1.0, age_details)
        points += 4.0
    elif data.has_publication_date and age_days <= 90:
        report("内容新鲜度", warning, f"内容更新于{age_days}天前建议30天内更新", "定期更新内容,保持信息时效性", 0.5, age_details)
        points += 2.0
    else:
        report("内容新鲜度", failed, "内容长时间未更新或缺少发布日期", "更新内容并显示发布/更新日期", 0.0, age_details)

    # 5. Duplicate content (3 points).
    duplicate = data.duplicate_content_ratio
    duplicate_details = {"duplicate_ratio": duplicate}
    if duplicate <= 0.1:
        report("重复内容", passed, f"重复内容比例{duplicate*100:.0f}%,在可接受范围内", "保持内容原创性", 1.0, duplicate_details)
        points += 3.0
    elif duplicate <= 0.3:
        report("重复内容", warning, f"重复内容比例{duplicate*100:.0f}%", "减少重复内容使用canonical标签", 0.5, duplicate_details)
        points += 1.5
    else:
        report("重复内容", failed, f"重复内容比例{duplicate*100:.0f}%,过高", "重写重复内容,确保每页独特价值", 0.0, duplicate_details)

    return SEODimensionScore(
        name=DimensionName.CONTENT_QUALITY,
        score=points,
        max_score=20.0,
        items=findings,
        status=DiagnosisStatus.PASS,  # placeholder, recomputed in __post_init__
    )
def diagnose_backlinks(data: BacklinkData) -> SEODimensionScore:
    """Evaluate the backlink dimension (15 points maximum).

    Point budget:
        - backlink quality (referring domains, high-authority links,
          nofollow ratio; 2 points each): 6
        - toxic links: 5
        - anchor-text distribution: 4

    Args:
        data: Backlink measurements to grade.

    Returns:
        The dimension result with one ``DiagnosisItem`` per check. The
        ``status`` passed to the constructor is a placeholder that
        ``SEODimensionScore`` recomputes.
    """
    passed = DiagnosisStatus.PASS
    warning = DiagnosisStatus.WARNING
    failed = DiagnosisStatus.FAIL
    findings: list[DiagnosisItem] = []

    def report(name, status, description, suggestion, score, details=None):
        """Append one finding to ``findings``."""
        findings.append(DiagnosisItem(
            name=name,
            status=status,
            description=description,
            suggestion=suggestion,
            score=score,
            details={} if details is None else details,
        ))

    points = 0.0

    # 1. Backlink quality (6 points), summed separately first.
    quality = 0.0

    # Referring domains (2 points).
    domains = data.referring_domains
    domain_details = {"referring_domains": domains}
    if domains >= 20:
        quality += 2.0
        report("引用域名", passed, f"{domains}个引用域名", "继续增加高质量外链", 1.0, domain_details)
    elif domains >= 10:
        quality += 1.0
        report("引用域名", warning, f"{domains}个引用域名,建议增加", "通过内容营销获取更多外链", 0.5, domain_details)
    else:
        report("引用域名", failed, f"仅有{domains}个引用域名", "积极开展外链建设", 0.0, domain_details)

    # High-authority links (2 points).
    authority = data.high_authority_links
    authority_details = {"high_authority_links": authority}
    if authority >= 5:
        quality += 2.0
        report("高权威链接", passed, f"{authority}个高权威外链", "保持获取高质量外链", 1.0, authority_details)
    elif authority >= 2:
        quality += 1.0
        report("高权威链接", warning, f"{authority}个高权威外链", "争取更多权威网站的外链", 0.5, authority_details)
    else:
        report("高权威链接", failed, f"仅有{authority}个高权威外链", "重点获取权威网站外链", 0.0, authority_details)

    # Nofollow ratio (2 points); outside 20-60% still earns one point.
    nofollow = data.nofollow_ratio
    nofollow_details = {"nofollow_ratio": nofollow}
    if 0.2 <= nofollow <= 0.6:
        quality += 2.0
        report("Nofollow比例", passed, f"Nofollow比例{nofollow*100:.0f}%,自然合理", "保持自然的外链结构", 1.0, nofollow_details)
    else:
        quality += 1.0
        report("Nofollow比例", warning, f"Nofollow比例{nofollow*100:.0f}%,可能不自然", "确保外链结构自然多样", 0.5, nofollow_details)

    points += quality

    # 2. Toxic links (5 points).
    toxic = data.toxic_links
    # The denominator never drops below the toxic count, so inconsistent
    # input (toxic links reported while total_backlinks is 0 or smaller than
    # the toxic count) is graded as a high ratio instead of collapsing to 0.
    denominator = max(data.total_backlinks, toxic)
    toxic_ratio = toxic / denominator if denominator > 0 else 0
    if toxic == 0:
        report("毒性链接", passed, "未发现毒性外链", "定期监控外链质量", 1.0, {"toxic_count": 0})
        points += 5.0
    elif toxic_ratio <= 0.05:
        report(
            "毒性链接", warning, f"发现{toxic}个毒性外链",
            "使用Disavow工具拒绝毒性外链", 0.5, {"toxic_count": toxic, "ratio": toxic_ratio},
        )
        points += 2.5
    else:
        report(
            "毒性链接", failed, f"发现{toxic}个毒性外链,比例过高",
            "立即使用Disavow工具拒绝所有毒性外链", 0.0, {"toxic_count": toxic, "ratio": toxic_ratio},
        )

    # 3. Anchor-text distribution (4 points).
    diversity = data.anchor_text_diversity
    exact_ratio = data.exact_match_anchor_ratio
    anchor_details = {"diversity": diversity, "exact_match_ratio": exact_ratio}
    if diversity >= 0.7 and exact_ratio <= 0.3:
        report(
            "锚文本分布", passed,
            f"锚文本多样性{diversity:.2f},精确匹配比例{exact_ratio*100:.0f}%",
            "保持自然的锚文本结构", 1.0, anchor_details,
        )
        points += 4.0
    elif diversity >= 0.7:
        # Diversity is fine here, so the criterion that failed is the
        # exact-match share; report that instead of asking for more diversity.
        report(
            "锚文本分布", warning,
            f"精确匹配锚文本比例{exact_ratio*100:.0f}%,比例偏高",
            "降低精确匹配锚文本比例,多使用品牌词和自然短语", 0.5, anchor_details,
        )
        points += 2.0
    elif diversity >= 0.5:
        report(
            "锚文本分布", warning,
            f"锚文本多样性{diversity:.2f},建议增加多样性",
            "使用更多样化的锚文本", 0.5, anchor_details,
        )
        points += 2.0
    else:
        report(
            "锚文本分布", failed,
            f"锚文本多样性{diversity:.2f},过于单一",
            "大幅增加锚文本多样性", 0.0, anchor_details,
        )

    return SEODimensionScore(
        name=DimensionName.BACKLINK_ANALYSIS,
        score=points,
        max_score=15.0,
        items=findings,
        status=DiagnosisStatus.PASS,  # placeholder, recomputed in __post_init__
    )
def diagnose_user_experience(data: UserExperienceData) -> SEODimensionScore:
    """Evaluate the user-experience dimension (20 points maximum).

    Point budget:
        - mobile friendliness: 6
        - page speed: 5
        - conversion path (CTA 2, path clarity 2, form usability 1): 5
        - basics (HTTPS 2, breadcrumbs 1, on-site search 1): 4

    Args:
        data: User-experience measurements to grade.

    Returns:
        The dimension result with one ``DiagnosisItem`` per check. The
        ``status`` passed to the constructor is a placeholder that
        ``SEODimensionScore`` recomputes.
    """
    passed = DiagnosisStatus.PASS
    warning = DiagnosisStatus.WARNING
    failed = DiagnosisStatus.FAIL
    findings: list[DiagnosisItem] = []

    def report(name, status, description, suggestion, score, details=None):
        """Append one finding to ``findings``."""
        findings.append(DiagnosisItem(
            name=name,
            status=status,
            description=description,
            suggestion=suggestion,
            score=score,
            details={} if details is None else details,
        ))

    def yes_no(present, worth, name, when_present, absent_status, when_absent):
        """Record a yes/no check and return the points it earns.

        ``when_present`` and ``when_absent`` are (description, suggestion)
        pairs; an absent feature earns nothing and is reported with
        ``absent_status``.
        """
        if present:
            report(name, passed, when_present[0], when_present[1], 1.0)
            return worth
        report(name, absent_status, when_absent[0], when_absent[1], 0.0)
        return 0.0

    points = 0.0

    # 1. Mobile friendliness (6 points); a missing viewport halves the score.
    if not data.is_mobile_friendly:
        report("移动适配", failed, "页面未适配移动端", "立即实现响应式设计或移动版本", 0.0)
    elif data.mobile_viewport_set:
        report("移动适配", passed, "页面移动适配良好", "保持移动端优化", 1.0)
        points += 6.0
    else:
        report("移动适配", warning, "页面基本适配移动端但缺少viewport设置", "添加viewport meta标签", 0.5)
        points += 3.0

    # 2. Page speed (5 points).
    load_time = data.page_load_time
    speed_details = {"load_time": load_time}
    if load_time <= 2.0:
        report("页面速度", passed, f"页面加载时间{load_time}s性能优秀", "保持当前性能水平", 1.0, speed_details)
        points += 5.0
    elif load_time <= 3.0:
        report("页面速度", warning, f"页面加载时间{load_time}s建议优化到2s内", "优化图片、启用缓存、使用CDN", 0.5, speed_details)
        points += 2.5
    else:
        report("页面速度", failed, f"页面加载时间{load_time}s严重超时", "立即优化页面加载性能", 0.0, speed_details)

    # 3. Conversion path (5 points), summed separately first.
    conversion = 0.0
    conversion += yes_no(
        data.has_cta, 2.0, "CTA",
        ("页面有明确的行动号召", "保持清晰的CTA"),
        warning, ("页面缺少明确的行动号召", "添加清晰的CTA按钮"),
    )
    conversion += yes_no(
        data.conversion_path_clear, 2.0, "转化路径",
        ("转化路径清晰", "保持当前转化流程"),
        warning, ("转化路径不够清晰", "简化转化流程,减少步骤"),
    )

    # Form usability (1 point) carries details, so it is handled inline.
    usability = data.form_usability
    usability_details = {"usability": usability}
    if usability >= 0.8:
        conversion += 1.0
        report("表单可用性", passed, f"表单可用性{usability*100:.0f}%", "保持表单体验", 1.0, usability_details)
    else:
        report("表单可用性", warning, f"表单可用性{usability*100:.0f}%,需要优化", "简化表单,减少必填字段", 0.0, usability_details)

    points += conversion

    # 4. Basics (4 points): only missing HTTPS is a failure, the rest warn.
    points += yes_no(
        data.has_https, 2.0, "HTTPS",
        ("网站使用HTTPS", "保持HTTPS配置"),
        failed, ("网站未使用HTTPS", "立即启用HTTPS"),
    )
    points += yes_no(
        data.has_breadcrumbs, 1.0, "面包屑导航",
        ("页面有面包屑导航", "保持面包屑导航"),
        warning, ("页面缺少面包屑导航", "添加面包屑导航提升用户体验"),
    )
    points += yes_no(
        data.has_search, 1.0, "站内搜索",
        ("网站有站内搜索功能", "保持搜索功能优化"),
        warning, ("网站缺少站内搜索", "添加站内搜索功能"),
    )

    return SEODimensionScore(
        name=DimensionName.USER_EXPERIENCE,
        score=points,
        max_score=20.0,
        items=findings,
        status=DiagnosisStatus.PASS,  # placeholder, recomputed in __post_init__
    )
# ============================================================
# 建议生成
# ============================================================
def generate_recommendations(result: SEODiagnosisResult) -> list[SEORecommendation]:
    """Build optimization suggestions from a diagnosis result.

    Every item whose status is FAIL yields a "high" priority recommendation
    (effort "medium"); every WARNING item yields a "medium" priority one
    (effort "easy"). PASS items yield nothing. No other prioritization rule
    is applied.

    Args:
        result: Diagnosis result whose ``dimensions`` are scanned.

    Returns:
        Recommendations ordered high priority first. The sort is stable, so
        within one priority the dimension/item order of ``result`` is kept.
    """
    recommendations: list[SEORecommendation] = []
    for dimension in result.dimensions:
        # Dimension names may be Enum members; store the plain label so that
        # SEORecommendation.dimension really is the str it is declared to be.
        label = dimension.name.value if isinstance(dimension.name, Enum) else dimension.name
        for item in dimension.items:
            # Compare with ==, not a dict lookup: a str-mixin Enum member
            # equals its plain string value but does not hash like it.
            if item.status == DiagnosisStatus.FAIL:
                priority, impact, effort = "high", "修复后可显著提升SEO表现", "medium"
            elif item.status == DiagnosisStatus.WARNING:
                priority, impact, effort = "medium", "优化后可改善SEO表现", "easy"
            else:
                continue
            recommendations.append(SEORecommendation(
                priority=priority,
                dimension=label,
                item_name=item.name,
                description=item.suggestion,
                impact=impact,
                effort=effort,
            ))

    # Unknown priorities sort after "low".
    priority_order = {"high": 0, "medium": 1, "low": 2}
    recommendations.sort(key=lambda rec: priority_order.get(rec.priority, 3))
    return recommendations
# ============================================================
# 主诊断服务
# ============================================================
class SEODiagnosisService:
    """Entry point that runs the dimension checks and assembles the result."""

    def diagnose(
        self,
        technical_data: TechnicalSEOData | None = None,
        on_page_data: OnPageSEOData | None = None,
        content_data: ContentQualityData | None = None,
        backlink_data: BacklinkData | None = None,
        ux_data: UserExperienceData | None = None,
    ) -> SEODiagnosisResult:
        """Run the full five-dimension diagnosis.

        Any input that is omitted (or falsy) is replaced by a
        default-constructed data object, i.e. the built-in sample values.

        Args:
            technical_data: Technical-SEO measurements.
            on_page_data: On-page SEO measurements.
            content_data: Content-quality measurements.
            backlink_data: Backlink measurements.
            ux_data: User-experience measurements.

        Returns:
            The diagnosis result; its overall score is the sum of the five
            dimension scores and its recommendations are already populated.
        """
        scored = [
            diagnose_technical_seo(technical_data or TechnicalSEOData()),
            diagnose_on_page_seo(on_page_data or OnPageSEOData()),
            diagnose_content_quality(content_data or ContentQualityData()),
            diagnose_backlinks(backlink_data or BacklinkData()),
            diagnose_user_experience(ux_data or UserExperienceData()),
        ]

        # Build the result without recommendations first: generating them
        # needs the result object itself.
        outcome = SEODiagnosisResult(
            overall_score=sum(entry.score for entry in scored),
            dimensions=scored,
            recommendations=[],
        )
        outcome.recommendations = generate_recommendations(outcome)
        return outcome

    def diagnose_technical_only(self, data: TechnicalSEOData | None = None) -> SEODimensionScore:
        """Run only the technical-SEO checks (defaults when ``data`` is omitted)."""
        source = data or TechnicalSEOData()
        return diagnose_technical_seo(source)

    def diagnose_on_page_only(self, data: OnPageSEOData | None = None) -> SEODimensionScore:
        """Run only the on-page SEO checks (defaults when ``data`` is omitted)."""
        source = data or OnPageSEOData()
        return diagnose_on_page_seo(source)

    def diagnose_content_only(self, data: ContentQualityData | None = None) -> SEODimensionScore:
        """Run only the content-quality checks (defaults when ``data`` is omitted)."""
        source = data or ContentQualityData()
        return diagnose_content_quality(source)

    def diagnose_backlinks_only(self, data: BacklinkData | None = None) -> SEODimensionScore:
        """Run only the backlink checks (defaults when ``data`` is omitted)."""
        source = data or BacklinkData()
        return diagnose_backlinks(source)

    def diagnose_ux_only(self, data: UserExperienceData | None = None) -> SEODimensionScore:
        """Run only the user-experience checks (defaults when ``data`` is omitted)."""
        source = data or UserExperienceData()
        return diagnose_user_experience(source)