""" SEO诊断服务 - 5维度检测系统 诊断维度(总分100): - 技术SEO (Technical SEO): 25分 - 索引、爬取、Core Web Vitals等 - 页面SEO (On-Page SEO): 20分 - Title/Meta、H标签、关键词等 - 内容质量 (Content Quality): 20分 - 可读性、E-E-A-T、新鲜度等 - 外链分析 (Backlink Analysis): 15分 - 反向链接质量、毒性信号等 - 用户体验 (User Experience): 20分 - 移动适配、页面速度、转化路径等 """ from __future__ import annotations import logging from dataclasses import dataclass, field from enum import Enum from typing import Any logger = logging.getLogger(__name__) # ============================================================ # 枚举定义 # ============================================================ class DiagnosisStatus(str, Enum): """诊断状态""" PASS = "pass" WARNING = "warning" FAIL = "fail" class DimensionName(str, Enum): """诊断维度名称""" TECHNICAL_SEO = "技术SEO" ON_PAGE_SEO = "页面SEO" CONTENT_QUALITY = "内容质量" BACKLINK_ANALYSIS = "外链分析" USER_EXPERIENCE = "用户体验" # ============================================================ # 数据结构 # ============================================================ @dataclass class DiagnosisItem: """单个诊断项""" name: str # 诊断项名称 status: DiagnosisStatus # 诊断状态 description: str # 诊断说明 suggestion: str # 优化建议 score: float = 0.0 # 该项得分 (0-1) details: dict[str, Any] = field(default_factory=dict) # 额外详情 @dataclass class SEODimensionScore: """单个维度的诊断结果""" name: str # 维度名称 score: float # 维度得分 (0-max_score) max_score: float # 维度满分 items: list[DiagnosisItem] # 诊断项列表 status: DiagnosisStatus # 整体状态 @property def percentage(self) -> float: """得分率 (0-100)""" if self.max_score <= 0: return 0.0 return round((self.score / self.max_score) * 100, 2) def __post_init__(self): """计算整体状态""" if not self.items: self.status = DiagnosisStatus.WARNING return fail_count = sum(1 for item in self.items if item.status == DiagnosisStatus.FAIL) warning_count = sum(1 for item in self.items if item.status == DiagnosisStatus.WARNING) total = len(self.items) if fail_count > total * 0.3: self.status = DiagnosisStatus.FAIL elif warning_count > total * 0.3 or fail_count > 0: self.status = DiagnosisStatus.WARNING else: self.status = DiagnosisStatus.PASS @dataclass class SEORecommendation: """优化建议""" priority: str # high/medium/low dimension: str # 所属维度 item_name: str # 诊断项名称 description: str # 建议描述 impact: str # 预期影响 effort: str # 实施难度 easy/medium/hard @dataclass class SEODiagnosisResult: """SEO诊断结果""" overall_score: float # 综合评分 0-100 dimensions: list[SEODimensionScore] # 各维度得分 recommendations: list[SEORecommendation] # 优化建议 health_level: str = "danger" # 健康等级 def __post_init__(self): """计算健康等级""" self.overall_score = round(min(100.0, max(0.0, self.overall_score)), 2) if self.overall_score >= 80: self.health_level = "excellent" elif self.overall_score >= 60: self.health_level = "good" elif self.overall_score >= 40: self.health_level = "pass" else: self.health_level = "danger" def to_dict(self) -> dict: """转换为字典格式""" return { "overall_score": self.overall_score, "health_level": self.health_level, "health_level_label": self._get_health_label(), "dimensions": [ { "name": dim.name, "score": round(dim.score, 2), "max_score": dim.max_score, "percentage": dim.percentage, "status": dim.status.value, "items": [ { "name": item.name, "status": item.status.value, "description": item.description, "suggestion": item.suggestion, "score": round(item.score, 2), "details": item.details, } for item in dim.items ], } for dim in self.dimensions ], "recommendations": [ { "priority": rec.priority, "dimension": rec.dimension, "item_name": rec.item_name, "description": rec.description, "impact": rec.impact, "effort": rec.effort, } for rec in self.recommendations ], } def _get_health_label(self) -> str: """获取健康等级中文标签""" labels = { "excellent": "优秀", "good": "良好", "pass": "及格", "danger": "危险", } return labels.get(self.health_level, "未知") # ============================================================ # 诊断数据输入 # ============================================================ @dataclass class TechnicalSEOData: """技术SEO检测数据""" is_indexed: bool = True # 是否被索引 crawl_errors: int = 0 # 爬取错误数 redirect_chains: int = 0 # 重定向链数 lcp_seconds: float = 2.0 # Largest Contentful Paint (秒) fid_ms: float = 50.0 # First Input Delay (毫秒) cls_score: float = 0.05 # Cumulative Layout Shift has_robots_txt: bool = True # 是否有robots.txt robots_txt_blocks_important: bool = False # robots.txt是否阻止重要页面 has_sitemap: bool = True # 是否有sitemap sitemap_valid: bool = True # sitemap是否有效 url_structure_normalized: bool = True # URL结构是否规范 @dataclass class OnPageSEOData: """页面SEO检测数据""" has_title: bool = True # 是否有Title标签 title_length: int = 50 # Title长度 title_keyword_stuffing: bool = False # Title是否关键词堆砌 has_meta_description: bool = True # 是否有Meta Description meta_description_length: int = 140 # Meta Description长度 h1_count: int = 1 # H1标签数量 h_structure_valid: bool = True # H标签结构是否合理 keyword_density: float = 2.0 # 关键词密度 (%) internal_links: int = 10 # 内链数量 broken_internal_links: int = 0 # 死链数量 images_without_alt: int = 0 # 缺少Alt文本的图片数 total_images: int = 5 # 总图片数 @dataclass class ContentQualityData: """内容质量检测数据""" readability_score: float = 70.0 # 可读性评分 (0-100) word_count: int = 1500 # 字数 topic_coverage: float = 0.8 # 主题覆盖率 (0-1) has_author_info: bool = True # 是否有作者信息 has_publication_date: bool = True # 是否有发布日期 last_updated_days: int = 30 # 最后更新天数 has_citations: bool = True # 是否有引用/参考 citation_authority: float = 0.8 # 引用权威性 (0-1) duplicate_content_ratio: float = 0.05 # 重复内容比例 (0-1) has_expert_review: bool = False # 是否有专家审核 @dataclass class BacklinkData: """外链检测数据""" total_backlinks: int = 100 # 总反向链接数 referring_domains: int = 20 # 引用域名数 high_authority_links: int = 10 # 高权威链接数 toxic_links: int = 2 # 毒性链接数 nofollow_ratio: float = 0.3 # Nofollow比例 anchor_text_diversity: float = 0.8 # 锚文本多样性 (0-1) exact_match_anchor_ratio: float = 0.2 # 精确匹配锚文本比例 @dataclass class UserExperienceData: """用户体验检测数据""" is_mobile_friendly: bool = True # 是否移动友好 mobile_viewport_set: bool = True # 是否设置viewport page_load_time: float = 2.5 # 页面加载时间 (秒) has_https: bool = True # 是否使用HTTPS has_breadcrumbs: bool = True # 是否有面包屑导航 conversion_path_clear: bool = True # 转化路径是否清晰 has_cta: bool = True # 是否有明确的CTA form_usability: float = 0.9 # 表单可用性 (0-1) has_search: bool = True # 是否有站内搜索 # ============================================================ # 维度诊断函数 # ============================================================ def diagnose_technical_seo(data: TechnicalSEOData) -> SEODimensionScore: """ 技术SEO诊断 (满分25分) 评分项: - 索引状态 (4分) - 爬取错误 (4分) - Core Web Vitals (6分) - URL结构 (3分) - robots.txt (4分) - sitemap (4分) """ max_score = 25.0 items: list[DiagnosisItem] = [] total_score = 0.0 # 1. 索引状态检查 (4分) if data.is_indexed: items.append(DiagnosisItem( name="索引状态", status=DiagnosisStatus.PASS, description="网站已被搜索引擎正确索引", suggestion="保持当前索引状态", score=1.0, )) total_score += 4.0 else: items.append(DiagnosisItem( name="索引状态", status=DiagnosisStatus.FAIL, description="网站未被搜索引擎索引", suggestion="检查Search Console,提交sitemap,确保没有被noindex", score=0.0, )) # 2. 爬取错误检测 (4分) if data.crawl_errors == 0: items.append(DiagnosisItem( name="爬取错误", status=DiagnosisStatus.PASS, description="未发现爬取错误", suggestion="定期检查Search Console的爬取错误报告", score=1.0, )) total_score += 4.0 elif data.crawl_errors <= 5: items.append(DiagnosisItem( name="爬取错误", status=DiagnosisStatus.WARNING, description=f"发现{data.crawl_errors}个爬取错误", suggestion="修复404页面,检查5xx服务器错误,优化重定向链", score=0.5, details={"error_count": data.crawl_errors}, )) total_score += 2.0 else: items.append(DiagnosisItem( name="爬取错误", status=DiagnosisStatus.FAIL, description=f"发现{data.crawl_errors}个爬取错误,数量过多", suggestion="立即修复所有爬取错误,特别是5xx服务器错误", score=0.0, details={"error_count": data.crawl_errors}, )) # 3. Core Web Vitals评估 (6分) cwv_score = 0.0 cwv_items = [] # LCP评估 (2分) if data.lcp_seconds <= 2.5: cwv_score += 2.0 cwv_items.append(DiagnosisItem( name="LCP", status=DiagnosisStatus.PASS, description=f"LCP为{data.lcp_seconds}s,符合<2.5s标准", suggestion="保持当前性能水平", score=1.0, details={"value": data.lcp_seconds, "threshold": 2.5}, )) elif data.lcp_seconds <= 4.0: cwv_score += 1.0 cwv_items.append(DiagnosisItem( name="LCP", status=DiagnosisStatus.WARNING, description=f"LCP为{data.lcp_seconds}s,超过2.5s标准", suggestion="优化图片加载,使用CDN,减少服务器响应时间", score=0.5, details={"value": data.lcp_seconds, "threshold": 2.5}, )) else: cwv_items.append(DiagnosisItem( name="LCP", status=DiagnosisStatus.FAIL, description=f"LCP为{data.lcp_seconds}s,严重超过标准", suggestion="立即优化页面加载性能", score=0.0, details={"value": data.lcp_seconds, "threshold": 2.5}, )) # FID评估 (2分) if data.fid_ms <= 100: cwv_score += 2.0 cwv_items.append(DiagnosisItem( name="FID", status=DiagnosisStatus.PASS, description=f"FID为{data.fid_ms}ms,符合<100ms标准", suggestion="保持当前交互性能", score=1.0, details={"value": data.fid_ms, "threshold": 100}, )) elif data.fid_ms <= 300: cwv_score += 1.0 cwv_items.append(DiagnosisItem( name="FID", status=DiagnosisStatus.WARNING, description=f"FID为{data.fid_ms}ms,超过100ms标准", suggestion="减少JavaScript执行时间,优化主线程工作", score=0.5, details={"value": data.fid_ms, "threshold": 100}, )) else: cwv_items.append(DiagnosisItem( name="FID", status=DiagnosisStatus.FAIL, description=f"FID为{data.fid_ms}ms,严重超过标准", suggestion="立即优化JavaScript,减少主线程阻塞", score=0.0, details={"value": data.fid_ms, "threshold": 100}, )) # CLS评估 (2分) if data.cls_score <= 0.1: cwv_score += 2.0 cwv_items.append(DiagnosisItem( name="CLS", status=DiagnosisStatus.PASS, description=f"CLS为{data.cls_score},符合<0.1标准", suggestion="保持当前视觉稳定性", score=1.0, details={"value": data.cls_score, "threshold": 0.1}, )) elif data.cls_score <= 0.25: cwv_score += 1.0 cwv_items.append(DiagnosisItem( name="CLS", status=DiagnosisStatus.WARNING, description=f"CLS为{data.cls_score},超过0.1标准", suggestion="为图片和广告预留空间,避免动态插入内容", score=0.5, details={"value": data.cls_score, "threshold": 0.1}, )) else: cwv_items.append(DiagnosisItem( name="CLS", status=DiagnosisStatus.FAIL, description=f"CLS为{data.cls_score},严重超过标准", suggestion="立即修复布局偏移问题", score=0.0, details={"value": data.cls_score, "threshold": 0.1}, )) items.extend(cwv_items) total_score += cwv_score # 4. URL结构规范化 (3分) if data.url_structure_normalized: items.append(DiagnosisItem( name="URL结构", status=DiagnosisStatus.PASS, description="URL结构规范,无重复URL问题", suggestion="保持当前URL结构", score=1.0, )) total_score += 3.0 else: items.append(DiagnosisItem( name="URL结构", status=DiagnosisStatus.WARNING, description="URL结构存在问题,可能有重复URL", suggestion="使用canonical标签,统一URL格式", score=0.5, )) total_score += 1.5 # 5. robots.txt配置检查 (4分) if data.has_robots_txt and not data.robots_txt_blocks_important: items.append(DiagnosisItem( name="robots.txt", status=DiagnosisStatus.PASS, description="robots.txt配置正确,未阻止重要页面", suggestion="定期检查robots.txt配置", score=1.0, )) total_score += 4.0 elif data.has_robots_txt: items.append(DiagnosisItem( name="robots.txt", status=DiagnosisStatus.FAIL, description="robots.txt阻止了重要页面", suggestion="检查并修改robots.txt,确保重要页面可被爬取", score=0.0, )) else: items.append(DiagnosisItem( name="robots.txt", status=DiagnosisStatus.WARNING, description="未找到robots.txt文件", suggestion="创建robots.txt文件,明确指定允许和禁止爬取的路径", score=0.5, )) total_score += 2.0 # 6. sitemap完整性验证 (4分) if data.has_sitemap and data.sitemap_valid: items.append(DiagnosisItem( name="sitemap", status=DiagnosisStatus.PASS, description="sitemap存在且有效", suggestion="定期更新sitemap,确保包含所有重要页面", score=1.0, )) total_score += 4.0 elif data.has_sitemap: items.append(DiagnosisItem( name="sitemap", status=DiagnosisStatus.WARNING, description="sitemap存在但可能无效", suggestion="验证sitemap格式,确保所有URL可访问", score=0.5, )) total_score += 2.0 else: items.append(DiagnosisItem( name="sitemap", status=DiagnosisStatus.FAIL, description="未找到sitemap", suggestion="创建并提交sitemap到Search Console", score=0.0, )) return SEODimensionScore( name=DimensionName.TECHNICAL_SEO, score=total_score, max_score=max_score, items=items, status=DiagnosisStatus.PASS, # 会在__post_init__中重新计算 ) def diagnose_on_page_seo(data: OnPageSEOData) -> SEODimensionScore: """ 页面SEO诊断 (满分20分) 评分项: - Title/Meta标签 (5分) - H标签结构 (4分) - 关键词密度 (4分) - 内链结构 (4分) - 图片Alt文本 (3分) """ max_score = 20.0 items: list[DiagnosisItem] = [] total_score = 0.0 # 1. Title/Meta标签完整性 (5分) title_score = 0.0 meta_score = 0.0 # Title检查 (2.5分) if data.has_title: if 30 <= data.title_length <= 60: title_score += 2.5 if not data.title_keyword_stuffing: items.append(DiagnosisItem( name="Title标签", status=DiagnosisStatus.PASS, description=f"Title长度{data.title_length}字符,格式规范", suggestion="保持当前Title优化", score=1.0, details={"length": data.title_length}, )) else: title_score -= 1.0 items.append(DiagnosisItem( name="Title标签", status=DiagnosisStatus.WARNING, description="Title存在关键词堆砌问题", suggestion="简化Title,自然使用关键词", score=0.5, )) else: title_score += 1.0 items.append(DiagnosisItem( name="Title标签", status=DiagnosisStatus.WARNING, description=f"Title长度{data.title_length}字符,建议30-60字符", suggestion="调整Title长度到推荐范围", score=0.5, details={"length": data.title_length}, )) else: items.append(DiagnosisItem( name="Title标签", status=DiagnosisStatus.FAIL, description="页面缺少Title标签", suggestion="为每个页面添加唯一且描述性的Title", score=0.0, )) # Meta Description检查 (2.5分) if data.has_meta_description: if 120 <= data.meta_description_length <= 160: meta_score += 2.5 items.append(DiagnosisItem( name="Meta Description", status=DiagnosisStatus.PASS, description=f"Meta Description长度{data.meta_description_length}字符,格式规范", suggestion="保持当前Meta Description优化", score=1.0, details={"length": data.meta_description_length}, )) else: meta_score += 1.0 items.append(DiagnosisItem( name="Meta Description", status=DiagnosisStatus.WARNING, description=f"Meta Description长度{data.meta_description_length}字符,建议120-160字符", suggestion="调整Meta Description长度", score=0.5, details={"length": data.meta_description_length}, )) else: items.append(DiagnosisItem( name="Meta Description", status=DiagnosisStatus.FAIL, description="页面缺少Meta Description", suggestion="为每个页面添加描述性的Meta Description", score=0.0, )) total_score += title_score + meta_score # 2. H标签结构层级 (4分) if data.h1_count == 1 and data.h_structure_valid: items.append(DiagnosisItem( name="H标签结构", status=DiagnosisStatus.PASS, description="H标签结构清晰,有且仅有1个H1", suggestion="保持当前H标签结构", score=1.0, details={"h1_count": data.h1_count}, )) total_score += 4.0 elif data.h1_count == 1: items.append(DiagnosisItem( name="H标签结构", status=DiagnosisStatus.WARNING, description="有1个H1,但H标签层级可能不规范", suggestion="确保H标签层级正确,不跳过级别", score=0.5, details={"h1_count": data.h1_count}, )) total_score += 2.0 elif data.h1_count == 0: items.append(DiagnosisItem( name="H标签结构", status=DiagnosisStatus.FAIL, description="页面缺少H1标签", suggestion="添加唯一的H1标签,包含主要关键词", score=0.0, details={"h1_count": data.h1_count}, )) else: items.append(DiagnosisItem( name="H标签结构", status=DiagnosisStatus.WARNING, description=f"页面有{data.h1_count}个H1标签,建议只有1个", suggestion="确保每个页面只有1个H1标签", score=0.5, details={"h1_count": data.h1_count}, )) total_score += 2.0 # 3. 关键词密度合理性 (4分) if 1.0 <= data.keyword_density <= 3.0: items.append(DiagnosisItem( name="关键词密度", status=DiagnosisStatus.PASS, description=f"关键词密度{data.keyword_density}%,在合理范围内", suggestion="保持当前关键词使用频率", score=1.0, details={"density": data.keyword_density}, )) total_score += 4.0 elif 0.5 <= data.keyword_density < 1.0 or 3.0 < data.keyword_density <= 5.0: items.append(DiagnosisItem( name="关键词密度", status=DiagnosisStatus.WARNING, description=f"关键词密度{data.keyword_density}%,建议1-3%", suggestion="调整关键词使用频率到推荐范围", score=0.5, details={"density": data.keyword_density}, )) total_score += 2.0 else: status = DiagnosisStatus.FAIL if data.keyword_density > 5.0 else DiagnosisStatus.WARNING items.append(DiagnosisItem( name="关键词密度", status=status, description=f"关键词密度{data.keyword_density}%,不合理", suggestion="优化关键词使用,避免堆砌或过少", score=0.0, details={"density": data.keyword_density}, )) # 4. 内链结构 (4分) if data.internal_links > 0: if data.broken_internal_links == 0: items.append(DiagnosisItem( name="内链结构", status=DiagnosisStatus.PASS, description=f"内链结构良好,共{data.internal_links}个内链,无死链", suggestion="保持内链更新,定期检查死链", score=1.0, details={"total": data.internal_links, "broken": 0}, )) total_score += 4.0 elif data.broken_internal_links <= 3: items.append(DiagnosisItem( name="内链结构", status=DiagnosisStatus.WARNING, description=f"发现{data.broken_internal_links}个死链", suggestion="修复所有死链,更新或移除无效链接", score=0.5, details={"total": data.internal_links, "broken": data.broken_internal_links}, )) total_score += 2.0 else: items.append(DiagnosisItem( name="内链结构", status=DiagnosisStatus.FAIL, description=f"发现{data.broken_internal_links}个死链,数量过多", suggestion="立即修复所有死链", score=0.0, details={"total": data.internal_links, "broken": data.broken_internal_links}, )) else: items.append(DiagnosisItem( name="内链结构", status=DiagnosisStatus.FAIL, description="页面没有内链", suggestion="添加相关页面的内链,提升网站结构", score=0.0, )) # 5. 图片Alt文本 (3分) if data.total_images == 0: items.append(DiagnosisItem( name="图片Alt文本", status=DiagnosisStatus.PASS, description="页面无图片", suggestion="考虑添加相关图片并设置Alt文本", score=1.0, )) total_score += 3.0 elif data.images_without_alt == 0: items.append(DiagnosisItem( name="图片Alt文本", status=DiagnosisStatus.PASS, description=f"所有{data.total_images}张图片都有Alt文本", suggestion="保持为所有图片添加描述性Alt文本", score=1.0, details={"total": data.total_images, "without_alt": 0}, )) total_score += 3.0 elif data.images_without_alt <= data.total_images * 0.3: items.append(DiagnosisItem( name="图片Alt文本", status=DiagnosisStatus.WARNING, description=f"{data.images_without_alt}/{data.total_images}张图片缺少Alt文本", suggestion="为所有图片添加描述性Alt文本", score=0.5, details={"total": data.total_images, "without_alt": data.images_without_alt}, )) total_score += 1.5 else: items.append(DiagnosisItem( name="图片Alt文本", status=DiagnosisStatus.FAIL, description=f"{data.images_without_alt}/{data.total_images}张图片缺少Alt文本", suggestion="立即为所有图片添加Alt文本", score=0.0, details={"total": data.total_images, "without_alt": data.images_without_alt}, )) return SEODimensionScore( name=DimensionName.ON_PAGE_SEO, score=total_score, max_score=max_score, items=items, status=DiagnosisStatus.PASS, ) def diagnose_content_quality(data: ContentQualityData) -> SEODimensionScore: """ 内容质量诊断 (满分20分) 评分项: - 可读性评分 (4分) - 信息深度 (4分) - E-E-A-T信号 (5分) - 内容新鲜度 (4分) - 重复内容检测 (3分) """ max_score = 20.0 items: list[DiagnosisItem] = [] total_score = 0.0 # 1. 可读性评分 (4分) if data.readability_score >= 70: items.append(DiagnosisItem( name="可读性", status=DiagnosisStatus.PASS, description=f"可读性评分{data.readability_score},内容易于理解", suggestion="保持当前内容质量", score=1.0, details={"score": data.readability_score}, )) total_score += 4.0 elif data.readability_score >= 50: items.append(DiagnosisItem( name="可读性", status=DiagnosisStatus.WARNING, description=f"可读性评分{data.readability_score},内容较难理解", suggestion="简化语言,使用短句,增加段落分隔", score=0.5, details={"score": data.readability_score}, )) total_score += 2.0 else: items.append(DiagnosisItem( name="可读性", status=DiagnosisStatus.FAIL, description=f"可读性评分{data.readability_score},内容难以理解", suggestion="大幅简化内容,使用更通俗的语言", score=0.0, details={"score": data.readability_score}, )) # 2. 信息深度评估 (4分) if data.word_count >= 1500 and data.topic_coverage >= 0.8: items.append(DiagnosisItem( name="信息深度", status=DiagnosisStatus.PASS, description=f"内容深度良好,{data.word_count}字,主题覆盖率{data.topic_coverage*100:.0f}%", suggestion="保持当前内容深度", score=1.0, details={"word_count": data.word_count, "coverage": data.topic_coverage}, )) total_score += 4.0 elif data.word_count >= 800 and data.topic_coverage >= 0.6: items.append(DiagnosisItem( name="信息深度", status=DiagnosisStatus.WARNING, description=f"内容深度一般,{data.word_count}字,主题覆盖率{data.topic_coverage*100:.0f}%", suggestion="扩展内容深度,覆盖更多相关子话题", score=0.5, details={"word_count": data.word_count, "coverage": data.topic_coverage}, )) total_score += 2.0 else: items.append(DiagnosisItem( name="信息深度", status=DiagnosisStatus.FAIL, description=f"内容深度不足,{data.word_count}字,主题覆盖率{data.topic_coverage*100:.0f}%", suggestion="大幅扩展内容,全面覆盖主题", score=0.0, details={"word_count": data.word_count, "coverage": data.topic_coverage}, )) # 3. E-E-A-T信号检测 (5分) eeat_score = 0.0 # 作者信息 (1.5分) if data.has_author_info: eeat_score += 1.5 items.append(DiagnosisItem( name="作者资质", status=DiagnosisStatus.PASS, description="内容包含作者信息", suggestion="保持展示作者资质", score=1.0, )) else: items.append(DiagnosisItem( name="作者资质", status=DiagnosisStatus.WARNING, description="内容缺少作者信息", suggestion="添加作者信息和专业背景", score=0.0, )) # 专业认证/专家审核 (1.5分) if data.has_expert_review: eeat_score += 1.5 items.append(DiagnosisItem( name="专家审核", status=DiagnosisStatus.PASS, description="内容经过专家审核", suggestion="保持专家审核流程", score=1.0, )) else: items.append(DiagnosisItem( name="专家审核", status=DiagnosisStatus.WARNING, description="内容未经专家审核", suggestion="考虑邀请行业专家审核重要内容", score=0.0, )) # 数据来源权威性 (2分) if data.has_citations and data.citation_authority >= 0.7: eeat_score += 2.0 items.append(DiagnosisItem( name="数据来源", status=DiagnosisStatus.PASS, description=f"引用权威数据源,权威性评分{data.citation_authority:.2f}", suggestion="保持引用高质量数据源", score=1.0, details={"authority": data.citation_authority}, )) elif data.has_citations: eeat_score += 1.0 items.append(DiagnosisItem( name="数据来源", status=DiagnosisStatus.WARNING, description=f"有引用但数据源权威性一般,评分{data.citation_authority:.2f}", suggestion="引用更权威的数据源", score=0.5, details={"authority": data.citation_authority}, )) else: items.append(DiagnosisItem( name="数据来源", status=DiagnosisStatus.FAIL, description="内容未引用任何数据源", suggestion="引用权威数据支持内容观点", score=0.0, )) total_score += eeat_score # 4. 内容新鲜度 (4分) if data.has_publication_date and data.last_updated_days <= 30: items.append(DiagnosisItem( name="内容新鲜度", status=DiagnosisStatus.PASS, description=f"内容更新于{data.last_updated_days}天前", suggestion="保持定期更新内容", score=1.0, details={"last_updated_days": data.last_updated_days}, )) total_score += 4.0 elif data.has_publication_date and data.last_updated_days <= 90: items.append(DiagnosisItem( name="内容新鲜度", status=DiagnosisStatus.WARNING, description=f"内容更新于{data.last_updated_days}天前,建议30天内更新", suggestion="定期更新内容,保持信息时效性", score=0.5, details={"last_updated_days": data.last_updated_days}, )) total_score += 2.0 else: items.append(DiagnosisItem( name="内容新鲜度", status=DiagnosisStatus.FAIL, description="内容长时间未更新或缺少发布日期", suggestion="更新内容并显示发布/更新日期", score=0.0, details={"last_updated_days": data.last_updated_days}, )) # 5. 重复内容检测 (3分) if data.duplicate_content_ratio <= 0.1: items.append(DiagnosisItem( name="重复内容", status=DiagnosisStatus.PASS, description=f"重复内容比例{data.duplicate_content_ratio*100:.0f}%,在可接受范围内", suggestion="保持内容原创性", score=1.0, details={"duplicate_ratio": data.duplicate_content_ratio}, )) total_score += 3.0 elif data.duplicate_content_ratio <= 0.3: items.append(DiagnosisItem( name="重复内容", status=DiagnosisStatus.WARNING, description=f"重复内容比例{data.duplicate_content_ratio*100:.0f}%", suggestion="减少重复内容,使用canonical标签", score=0.5, details={"duplicate_ratio": data.duplicate_content_ratio}, )) total_score += 1.5 else: items.append(DiagnosisItem( name="重复内容", status=DiagnosisStatus.FAIL, description=f"重复内容比例{data.duplicate_content_ratio*100:.0f}%,过高", suggestion="重写重复内容,确保每页独特价值", score=0.0, details={"duplicate_ratio": data.duplicate_content_ratio}, )) return SEODimensionScore( name=DimensionName.CONTENT_QUALITY, score=total_score, max_score=max_score, items=items, status=DiagnosisStatus.PASS, ) def diagnose_backlinks(data: BacklinkData) -> SEODimensionScore: """ 外链分析 (满分15分) 评分项: - 反向链接质量 (6分) - 毒性信号检测 (5分) - 锚文本分布 (4分) """ max_score = 15.0 items: list[DiagnosisItem] = [] total_score = 0.0 # 1. 反向链接质量 (6分) quality_score = 0.0 # 引用域名数 (2分) if data.referring_domains >= 20: quality_score += 2.0 items.append(DiagnosisItem( name="引用域名", status=DiagnosisStatus.PASS, description=f"有{data.referring_domains}个引用域名", suggestion="继续增加高质量外链", score=1.0, details={"referring_domains": data.referring_domains}, )) elif data.referring_domains >= 10: quality_score += 1.0 items.append(DiagnosisItem( name="引用域名", status=DiagnosisStatus.WARNING, description=f"有{data.referring_domains}个引用域名,建议增加", suggestion="通过内容营销获取更多外链", score=0.5, details={"referring_domains": data.referring_domains}, )) else: items.append(DiagnosisItem( name="引用域名", status=DiagnosisStatus.FAIL, description=f"仅有{data.referring_domains}个引用域名", suggestion="积极开展外链建设", score=0.0, details={"referring_domains": data.referring_domains}, )) # 高权威链接 (2分) if data.high_authority_links >= 5: quality_score += 2.0 items.append(DiagnosisItem( name="高权威链接", status=DiagnosisStatus.PASS, description=f"有{data.high_authority_links}个高权威外链", suggestion="保持获取高质量外链", score=1.0, details={"high_authority_links": data.high_authority_links}, )) elif data.high_authority_links >= 2: quality_score += 1.0 items.append(DiagnosisItem( name="高权威链接", status=DiagnosisStatus.WARNING, description=f"有{data.high_authority_links}个高权威外链", suggestion="争取更多权威网站的外链", score=0.5, details={"high_authority_links": data.high_authority_links}, )) else: items.append(DiagnosisItem( name="高权威链接", status=DiagnosisStatus.FAIL, description=f"仅有{data.high_authority_links}个高权威外链", suggestion="重点获取权威网站外链", score=0.0, details={"high_authority_links": data.high_authority_links}, )) # Nofollow比例 (2分) if 0.2 <= data.nofollow_ratio <= 0.6: quality_score += 2.0 items.append(DiagnosisItem( name="Nofollow比例", status=DiagnosisStatus.PASS, description=f"Nofollow比例{data.nofollow_ratio*100:.0f}%,自然合理", suggestion="保持自然的外链结构", score=1.0, details={"nofollow_ratio": data.nofollow_ratio}, )) else: quality_score += 1.0 items.append(DiagnosisItem( name="Nofollow比例", status=DiagnosisStatus.WARNING, description=f"Nofollow比例{data.nofollow_ratio*100:.0f}%,可能不自然", suggestion="确保外链结构自然多样", score=0.5, details={"nofollow_ratio": data.nofollow_ratio}, )) total_score += quality_score # 2. 毒性信号检测 (5分) toxic_ratio = data.toxic_links / data.total_backlinks if data.total_backlinks > 0 else 0 if data.toxic_links == 0: items.append(DiagnosisItem( name="毒性链接", status=DiagnosisStatus.PASS, description="未发现毒性外链", suggestion="定期监控外链质量", score=1.0, details={"toxic_count": 0}, )) total_score += 5.0 elif toxic_ratio <= 0.05: items.append(DiagnosisItem( name="毒性链接", status=DiagnosisStatus.WARNING, description=f"发现{data.toxic_links}个毒性外链", suggestion="使用Disavow工具拒绝毒性外链", score=0.5, details={"toxic_count": data.toxic_links, "ratio": toxic_ratio}, )) total_score += 2.5 else: items.append(DiagnosisItem( name="毒性链接", status=DiagnosisStatus.FAIL, description=f"发现{data.toxic_links}个毒性外链,比例过高", suggestion="立即使用Disavow工具拒绝所有毒性外链", score=0.0, details={"toxic_count": data.toxic_links, "ratio": toxic_ratio}, )) # 3. 锚文本分布 (4分) if data.anchor_text_diversity >= 0.7 and data.exact_match_anchor_ratio <= 0.3: items.append(DiagnosisItem( name="锚文本分布", status=DiagnosisStatus.PASS, description=f"锚文本多样性{data.anchor_text_diversity:.2f},精确匹配比例{data.exact_match_anchor_ratio*100:.0f}%", suggestion="保持自然的锚文本结构", score=1.0, details={ "diversity": data.anchor_text_diversity, "exact_match_ratio": data.exact_match_anchor_ratio, }, )) total_score += 4.0 elif data.anchor_text_diversity >= 0.5: items.append(DiagnosisItem( name="锚文本分布", status=DiagnosisStatus.WARNING, description=f"锚文本多样性{data.anchor_text_diversity:.2f},建议增加多样性", suggestion="使用更多样化的锚文本", score=0.5, details={ "diversity": data.anchor_text_diversity, "exact_match_ratio": data.exact_match_anchor_ratio, }, )) total_score += 2.0 else: items.append(DiagnosisItem( name="锚文本分布", status=DiagnosisStatus.FAIL, description=f"锚文本多样性{data.anchor_text_diversity:.2f},过于单一", suggestion="大幅增加锚文本多样性", score=0.0, details={ "diversity": data.anchor_text_diversity, "exact_match_ratio": data.exact_match_anchor_ratio, }, )) return SEODimensionScore( name=DimensionName.BACKLINK_ANALYSIS, score=total_score, max_score=max_score, items=items, status=DiagnosisStatus.PASS, ) def diagnose_user_experience(data: UserExperienceData) -> SEODimensionScore: """ 用户体验诊断 (满分20分) 评分项: - 移动适配检查 (6分) - 页面速度评估 (5分) - 转化路径分析 (5分) - 基础体验 (4分) """ max_score = 20.0 items: list[DiagnosisItem] = [] total_score = 0.0 # 1. 移动适配检查 (6分) if data.is_mobile_friendly and data.mobile_viewport_set: items.append(DiagnosisItem( name="移动适配", status=DiagnosisStatus.PASS, description="页面移动适配良好", suggestion="保持移动端优化", score=1.0, )) total_score += 6.0 elif data.is_mobile_friendly: items.append(DiagnosisItem( name="移动适配", status=DiagnosisStatus.WARNING, description="页面基本适配移动端,但缺少viewport设置", suggestion="添加viewport meta标签", score=0.5, )) total_score += 3.0 else: items.append(DiagnosisItem( name="移动适配", status=DiagnosisStatus.FAIL, description="页面未适配移动端", suggestion="立即实现响应式设计或移动版本", score=0.0, )) # 2. 页面速度评估 (5分) if data.page_load_time <= 2.0: items.append(DiagnosisItem( name="页面速度", status=DiagnosisStatus.PASS, description=f"页面加载时间{data.page_load_time}s,性能优秀", suggestion="保持当前性能水平", score=1.0, details={"load_time": data.page_load_time}, )) total_score += 5.0 elif data.page_load_time <= 3.0: items.append(DiagnosisItem( name="页面速度", status=DiagnosisStatus.WARNING, description=f"页面加载时间{data.page_load_time}s,建议优化到2s内", suggestion="优化图片、启用缓存、使用CDN", score=0.5, details={"load_time": data.page_load_time}, )) total_score += 2.5 else: items.append(DiagnosisItem( name="页面速度", status=DiagnosisStatus.FAIL, description=f"页面加载时间{data.page_load_time}s,严重超时", suggestion="立即优化页面加载性能", score=0.0, details={"load_time": data.page_load_time}, )) # 3. 转化路径分析 (5分) conversion_score = 0.0 # CTA检查 (2分) if data.has_cta: conversion_score += 2.0 items.append(DiagnosisItem( name="CTA", status=DiagnosisStatus.PASS, description="页面有明确的行动号召", suggestion="保持清晰的CTA", score=1.0, )) else: items.append(DiagnosisItem( name="CTA", status=DiagnosisStatus.WARNING, description="页面缺少明确的行动号召", suggestion="添加清晰的CTA按钮", score=0.0, )) # 转化路径清晰度 (2分) if data.conversion_path_clear: conversion_score += 2.0 items.append(DiagnosisItem( name="转化路径", status=DiagnosisStatus.PASS, description="转化路径清晰", suggestion="保持当前转化流程", score=1.0, )) else: items.append(DiagnosisItem( name="转化路径", status=DiagnosisStatus.WARNING, description="转化路径不够清晰", suggestion="简化转化流程,减少步骤", score=0.0, )) # 表单可用性 (1分) if data.form_usability >= 0.8: conversion_score += 1.0 items.append(DiagnosisItem( name="表单可用性", status=DiagnosisStatus.PASS, description=f"表单可用性{data.form_usability*100:.0f}%", suggestion="保持表单体验", score=1.0, details={"usability": data.form_usability}, )) else: items.append(DiagnosisItem( name="表单可用性", status=DiagnosisStatus.WARNING, description=f"表单可用性{data.form_usability*100:.0f}%,需要优化", suggestion="简化表单,减少必填字段", score=0.0, details={"usability": data.form_usability}, )) total_score += conversion_score # 4. 基础体验 (4分) # HTTPS检查 (2分) if data.has_https: items.append(DiagnosisItem( name="HTTPS", status=DiagnosisStatus.PASS, description="网站使用HTTPS", suggestion="保持HTTPS配置", score=1.0, )) total_score += 2.0 else: items.append(DiagnosisItem( name="HTTPS", status=DiagnosisStatus.FAIL, description="网站未使用HTTPS", suggestion="立即启用HTTPS", score=0.0, )) # 面包屑导航 (1分) if data.has_breadcrumbs: items.append(DiagnosisItem( name="面包屑导航", status=DiagnosisStatus.PASS, description="页面有面包屑导航", suggestion="保持面包屑导航", score=1.0, )) total_score += 1.0 else: items.append(DiagnosisItem( name="面包屑导航", status=DiagnosisStatus.WARNING, description="页面缺少面包屑导航", suggestion="添加面包屑导航提升用户体验", score=0.0, )) # 站内搜索 (1分) if data.has_search: items.append(DiagnosisItem( name="站内搜索", status=DiagnosisStatus.PASS, description="网站有站内搜索功能", suggestion="保持搜索功能优化", score=1.0, )) total_score += 1.0 else: items.append(DiagnosisItem( name="站内搜索", status=DiagnosisStatus.WARNING, description="网站缺少站内搜索", suggestion="添加站内搜索功能", score=0.0, )) return SEODimensionScore( name=DimensionName.USER_EXPERIENCE, score=total_score, max_score=max_score, items=items, status=DiagnosisStatus.PASS, ) # ============================================================ # 建议生成 # ============================================================ def generate_recommendations(result: SEODiagnosisResult) -> list[SEORecommendation]: """ 根据诊断结果生成优化建议 优先级规则: - FAIL状态 -> high priority - WARNING状态 -> medium priority - 影响大的建议 -> high priority """ recommendations: list[SEORecommendation] = [] for dimension in result.dimensions: for item in dimension.items: if item.status == DiagnosisStatus.FAIL: recommendations.append(SEORecommendation( priority="high", dimension=dimension.name, item_name=item.name, description=item.suggestion, impact="修复后可显著提升SEO表现", effort="medium", )) elif item.status == DiagnosisStatus.WARNING: recommendations.append(SEORecommendation( priority="medium", dimension=dimension.name, item_name=item.name, description=item.suggestion, impact="优化后可改善SEO表现", effort="easy", )) # 按优先级排序 priority_order = {"high": 0, "medium": 1, "low": 2} recommendations.sort(key=lambda r: priority_order.get(r.priority, 3)) return recommendations # ============================================================ # 主诊断服务 # ============================================================ class SEODiagnosisService: """SEO诊断服务""" def diagnose( self, technical_data: TechnicalSEOData | None = None, on_page_data: OnPageSEOData | None = None, content_data: ContentQualityData | None = None, backlink_data: BacklinkData | None = None, ux_data: UserExperienceData | None = None, ) -> SEODiagnosisResult: """ 执行完整SEO诊断 Args: technical_data: 技术SEO检测数据 on_page_data: 页面SEO检测数据 content_data: 内容质量检测数据 backlink_data: 外链检测数据 ux_data: 用户体验检测数据 Returns: SEODiagnosisResult: 诊断结果 """ # 使用默认数据(模拟数据) technical_data = technical_data or TechnicalSEOData() on_page_data = on_page_data or OnPageSEOData() content_data = content_data or ContentQualityData() backlink_data = backlink_data or BacklinkData() ux_data = ux_data or UserExperienceData() # 执行5维度诊断 dimensions = [ diagnose_technical_seo(technical_data), diagnose_on_page_seo(on_page_data), diagnose_content_quality(content_data), diagnose_backlinks(backlink_data), diagnose_user_experience(ux_data), ] # 计算综合评分 overall_score = sum(dim.score for dim in dimensions) # 创建初步结果 result = SEODiagnosisResult( overall_score=overall_score, dimensions=dimensions, recommendations=[], ) # 生成优化建议 result.recommendations = generate_recommendations(result) return result def diagnose_technical_only(self, data: TechnicalSEOData | None = None) -> SEODimensionScore: """仅执行技术SEO诊断""" return diagnose_technical_seo(data or TechnicalSEOData()) def diagnose_on_page_only(self, data: OnPageSEOData | None = None) -> SEODimensionScore: """仅执行页面SEO诊断""" return diagnose_on_page_seo(data or OnPageSEOData()) def diagnose_content_only(self, data: ContentQualityData | None = None) -> SEODimensionScore: """仅执行内容质量诊断""" return diagnose_content_quality(data or ContentQualityData()) def diagnose_backlinks_only(self, data: BacklinkData | None = None) -> SEODimensionScore: """仅执行外链分析""" return diagnose_backlinks(data or BacklinkData()) def diagnose_ux_only(self, data: UserExperienceData | None = None) -> SEODimensionScore: """仅执行用户体验诊断""" return diagnose_user_experience(data or UserExperienceData())