# geo/backend/app/services/geo_diagnosis.py

"""
GEO诊断服务 - 6大维度检测系统

诊断维度（总分100）：
- 内容可提取性 (Content Extractability): 20分 - AI能否轻松提取和理解内容
- 实体清晰度 (Entity Clarity): 15分 - AI能否理解品牌是什么
- E-E-A-T信号 (E-E-A-T Signals): 20分 - 经验、专业性、权威性、可信度
- Schema标记 (Schema Markup): 15分 - 结构化数据完整性
- 主题权威 (Topic Authority): 15分 - 品牌在特定领域的权威性
- 引用就绪度 (Citation Readiness): 15分 - 品牌在AI回答中被引用的可能性
"""
from __future__ import annotations

import logging
from dataclasses import dataclass, field

logger = logging.getLogger(__name__)


# ============================================================
# 诊断数据结构
# ============================================================

@dataclass
class DiagnosisItem:
    """One scored check inside a diagnosis dimension.

    Attributes:
        name: Display name of the check.
        status: Outcome of the check: "pass", "warning" or "fail".
        description: Explanation of what the check looks at.
        suggestion: Optimisation advice for this check.
        score: Points earned by this check.
        max_score: Maximum points this check can earn.
    """

    name: str
    status: str
    description: str
    suggestion: str
    score: float = 0.0
    max_score: float = 0.0


@dataclass
class GEODimensionScore:
    """Score details for a single diagnosis dimension.

    Attributes:
        name: Dimension name.
        score: Points earned in this dimension (0 to ``max_score``).
        max_score: Maximum points available in this dimension.
        items: The individual checks that make up the dimension.
        status: Dimension-level status: "pass", "warning" or "fail".
        percentage: Score rate on a 0-100 scale.
        detail: Free-form dictionary of extra details for the dimension.
    """

    name: str
    score: float
    max_score: float
    # Mutable defaults go through default_factory so every instance gets its
    # own list / dict.
    items: list[DiagnosisItem] = field(default_factory=list)
    status: str = "pass"
    percentage: float = 0.0
    detail: dict = field(default_factory=dict)


@dataclass
class GEORecommendation:
    """A single optimisation recommendation.

    Attributes:
        priority: Priority bucket: "P0", "P1" or "P2".
        dimension: Name of the dimension the recommendation belongs to.
        title: Short recommendation title.
        description: Recommendation text.
        impact: Expected impact: "high", "medium" or "low".
        effort: Implementation effort: "easy", "medium" or "hard".
    """

    priority: str
    dimension: str
    title: str
    description: str
    impact: str
    effort: str


@dataclass
class GEODiagnosisResult:
    """Overall result of a GEO diagnosis.

    Attributes:
        overall_score: Combined score on a 0-100 scale.
        dimensions: Per-dimension score details.
        recommendations: Optimisation recommendations.
        health_level: "excellent", "good", "pass" or "danger". It is always
            recomputed from ``overall_score`` in ``__post_init__``, so a value
            passed to the constructor is overwritten.
    """

    overall_score: float = 0.0
    dimensions: list[GEODimensionScore] = field(default_factory=list)
    recommendations: list[GEORecommendation] = field(default_factory=list)
    health_level: str = "danger"

    def __post_init__(self):
        """Derive ``health_level`` from ``overall_score``."""
        # Thresholds are checked from highest to lowest; the first floor the
        # score reaches decides the level.
        for floor, level in ((80, "excellent"), (60, "good"), (40, "pass")):
            if self.overall_score >= floor:
                self.health_level = level
                break
        else:
            self.health_level = "danger"

    def to_dict(self) -> dict:
        """Return the result as a plain dictionary.

        Scores and percentages are rounded to two decimals; ``max_score`` and
        ``detail`` values are passed through unchanged. The label for the
        health level comes from ``get_health_level_label``.
        """
        payload: dict = {
            "overall_score": round(self.overall_score, 2),
            "health_level": self.health_level,
            "health_level_label": get_health_level_label(self.health_level),
        }

        dimension_rows = []
        for dim in self.dimensions:
            dimension_rows.append({
                "name": dim.name,
                "score": round(dim.score, 2),
                "max_score": dim.max_score,
                "percentage": round(dim.percentage, 2),
                "status": dim.status,
                "items": [
                    {
                        "name": item.name,
                        "status": item.status,
                        "description": item.description,
                        "suggestion": item.suggestion,
                        "score": round(item.score, 2),
                        "max_score": item.max_score,
                    }
                    for item in dim.items
                ],
                "detail": dim.detail,
            })
        payload["dimensions"] = dimension_rows

        recommendation_rows = []
        for rec in self.recommendations:
            recommendation_rows.append({
                "priority": rec.priority,
                "dimension": rec.dimension,
                "title": rec.title,
                "description": rec.description,
                "impact": rec.impact,
                "effort": rec.effort,
            })
        payload["recommendations"] = recommendation_rows

        return payload


# ============================================================
# 维度1: 内容可提取性诊断 (满分20)
# ============================================================

def diagnose_content_extractability(
    has_direct_answer: bool = False,
    has_qa_headings: bool = False,
    has_structured_data: bool = False,
    has_internal_links: bool = False,
    has_freshness_info: bool = False,
    update_days_ago: int | None = None,
) -> GEODimensionScore:
    """Score the "content extractability" dimension (20 points).

    Four yes/no checks earn either their full points or nothing; the
    freshness check is graded by how recently the content was updated.

    Args:
        has_direct_answer: Whether there is a direct-answer block, i.e. a
            concise answer in the first paragraph (6 points, "fail" if absent).
        has_qa_headings: Whether H2/H3 headings are phrased as questions
            (5 points, "fail" if absent).
        has_structured_data: Whether lists and tables are used
            (4 points, "fail" if absent).
        has_internal_links: Whether the page links to sub-intent pages
            (3 points, "warning" if absent).
        has_freshness_info: Whether freshness information (update date and
            author) is present (up to 2 points).
        update_days_ago: Days since the last update, or None when unknown.

    Returns:
        GEODimensionScore: The dimension score. Its ``status`` is "warning"
        when at least one item is "fail" and "pass" otherwise; ``detail``
        echoes the inputs.
    """
    max_score = 20.0

    # One row per yes/no check:
    # (flag, name, status when absent, description, suggestion, points)
    yes_no_checks = (
        (
            has_direct_answer,
            "直接回答块",
            "fail",
            "页面首段是否包含简洁明确的答案，便于AI直接提取",
            "在页面首段添加2-3句话的简洁答案，直接回答用户核心问题",
            6.0,
        ),
        (
            has_qa_headings,
            "问答式标题",
            "fail",
            "H2/H3标题是否采用问题形式，帮助AI理解内容结构",
            "将关键H2/H3标题改为问题形式，如'什么是X'、'如何使用Y'",
            5.0,
        ),
        (
            has_structured_data,
            "列表和表格",
            "fail",
            "是否使用列表、表格等结构化数据展示信息",
            "使用HTML列表(ul/ol)和表格(table)组织信息，便于AI解析",
            4.0,
        ),
        (
            has_internal_links,
            "内链到子意图页",
            "warning",
            "是否链接到相关深度内容页面",
            "添加内链到相关子话题页面，形成内容网络",
            3.0,
        ),
    )
    items = [
        DiagnosisItem(
            name=label,
            status="pass" if present else absent_status,
            description=explanation,
            suggestion=advice,
            score=points if present else 0.0,
            max_score=points,
        )
        for present, label, absent_status, explanation, advice, points in yes_no_checks
    ]

    # Freshness (2 points): no freshness info scores nothing; info without a
    # known age gets half credit; otherwise the score drops as the age grows.
    if not has_freshness_info:
        freshness_score, freshness_status = 0.0, "fail"
    elif update_days_ago is None:
        freshness_score, freshness_status = 1.0, "warning"
    elif update_days_ago <= 30:
        freshness_score, freshness_status = 2.0, "pass"
    elif update_days_ago <= 90:
        freshness_score, freshness_status = 1.5, "warning"
    else:
        freshness_score, freshness_status = 0.5, "warning"

    items.append(DiagnosisItem(
        name="内容新鲜度",
        status=freshness_status,
        description="是否有更新日期和作者信息，体现内容时效性",
        suggestion="在页面显眼位置展示最后更新日期和作者信息",
        score=freshness_score,
        max_score=2.0,
    ))

    total_score = sum(entry.score for entry in items)

    # Only a failing item downgrades the dimension; item-level warnings alone
    # leave it at "pass".
    item_statuses = {entry.status for entry in items}
    status = "warning" if "fail" in item_statuses else "pass"

    return GEODimensionScore(
        name="内容可提取性",
        score=total_score,
        max_score=max_score,
        items=items,
        status=status,
        percentage=round((total_score / max_score) * 100, 2),
        detail={
            "has_direct_answer": has_direct_answer,
            "has_qa_headings": has_qa_headings,
            "has_structured_data": has_structured_data,
            "has_internal_links": has_internal_links,
            "has_freshness_info": has_freshness_info,
            "update_days_ago": update_days_ago,
        },
    )


# ============================================================
# 维度2: 实体清晰度诊断 (满分15)
# ============================================================

def diagnose_entity_clarity(
    has_brand_definition: bool = False,
    has_target_audience: bool = False,
    has_unique_value: bool = False,
    has_industry_classification: bool = False,
) -> GEODimensionScore:
    """Score the "entity clarity" dimension (15 points).

    Every check is yes/no: a present signal earns the check's full points,
    an absent one earns nothing.

    Args:
        has_brand_definition: Whether it is clearly stated what the brand
            does (5 points, "fail" if absent).
        has_target_audience: Whether it is clear who the brand serves
            (4 points, "fail" if absent).
        has_unique_value: Whether there is a differentiated value
            proposition (3 points, "warning" if absent).
        has_industry_classification: Whether industry classification
            information is present (3 points, "warning" if absent).

    Returns:
        GEODimensionScore: The dimension score. Its ``status`` is "warning"
        when at least one item is "fail" and "pass" otherwise; ``detail``
        echoes the inputs.
    """
    max_score = 15.0

    def yes_no_item(present, points, absent_status, label, explanation, advice):
        # Build one all-or-nothing check.
        return DiagnosisItem(
            name=label,
            status="pass" if present else absent_status,
            description=explanation,
            suggestion=advice,
            score=points if present else 0.0,
            max_score=points,
        )

    items = [
        yes_no_item(
            has_brand_definition, 5.0, "fail",
            "品牌定义",
            "是否清晰说明品牌做什么，AI理解准确率目标≥95%",
            "在首页和About页面添加清晰的品牌定义，包含核心业务和价值主张",
        ),
        yes_no_item(
            has_target_audience, 4.0, "fail",
            "目标受众",
            "是否明确服务谁，实体识别准确率目标≥90%",
            "明确描述目标用户群体，如'为中小企业提供XX服务'",
        ),
        yes_no_item(
            has_unique_value, 3.0, "warning",
            "差异化价值",
            "为什么选择这个品牌，独特性评分目标≥80",
            "突出品牌独特优势，如技术领先、服务优质、价格合理等",
        ),
        yes_no_item(
            has_industry_classification, 3.0, "warning",
            "行业分类",
            "品牌属于什么行业，分类准确率目标≥95%",
            "在页面中明确标注行业分类，如'SaaS'、'电子商务'等",
        ),
    ]

    total_score = sum(entry.score for entry in items)

    # Only a failing item downgrades the dimension to "warning".
    failed = [entry for entry in items if entry.status == "fail"]
    status = "warning" if failed else "pass"

    return GEODimensionScore(
        name="实体清晰度",
        score=total_score,
        max_score=max_score,
        items=items,
        status=status,
        percentage=round((total_score / max_score) * 100, 2),
        detail={
            "has_brand_definition": has_brand_definition,
            "has_target_audience": has_target_audience,
            "has_unique_value": has_unique_value,
            "has_industry_classification": has_industry_classification,
        },
    )


# ============================================================
# 维度3: E-E-A-T信号诊断 (满分20)
# ============================================================

def diagnose_eeat_signals(
    has_author_bio: bool = False,
    author_credentials_complete: float = 0.0,
    has_certifications: bool = False,
    certification_count: int = 0,
    has_data_sources: bool = False,
    authoritative_source_ratio: float = 0.0,
    has_expert_endorsements: bool = False,
    endorsement_count: int = 0,
) -> GEODimensionScore:
    """Score the E-E-A-T signals dimension (20 points).

    Covers experience, expertise, authoritativeness and trustworthiness
    signals: author credentials, certifications, data sources and expert
    endorsements.

    Args:
        has_author_bio: Whether author credential information is present.
        author_credentials_complete: Completeness of the author bio (0-1).
            Values outside 0-1 are clamped before scoring.
        has_certifications: Whether professional certifications are present.
        certification_count: Number of certifications / awards.
        has_data_sources: Whether data sources are cited.
        authoritative_source_ratio: Share of citations that point to
            authoritative sources (0-1). Values outside 0-1 are clamped
            before scoring.
        has_expert_endorsements: Whether expert endorsements are present.
        endorsement_count: Number of expert endorsements.

    Returns:
        GEODimensionScore: The dimension score. Its ``status`` is "warning"
        when at least one item is "fail" and "pass" otherwise; ``detail``
        echoes the inputs as given (ratios rounded to two decimals).
    """
    max_score = 20.0
    items = []

    # The ratios are documented as 0-1. Clamp them so an out-of-range input
    # can never push an item below 0 or above its own max_score.
    completeness = min(1.0, max(0.0, author_credentials_complete))
    source_ratio = min(1.0, max(0.0, authoritative_source_ratio))

    # 1. Author credentials (6 points), proportional to bio completeness.
    author_score = completeness * 6.0 if has_author_bio else 0.0
    if author_score >= 5.4:
        author_status = "pass"
    elif author_score >= 3.0:
        author_status = "warning"
    else:
        author_status = "fail"
    items.append(DiagnosisItem(
        name="作者资质",
        status=author_status,
        description="内容作者是否有专业背景，作者简介完整度目标≥90%",
        suggestion="添加作者详细简介，包含教育背景、工作经验、专业领域",
        score=author_score,
        max_score=6.0,
    ))

    # 2. Professional certifications (5 points), tiered by count.
    cert_score = 0.0
    if has_certifications:
        if certification_count >= 5:
            cert_score = 5.0
        elif certification_count >= 3:
            cert_score = 4.0
        elif certification_count >= 1:
            cert_score = 2.5
    if cert_score >= 4.0:
        cert_status = "pass"
    elif cert_score >= 2.0:
        cert_status = "warning"
    else:
        cert_status = "fail"
    items.append(DiagnosisItem(
        name="专业认证",
        status=cert_status,
        description="是否有行业认证/奖项，认证展示率目标≥80%",
        suggestion="展示行业认证、奖项、资质，如ISO认证、行业奖项等",
        score=cert_score,
        max_score=5.0,
    ))

    # 3. Data sources (5 points), proportional to the authoritative share.
    source_score = source_ratio * 5.0 if has_data_sources else 0.0
    if source_score >= 4.0:
        source_status = "pass"
    elif source_score >= 2.0:
        source_status = "warning"
    else:
        source_status = "fail"
    items.append(DiagnosisItem(
        name="数据来源",
        status=source_status,
        description="是否引用可靠数据，引用权威源比例目标≥70%",
        suggestion="引用权威机构数据，如政府报告、学术研究、行业报告",
        score=source_score,
        max_score=5.0,
    ))

    # 4. Expert endorsements (4 points), tiered by count.
    endorsement_score = 0.0
    if has_expert_endorsements:
        if endorsement_count >= 5:
            endorsement_score = 4.0
        elif endorsement_count >= 3:
            endorsement_score = 3.0
        elif endorsement_count >= 1:
            endorsement_score = 1.5
    if endorsement_score >= 3.0:
        endorsement_status = "pass"
    elif endorsement_score >= 1.5:
        endorsement_status = "warning"
    else:
        endorsement_status = "fail"
    items.append(DiagnosisItem(
        name="专家背书",
        status=endorsement_status,
        description="是否有行业专家认可，背书数量目标≥3",
        suggestion="获取行业专家推荐、用户评价、案例研究",
        score=endorsement_score,
        max_score=4.0,
    ))

    total_score = sum(item.score for item in items)
    percentage = (total_score / max_score) * 100

    # Only a failing item downgrades the dimension to "warning".
    has_fail = any(item.status == "fail" for item in items)
    status = "warning" if has_fail else "pass"

    return GEODimensionScore(
        name="E-E-A-T信号",
        score=total_score,
        max_score=max_score,
        items=items,
        status=status,
        percentage=round(percentage, 2),
        detail={
            "has_author_bio": has_author_bio,
            "author_credentials_complete": round(author_credentials_complete, 2),
            "has_certifications": has_certifications,
            "certification_count": certification_count,
            "has_data_sources": has_data_sources,
            "authoritative_source_ratio": round(authoritative_source_ratio, 2),
            "has_expert_endorsements": has_expert_endorsements,
            "endorsement_count": endorsement_count,
        },
    )


# ============================================================
# 维度4: Schema标记诊断 (满分15)
# ============================================================

def diagnose_schema_markup(
    has_organization: bool = False,
    has_product: bool = False,
    has_article: bool = False,
    has_faq: bool = False,
    has_howto: bool = False,
    has_breadcrumb: bool = False,
) -> GEODimensionScore:
    """Score the Schema markup dimension (15 points).

    Each schema type is a yes/no check worth a fixed number of points.

    Args:
        has_organization: Whether Organization markup is present
            (4 points, "fail" if absent).
        has_product: Whether Product markup is present
            (3 points, "fail" if absent).
        has_article: Whether Article/BlogPosting markup is present
            (3 points, "fail" if absent).
        has_faq: Whether FAQPage markup is present
            (2 points, "warning" if absent).
        has_howto: Whether HowTo markup is present
            (2 points, "warning" if absent).
        has_breadcrumb: Whether BreadcrumbList markup is present
            (1 point, "warning" if absent).

    Returns:
        GEODimensionScore: The dimension score. Its ``status`` is "warning"
        when at least one item is "fail" and "pass" otherwise; ``detail``
        echoes the inputs plus ``schema_count``, the sum of the six flags.
    """
    max_score = 15.0

    # One row per schema type:
    # (flag, name, points, status when absent, description, suggestion)
    schema_checks = (
        (
            has_organization, "Organization", 4.0, "fail",
            "企业主页的Organization标记，包含名称、logo、联系方式",
            "添加Organization Schema，包含@type: Organization、name、url、logo",
        ),
        (
            has_product, "Product", 3.0, "fail",
            "产品页的Product标记，包含名称、描述、价格、评价",
            "为产品页添加Product Schema，包含name、description、offers、aggregateRating",
        ),
        (
            has_article, "Article/BlogPosting", 3.0, "fail",
            "博客文章的Article标记，包含作者、发布日期、摘要",
            "为文章添加Article或BlogPosting Schema，包含author、datePublished、headline",
        ),
        (
            has_faq, "FAQPage", 2.0, "warning",
            "常见问题的FAQPage标记",
            "为FAQ页面添加FAQPage Schema，包含问题和答案对",
        ),
        (
            has_howto, "HowTo", 2.0, "warning",
            "操作指南的HowTo标记",
            "为教程类内容添加HowTo Schema，包含步骤列表",
        ),
        (
            has_breadcrumb, "BreadcrumbList", 1.0, "warning",
            "导航结构的BreadcrumbList标记",
            "添加BreadcrumbList Schema，帮助AI理解页面层级关系",
        ),
    )

    items = []
    for present, label, points, absent_status, explanation, advice in schema_checks:
        items.append(DiagnosisItem(
            name=label,
            status="pass" if present else absent_status,
            description=explanation,
            suggestion=advice,
            score=points if present else 0.0,
            max_score=points,
        ))

    total_score = sum(entry.score for entry in items)

    # Only a failing item downgrades the dimension to "warning".
    status = "pass"
    for entry in items:
        if entry.status == "fail":
            status = "warning"
            break

    return GEODimensionScore(
        name="Schema标记",
        score=total_score,
        max_score=max_score,
        items=items,
        status=status,
        percentage=round((total_score / max_score) * 100, 2),
        detail={
            "has_organization": has_organization,
            "has_product": has_product,
            "has_article": has_article,
            "has_faq": has_faq,
            "has_howto": has_howto,
            "has_breadcrumb": has_breadcrumb,
            "schema_count": sum([row[0] for row in schema_checks]),
        },
    )


# ============================================================
# 维度5: 主题权威诊断 (满分15)
# ============================================================

def diagnose_topic_authority(
    content_depth_score: float = 0.0,
    topic_coverage_ratio: float = 0.0,
    entity_consistency_score: float = 0.0,
    cluster_completeness: float = 0.0,
    total_content_count: int = 0,
    topic_cluster_count: int = 0,
) -> GEODimensionScore:
    """Score the "topic authority" dimension (15 points).

    Each of the four checks is proportional: ``ratio * points``. Ratios are
    clamped to 0-1 before scoring so an out-of-range input cannot produce a
    negative item score or one above the item's ``max_score``.

    Args:
        content_depth_score: Content depth (0-1), worth 5 points.
            "pass" at >= 0.8, "warning" at >= 0.5, otherwise "fail".
        topic_coverage_ratio: Topic coverage (0-1), worth 4 points.
            "pass" at >= 0.8, "warning" at >= 0.5, otherwise "fail".
        entity_consistency_score: Consistency of entity signals (0-1),
            worth 3 points. "pass" at >= 0.85, "warning" at >= 0.6,
            otherwise "fail".
        cluster_completeness: Completeness of the internal-link cluster
            (0-1), worth 3 points. "pass" at >= 0.7, "warning" at >= 0.4,
            otherwise "fail".
        total_content_count: Total number of content pieces; only echoed in
            ``detail``.
        topic_cluster_count: Number of topic clusters; only echoed in
            ``detail``.

    Returns:
        GEODimensionScore: The dimension score. Its ``status`` is "warning"
        when at least one item is "fail" and "pass" otherwise; ``detail``
        echoes the inputs as given (ratios rounded to two decimals).
    """
    max_score = 15.0

    # One row per proportional check:
    # (ratio, points, pass threshold, warning threshold, name, description,
    #  suggestion)
    graded_checks = (
        (
            content_depth_score, 5.0, 0.8, 0.5,
            "内容深度",
            "是否全面覆盖主题，内容质量QScore目标≥4.6/5",
            "增加内容深度，包含详细解释、案例分析、数据支撑",
        ),
        (
            topic_coverage_ratio, 4.0, 0.8, 0.5,
            "话题覆盖度",
            "是否覆盖相关子话题，话题覆盖率目标≥80%",
            "创建覆盖核心话题及其子话题的内容矩阵",
        ),
        (
            entity_consistency_score, 3.0, 0.85, 0.6,
            "实体信号一致性",
            "各页面实体信号是否一致，一致性评分目标≥85%",
            "确保各页面使用一致的品牌名称、描述、行业分类",
        ),
        (
            cluster_completeness, 3.0, 0.7, 0.4,
            "内链网络",
            "是否形成主题内容集群，集群完整度目标≥70%",
            "建立主题集群，通过内链将相关内容连接成网络",
        ),
    )

    items = []
    for ratio, points, pass_at, warning_at, label, explanation, advice in graded_checks:
        # Clamp before scoring; thresholds lie inside (0, 1], so the status
        # is the same whether it is judged on the raw or the clamped value.
        bounded = min(1.0, max(0.0, ratio))
        if bounded >= pass_at:
            item_status = "pass"
        elif bounded >= warning_at:
            item_status = "warning"
        else:
            item_status = "fail"
        items.append(DiagnosisItem(
            name=label,
            status=item_status,
            description=explanation,
            suggestion=advice,
            score=bounded * points,
            max_score=points,
        ))

    total_score = sum(item.score for item in items)
    percentage = (total_score / max_score) * 100

    # Only a failing item downgrades the dimension to "warning".
    has_fail = any(item.status == "fail" for item in items)
    status = "warning" if has_fail else "pass"

    return GEODimensionScore(
        name="主题权威",
        score=total_score,
        max_score=max_score,
        items=items,
        status=status,
        percentage=round(percentage, 2),
        detail={
            "content_depth_score": round(content_depth_score, 2),
            "topic_coverage_ratio": round(topic_coverage_ratio, 2),
            "entity_consistency_score": round(entity_consistency_score, 2),
            "cluster_completeness": round(cluster_completeness, 2),
            "total_content_count": total_content_count,
            "topic_cluster_count": topic_cluster_count,
        },
    )


# ============================================================
# 维度6: 引用就绪度诊断 (满分15)
# ============================================================

def diagnose_citation_readiness(
    answer_ownership_rate: float = 0.0,
    citation_accuracy: float = 0.0,
    ai_sov: float = 0.0,
    competitor_gap: float = 0.0,
    total_ai_responses: int = 0,
    brand_mention_count: int = 0,
    accurate_citation_count: int = 0,
) -> GEODimensionScore:
    """Score the "citation readiness" dimension (15 points).

    Rates are clamped to 0-1 before scoring so an out-of-range input cannot
    produce a negative item score or one above the item's ``max_score``.

    Args:
        answer_ownership_rate: AOR, Answer Ownership Rate (0-1), worth
            5 points. Tiers: >= 0.5 -> 5.0, >= 0.3 -> 3.5, >= 0.1 -> 2.0,
            below that ``rate * 10``. "pass" at >= 0.5, "warning" at >= 0.2.
        citation_accuracy: Citation accuracy (0-1), worth 4 points, scored
            proportionally. "pass" at >= 0.9, "warning" at >= 0.7.
        ai_sov: AI Share of Voice (0-1), worth 3 points. Tiers:
            >= 0.3 -> 3.0, >= 0.15 -> 2.0, >= 0.05 -> 1.0, below that
            ``ai_sov * 10``. "pass" at >= 0.3, "warning" at >= 0.1.
        competitor_gap: Gap to competitors, worth 3 points. The thresholds
            used here treat it as a fraction (0.1 corresponds to the 10pp
            target). Tiers: <= 0.1 -> 3.0, <= 0.2 -> 2.0, <= 0.3 -> 1.0,
            then a decay that never exceeds 1.0 and reaches 0 at 0.6.
            "pass" at <= 0.1, "warning" at <= 0.25.
        total_ai_responses: Total number of AI responses; only echoed in
            ``detail``.
        brand_mention_count: Number of brand mentions; only echoed in
            ``detail``.
        accurate_citation_count: Number of accurate citations; only echoed
            in ``detail``.

    Returns:
        GEODimensionScore: The dimension score. Its ``status`` is "warning"
        when at least one item is "fail" and "pass" otherwise; ``detail``
        echoes the inputs as given (rates rounded to two decimals).
    """
    max_score = 15.0
    items = []

    aor = min(1.0, max(0.0, answer_ownership_rate))
    accuracy = min(1.0, max(0.0, citation_accuracy))
    sov = min(1.0, max(0.0, ai_sov))

    # 1. Citation frequency, AOR (5 points).
    if aor >= 0.5:
        aor_score = 5.0
    elif aor >= 0.3:
        aor_score = 3.5
    elif aor >= 0.1:
        aor_score = 2.0
    else:
        aor_score = aor * 10.0
    if aor >= 0.5:
        aor_status = "pass"
    elif aor >= 0.2:
        aor_status = "warning"
    else:
        aor_status = "fail"
    items.append(DiagnosisItem(
        name="引用频率 (AOR)",
        status=aor_status,
        description="品牌在AI回答中被提及的频率，AOR目标≥50%",
        suggestion="优化内容结构，提高被AI引用的概率",
        score=aor_score,
        max_score=5.0,
    ))

    # 2. Citation quality (4 points), proportional to accuracy. The clamp
    # above guarantees the score tops out at exactly 4.0.
    accuracy_score = accuracy * 4.0
    if accuracy >= 0.9:
        accuracy_status = "pass"
    elif accuracy >= 0.7:
        accuracy_status = "warning"
    else:
        accuracy_status = "fail"
    items.append(DiagnosisItem(
        name="引用质量",
        status=accuracy_status,
        description="引用内容是否准确完整，引用准确率目标≥90%",
        suggestion="确保内容准确无误，避免过时或错误信息",
        score=accuracy_score,
        max_score=4.0,
    ))

    # 3. AI share of voice (3 points).
    if sov >= 0.3:
        sov_score = 3.0
    elif sov >= 0.15:
        sov_score = 2.0
    elif sov >= 0.05:
        sov_score = 1.0
    else:
        sov_score = sov * 10.0
    if sov >= 0.3:
        sov_status = "pass"
    elif sov >= 0.1:
        sov_status = "warning"
    else:
        sov_status = "fail"
    items.append(DiagnosisItem(
        name="AI声量占比",
        status=sov_status,
        description="品牌在AI回答中的占比，AI SOV目标≥30%",
        suggestion="增加品牌曝光，提高在AI回答中的出现频率",
        score=sov_score,
        max_score=3.0,
    ))

    # 4. Competitor comparison (3 points). A smaller gap is better; a gap of
    # zero or less falls in the first tier and earns full marks.
    if competitor_gap <= 0.1:
        gap_score = 3.0
    elif competitor_gap <= 0.2:
        gap_score = 2.0
    elif competitor_gap <= 0.3:
        gap_score = 1.0
    else:
        # Linear decay that reaches 0 at a gap of 0.6. It is capped at the
        # previous tier's 1.0 so that a larger gap can never score higher
        # than a smaller one (uncapped, 0.31 would earn 1.45 vs 1.0 at 0.3).
        gap_score = min(1.0, max(0.0, 3.0 - competitor_gap * 5))
    if competitor_gap <= 0.1:
        gap_status = "pass"
    elif competitor_gap <= 0.25:
        gap_status = "warning"
    else:
        gap_status = "fail"
    items.append(DiagnosisItem(
        name="竞品对比",
        status=gap_status,
        description="与竞品在AI回答中的表现差距，差距目标≤10pp",
        suggestion="分析竞品优势，针对性优化内容策略",
        score=gap_score,
        max_score=3.0,
    ))

    total_score = sum(item.score for item in items)
    percentage = (total_score / max_score) * 100

    # Only a failing item downgrades the dimension to "warning".
    has_fail = any(item.status == "fail" for item in items)
    status = "warning" if has_fail else "pass"

    return GEODimensionScore(
        name="引用就绪度",
        score=total_score,
        max_score=max_score,
        items=items,
        status=status,
        percentage=round(percentage, 2),
        detail={
            "answer_ownership_rate": round(answer_ownership_rate, 2),
            "citation_accuracy": round(citation_accuracy, 2),
            "ai_sov": round(ai_sov, 2),
            "competitor_gap": round(competitor_gap, 2),
            "total_ai_responses": total_ai_responses,
            "brand_mention_count": brand_mention_count,
            "accurate_citation_count": accurate_citation_count,
        },
    )


# ============================================================
# 推荐生成
# ============================================================

def generate_recommendations(dimensions: list[GEODimensionScore]) -> list[GEORecommendation]:
    """Build optimisation recommendations from the diagnosis results.

    Every item with status "fail" yields a P0 / high-impact recommendation,
    every "warning" item a P1 / medium-impact one; other items are skipped.

    Effort is "easy" for schema-markup work, "hard" for content depth and
    topic coverage, and "medium" otherwise. The schema test looks at the
    dimension name as well as the item name: schema items are named after
    the schema type (e.g. "Organization"), so the "Schema" / "标记" marker
    only ever appears in the dimension name.

    Args:
        dimensions: Diagnosis result of each dimension.

    Returns:
        list[GEORecommendation]: Recommendations sorted by priority (P0
        first). The sort is stable, so recommendations of equal priority
        keep the order of ``dimensions`` and their items.
    """
    # status -> (priority, impact); statuses not listed produce nothing.
    rating_by_status = {
        "fail": ("P0", "high"),
        "warning": ("P1", "medium"),
    }
    schema_markers = ("Schema", "标记")

    recommendations = []
    for dim in dimensions:
        for item in dim.items:
            rating = rating_by_status.get(item.status)
            if rating is None:
                continue
            priority, impact = rating

            # Estimate the implementation effort from the kind of check.
            if any(marker in item.name or marker in dim.name for marker in schema_markers):
                effort = "easy"
            elif "内容深度" in item.name or "话题覆盖" in item.name:
                effort = "hard"
            else:
                effort = "medium"

            recommendations.append(GEORecommendation(
                priority=priority,
                dimension=dim.name,
                title=f"优化: {item.name}",
                description=item.suggestion,
                impact=impact,
                effort=effort,
            ))

    # Sort by priority; unknown priorities go last.
    priority_order = {"P0": 0, "P1": 1, "P2": 2}
    recommendations.sort(key=lambda r: priority_order.get(r.priority, 3))

    return recommendations


# ============================================================
# 工具函数
# ============================================================

def get_health_level(score: float) -> str:
    """Map a score to its health level.

    80 and above -> "excellent"
    60 up to 80  -> "good"
    40 up to 60  -> "pass"
    below 40     -> "danger"
    """
    # Floors are checked from highest to lowest; the first one reached wins.
    for floor, level in ((80, "excellent"), (60, "good"), (40, "pass")):
        if score >= floor:
            return level
    return "danger"


def get_health_level_label(level: str) -> str:
    """Return the Chinese display label for a health level.

    Levels that are not recognised map to "未知".
    """
    known_labels = {
        "excellent": "优秀",
        "good": "良好",
        "pass": "及格",
        "danger": "危险",
    }
    try:
        return known_labels[level]
    except KeyError:
        return "未知"


# ============================================================
# GEODiagnosisService 服务类
# ============================================================

@dataclass
class GEODiagnosisInput:
    """Input parameters for a GEO diagnosis.

    The fields are grouped by the dimension they feed. Every field has a
    default, so an empty instance describes a site with no signals at all.
    """

    # --- Content extractability ---
    has_direct_answer: bool = False
    has_qa_headings: bool = False
    has_structured_data: bool = False
    has_internal_links: bool = False
    has_freshness_info: bool = False
    update_days_ago: int | None = None

    # --- Entity clarity ---
    has_brand_definition: bool = False
    has_target_audience: bool = False
    has_unique_value: bool = False
    has_industry_classification: bool = False

    # --- E-E-A-T signals ---
    has_author_bio: bool = False
    author_credentials_complete: float = 0.0
    has_certifications: bool = False
    certification_count: int = 0
    has_data_sources: bool = False
    authoritative_source_ratio: float = 0.0
    has_expert_endorsements: bool = False
    endorsement_count: int = 0

    # --- Schema markup ---
    has_organization: bool = False
    has_product: bool = False
    has_article: bool = False
    has_faq: bool = False
    has_howto: bool = False
    has_breadcrumb: bool = False

    # --- Topic authority ---
    content_depth_score: float = 0.0
    topic_coverage_ratio: float = 0.0
    entity_consistency_score: float = 0.0
    cluster_completeness: float = 0.0
    total_content_count: int = 0
    topic_cluster_count: int = 0

    # --- Citation readiness ---
    answer_ownership_rate: float = 0.0
    citation_accuracy: float = 0.0
    ai_sov: float = 0.0
    competitor_gap: float = 0.0
    total_ai_responses: int = 0
    brand_mention_count: int = 0
    accurate_citation_count: int = 0


class GEODiagnosisService:
    """Service that runs the full GEO diagnosis."""

    def diagnose(self, input_data: GEODiagnosisInput) -> GEODiagnosisResult:
        """Run all six dimension diagnoses and combine them.

        Args:
            input_data: Diagnosis input parameters.

        Returns:
            GEODiagnosisResult: The overall score (sum of the dimension
            scores, limited to 0-100 and rounded to two decimals), the six
            dimension results and the generated recommendations.
        """

        def pick(*names):
            # Collect the named input fields as keyword arguments.
            return {name: getattr(input_data, name) for name in names}

        # The six dimensions, in reporting order (20/15/20/15/15/15 points).
        dimensions = [
            diagnose_content_extractability(**pick(
                "has_direct_answer",
                "has_qa_headings",
                "has_structured_data",
                "has_internal_links",
                "has_freshness_info",
                "update_days_ago",
            )),
            diagnose_entity_clarity(**pick(
                "has_brand_definition",
                "has_target_audience",
                "has_unique_value",
                "has_industry_classification",
            )),
            diagnose_eeat_signals(**pick(
                "has_author_bio",
                "author_credentials_complete",
                "has_certifications",
                "certification_count",
                "has_data_sources",
                "authoritative_source_ratio",
                "has_expert_endorsements",
                "endorsement_count",
            )),
            diagnose_schema_markup(**pick(
                "has_organization",
                "has_product",
                "has_article",
                "has_faq",
                "has_howto",
                "has_breadcrumb",
            )),
            diagnose_topic_authority(**pick(
                "content_depth_score",
                "topic_coverage_ratio",
                "entity_consistency_score",
                "cluster_completeness",
                "total_content_count",
                "topic_cluster_count",
            )),
            diagnose_citation_readiness(**pick(
                "answer_ownership_rate",
                "citation_accuracy",
                "ai_sov",
                "competitor_gap",
                "total_ai_responses",
                "brand_mention_count",
                "accurate_citation_count",
            )),
        ]

        # Overall score: sum of the dimension scores, kept within 0-100.
        raw_total = sum(dim.score for dim in dimensions)
        bounded_total = min(100.0, max(0.0, raw_total))

        return GEODiagnosisResult(
            overall_score=round(bounded_total, 2),
            dimensions=dimensions,
            recommendations=generate_recommendations(dimensions),
        )

    def diagnose_from_dict(self, data: dict) -> GEODiagnosisResult:
        """Run the diagnosis from a plain dictionary (convenience wrapper).

        Args:
            data: Dictionary of diagnosis parameters; it is unpacked as
                keyword arguments into ``GEODiagnosisInput``.

        Returns:
            GEODiagnosisResult: The diagnosis result.
        """
        return self.diagnose(GEODiagnosisInput(**data))