# geo/backend/tests/test_services/test_geo_diagnosis.py

"""
GEO诊断服务单元测试

测试6大维度诊断逻辑、评分算法、推荐生成和服务类
"""
import pytest
from app.services.diagnosis.geo_diagnosis import (
    GEODiagnosisService,
    GEODiagnosisInput,
    diagnose_content_extractability,
    diagnose_entity_clarity,
    diagnose_eeat_signals,
    diagnose_schema_markup,
    diagnose_topic_authority,
    diagnose_citation_readiness,
    generate_recommendations,
    get_health_level,
    get_health_level_label,
)


class TestContentExtractability:
    """Tests for the content-extractability diagnosis."""

    # Boolean switches these tests hand to diagnose_content_extractability.
    _FLAGS = (
        "has_direct_answer",
        "has_qa_headings",
        "has_structured_data",
        "has_internal_links",
        "has_freshness_info",
    )

    @staticmethod
    def _freshness_item(days_ago):
        """Diagnose with freshness info present and return the freshness item.

        Raises IndexError when no result item carries the freshness name.
        """
        outcome = diagnose_content_extractability(
            has_freshness_info=True,
            update_days_ago=days_ago,
        )
        matches = [entry for entry in outcome.items if entry.name == "内容新鲜度"]
        return matches[0]

    def test_all_pass(self):
        """Every switch on, updated 10 days ago: full marks are expected."""
        switches = dict.fromkeys(self._FLAGS, True)
        outcome = diagnose_content_extractability(update_days_ago=10, **switches)

        assert outcome.score == 20.0
        assert outcome.max_score == 20.0
        assert outcome.status == "pass"
        assert outcome.percentage == 100.0

    def test_all_fail(self):
        """Every switch off: zero score and a "warning" status are expected."""
        switches = dict.fromkeys(self._FLAGS, False)
        outcome = diagnose_content_extractability(**switches)

        assert outcome.score == 0.0
        assert outcome.status == "warning"

    def test_partial_pass(self):
        """Only the direct-answer and Q&A-heading switches are on."""
        switches = dict.fromkeys(self._FLAGS, False)
        switches["has_direct_answer"] = True
        switches["has_qa_headings"] = True
        outcome = diagnose_content_extractability(**switches)

        assert outcome.score == 11.0  # 6 + 5

    def test_freshness_recent(self):
        """Freshness item for content updated 10 days ago."""
        item = self._freshness_item(10)

        assert item.score == 2.0
        assert item.status == "pass"

    def test_freshness_old(self):
        """Freshness item for content updated 100 days ago."""
        item = self._freshness_item(100)

        assert item.score == 0.5
        assert item.status == "warning"


class TestEntityClarity:
    """Tests for the entity-clarity diagnosis."""

    # Boolean switches these tests hand to diagnose_entity_clarity.
    _FLAGS = (
        "has_brand_definition",
        "has_target_audience",
        "has_unique_value",
        "has_industry_classification",
    )

    def test_all_pass(self):
        """Every switch on: the full 15 points and a "pass" status are expected."""
        outcome = diagnose_entity_clarity(**dict.fromkeys(self._FLAGS, True))

        assert outcome.score == 15.0
        assert outcome.max_score == 15.0
        assert outcome.status == "pass"

    def test_all_fail(self):
        """Every switch off: zero score and a "warning" status are expected."""
        outcome = diagnose_entity_clarity(**dict.fromkeys(self._FLAGS, False))

        assert outcome.score == 0.0
        assert outcome.status == "warning"

    def test_partial_pass(self):
        """Only the brand-definition and target-audience switches are on."""
        switches = dict.fromkeys(self._FLAGS, False)
        switches["has_brand_definition"] = True
        switches["has_target_audience"] = True
        outcome = diagnose_entity_clarity(**switches)

        assert outcome.score == 9.0  # 5 + 4


class TestEEATSignals:
    """Tests for the E-E-A-T signals diagnosis."""

    @staticmethod
    def _named_item(outcome, name):
        """Return the first result item called *name* (IndexError if absent)."""
        matches = [entry for entry in outcome.items if entry.name == name]
        return matches[0]

    def test_all_pass(self):
        """All four signals present at their strongest tested values."""
        strongest = {
            "has_author_bio": True,
            "author_credentials_complete": 1.0,
            "has_certifications": True,
            "certification_count": 5,
            "has_data_sources": True,
            "authoritative_source_ratio": 1.0,
            "has_expert_endorsements": True,
            "endorsement_count": 5,
        }
        outcome = diagnose_eeat_signals(**strongest)

        assert outcome.score == 20.0
        assert outcome.max_score == 20.0
        assert outcome.status == "pass"

    def test_all_fail(self):
        """All four presence switches off."""
        switches = dict.fromkeys(
            (
                "has_author_bio",
                "has_certifications",
                "has_data_sources",
                "has_expert_endorsements",
            ),
            False,
        )
        outcome = diagnose_eeat_signals(**switches)

        assert outcome.score == 0.0
        assert outcome.status == "warning"

    def test_author_partial(self):
        """Author bio present but credentials only half complete."""
        outcome = diagnose_eeat_signals(
            has_author_bio=True,
            author_credentials_complete=0.5,
        )
        author = self._named_item(outcome, "作者资质")

        assert author.score == 3.0  # 0.5 * 6.0
        assert author.status == "warning"

    def test_certification_tiers(self):
        """Certification item score for each certification-count tier."""
        # (certification_count, expected score); tier notes from the original:
        # 5 or more, 3-4, 1-2.
        tiers = (
            (5, 5.0),
            (3, 4.0),
            (1, 2.5),
        )
        for count, expected in tiers:
            outcome = diagnose_eeat_signals(
                has_certifications=True,
                certification_count=count,
            )
            certification = self._named_item(outcome, "专业认证")
            assert certification.score == expected


class TestSchemaMarkup:
    """Tests for the Schema markup diagnosis."""

    # Schema-type switches these tests hand to diagnose_schema_markup.
    _FLAGS = (
        "has_organization",
        "has_product",
        "has_article",
        "has_faq",
        "has_howto",
        "has_breadcrumb",
    )

    def test_all_pass(self):
        """Every schema type present."""
        outcome = diagnose_schema_markup(**dict.fromkeys(self._FLAGS, True))

        assert outcome.score == 15.0
        assert outcome.max_score == 15.0
        assert outcome.status == "pass"

    def test_all_fail(self):
        """Every schema type absent."""
        outcome = diagnose_schema_markup(**dict.fromkeys(self._FLAGS, False))

        assert outcome.score == 0.0
        assert outcome.status == "warning"

    def test_p0_only(self):
        """Only the P0 (must-have) schema types are supplied; the rest use defaults."""
        must_have = dict.fromkeys(self._FLAGS[:3], True)
        outcome = diagnose_schema_markup(**must_have)

        assert outcome.score == 10.0  # 4 + 3 + 3
        assert outcome.status == "pass"

    def test_schema_count(self):
        """The detail mapping reports how many schema types were present."""
        present = dict.fromkeys(self._FLAGS[:2], True)
        outcome = diagnose_schema_markup(**present)

        assert outcome.detail["schema_count"] == 2


class TestTopicAuthority:
    """Tests for the topic-authority diagnosis."""

    # Ratio-style inputs these tests hand to diagnose_topic_authority.
    _METRICS = (
        "content_depth_score",
        "topic_coverage_ratio",
        "entity_consistency_score",
        "cluster_completeness",
    )

    def test_all_pass(self):
        """Every metric at 1.0."""
        outcome = diagnose_topic_authority(**dict.fromkeys(self._METRICS, 1.0))

        assert outcome.score == 15.0
        assert outcome.max_score == 15.0
        assert outcome.status == "pass"

    def test_all_fail(self):
        """Every metric at 0.0."""
        outcome = diagnose_topic_authority(**dict.fromkeys(self._METRICS, 0.0))

        assert outcome.score == 0.0
        assert outcome.status == "warning"

    def test_partial_scores(self):
        """Mixed metric values combine into the expected weighted total."""
        values = dict(zip(self._METRICS, (0.8, 0.5, 0.7, 0.4)))
        outcome = diagnose_topic_authority(**values)

        # 0.8*5 + 0.5*4 + 0.7*3 + 0.4*3 = 4 + 2 + 2.1 + 1.2 = 9.3
        expected_total = pytest.approx(9.3, rel=0.01)
        assert outcome.score == expected_total


class TestCitationReadiness:
    """Tests for the citation-readiness diagnosis."""

    @staticmethod
    def _named_item(outcome, name):
        """Return the first result item called *name* (IndexError if absent)."""
        matches = [entry for entry in outcome.items if entry.name == name]
        return matches[0]

    def test_all_pass(self):
        """Strong values on all four citation metrics."""
        strong = {
            "answer_ownership_rate": 0.6,
            "citation_accuracy": 1.0,
            "ai_sov": 0.35,
            "competitor_gap": 0.0,
        }
        outcome = diagnose_citation_readiness(**strong)

        assert outcome.score == 15.0
        assert outcome.max_score == 15.0
        assert outcome.status == "pass"

    def test_all_fail(self):
        """Zeroed metrics together with a 0.6 competitor gap."""
        weak = {
            "answer_ownership_rate": 0.0,
            "citation_accuracy": 0.0,
            "ai_sov": 0.0,
            "competitor_gap": 0.6,
        }
        outcome = diagnose_citation_readiness(**weak)

        assert outcome.score == 0.0
        assert outcome.status == "warning"

    def test_aor_tiers(self):
        """AOR item score for each answer-ownership-rate tier."""
        # (answer_ownership_rate, expected score); tier notes from the original:
        # >= 50%, 30-49%, 10-29%.
        tiers = (
            (0.5, 5.0),
            (0.3, 3.5),
            (0.1, 2.0),
        )
        for rate, expected in tiers:
            outcome = diagnose_citation_readiness(answer_ownership_rate=rate)
            aor = self._named_item(outcome, "引用频率 (AOR)")
            assert aor.score == expected

    def test_competitor_gap_tiers(self):
        """Competitor-comparison item score for each gap tier."""
        # (competitor_gap, expected score); tier notes from the original:
        # <= 10pp, 10-20pp.
        tiers = (
            (0.05, 3.0),
            (0.15, 2.0),
        )
        for gap, expected in tiers:
            outcome = diagnose_citation_readiness(competitor_gap=gap)
            comparison = self._named_item(outcome, "竞品对比")
            assert comparison.score == expected


class TestRecommendations:
    """Tests for recommendation generation."""

    @staticmethod
    def _recommend_for(**extractability_kwargs):
        """Generate recommendations from one content-extractability diagnosis."""
        dimension = diagnose_content_extractability(**extractability_kwargs)
        return generate_recommendations([dimension])

    @staticmethod
    def _with_priority(recommendations, priority):
        """Return the recommendations whose priority equals *priority*."""
        return [rec for rec in recommendations if rec.priority == priority]

    def test_generate_from_fail_items(self):
        """Failing items are expected to yield P0 recommendations."""
        recommendations = self._recommend_for(
            has_direct_answer=False,
            has_qa_headings=False,
        )

        assert len(recommendations) >= 2
        # P0 entries must exist, though not every entry has to be P0.
        assert len(self._with_priority(recommendations, "P0")) >= 2

    def test_generate_from_warning_items(self):
        """Warning items are expected to yield P1 recommendations."""
        recommendations = self._recommend_for(
            has_direct_answer=True,
            has_qa_headings=True,
            has_structured_data=True,
            has_internal_links=False,
            has_freshness_info=True,
            update_days_ago=50,
        )

        assert len(self._with_priority(recommendations, "P1")) >= 1

    def test_priority_ordering(self):
        """Recommendations come back sorted by priority."""
        recommendations = self._recommend_for(
            has_direct_answer=False,
            has_qa_headings=False,
            has_structured_data=True,
            has_internal_links=False,
            has_freshness_info=True,
            update_days_ago=50,
        )
        observed = [rec.priority for rec in recommendations]

        assert observed == sorted(observed)

    def test_empty_dimensions(self):
        """An empty dimension list produces no recommendations."""
        assert len(generate_recommendations([])) == 0


class TestHealthLevel:
    """Tests for the health-level helpers."""

    def test_excellent(self):
        """Scores 85 and 80 map to "excellent"."""
        for score in (85, 80):
            assert get_health_level(score) == "excellent"

    def test_good(self):
        """Scores 70 and 60 map to "good"."""
        for score in (70, 60):
            assert get_health_level(score) == "good"

    def test_pass(self):
        """Scores 50 and 40 map to "pass"."""
        for score in (50, 40):
            assert get_health_level(score) == "pass"

    def test_danger(self):
        """Scores 30 and 0 map to "danger"."""
        for score in (30, 0):
            assert get_health_level(score) == "danger"

    def test_labels(self):
        """Each level identifier has its expected display label."""
        expected_labels = {
            "excellent": "优秀",
            "good": "良好",
            "pass": "及格",
            "danger": "危险",
        }
        for level, label in expected_labels.items():
            assert get_health_level_label(level) == label


class TestGEODiagnosisService:
    """Tests for the GEODiagnosisService class."""

    @pytest.fixture
    def service(self):
        """Provide a fresh GEODiagnosisService for each test."""
        return GEODiagnosisService()

    @staticmethod
    def _best_case_input(**overrides):
        """Build a GEODiagnosisInput with every field at its best tested value.

        The baseline is the input used by ``test_full_diagnosis_all_pass``.
        Keyword *overrides* replace individual baseline fields, so each test
        spells out only what differs instead of repeating the whole literal.
        """
        fields = {
            # Content extractability
            "has_direct_answer": True,
            "has_qa_headings": True,
            "has_structured_data": True,
            "has_internal_links": True,
            "has_freshness_info": True,
            "update_days_ago": 10,
            # Entity clarity
            "has_brand_definition": True,
            "has_target_audience": True,
            "has_unique_value": True,
            "has_industry_classification": True,
            # E-E-A-T
            "has_author_bio": True,
            "author_credentials_complete": 1.0,
            "has_certifications": True,
            "certification_count": 5,
            "has_data_sources": True,
            "authoritative_source_ratio": 1.0,
            "has_expert_endorsements": True,
            "endorsement_count": 5,
            # Schema
            "has_organization": True,
            "has_product": True,
            "has_article": True,
            "has_faq": True,
            "has_howto": True,
            "has_breadcrumb": True,
            # Topic authority
            "content_depth_score": 1.0,
            "topic_coverage_ratio": 1.0,
            "entity_consistency_score": 1.0,
            "cluster_completeness": 1.0,
            # Citation readiness
            "answer_ownership_rate": 0.6,
            "citation_accuracy": 1.0,
            "ai_sov": 0.35,
            "competitor_gap": 0.0,
        }
        fields.update(overrides)
        return GEODiagnosisInput(**fields)

    def test_full_diagnosis_all_pass(self, service):
        """Full diagnosis with every item passing."""
        result = service.diagnose(self._best_case_input())

        assert result.overall_score == 100.0
        assert result.health_level == "excellent"
        assert len(result.dimensions) == 6
        assert len(result.recommendations) == 0

    def test_full_diagnosis_all_fail(self, service):
        """Full diagnosis on a default-constructed input."""
        result = service.diagnose(GEODiagnosisInput())

        # Some items earn a few points even with default input, so the total
        # is not necessarily exactly 0.
        assert result.overall_score < 10.0
        assert result.health_level == "danger"
        assert len(result.dimensions) == 6
        assert len(result.recommendations) > 0

    def test_diagnose_from_dict(self, service):
        """Diagnosis can be run from a plain dict of input fields."""
        data = {
            "has_direct_answer": True,
            "has_brand_definition": True,
            "has_author_bio": True,
            "author_credentials_complete": 0.8,
            "has_organization": True,
            "content_depth_score": 0.8,
            "answer_ownership_rate": 0.5,
        }
        result = service.diagnose_from_dict(data)

        assert result.overall_score > 0
        assert len(result.dimensions) == 6

    def test_result_to_dict(self, service):
        """The result's to_dict() output exposes the expected top-level keys."""
        input_data = GEODiagnosisInput(
            has_direct_answer=True,
            has_brand_definition=True,
        )
        result_dict = service.diagnose(input_data).to_dict()

        assert "overall_score" in result_dict
        assert "health_level" in result_dict
        assert "health_level_label" in result_dict
        assert "dimensions" in result_dict
        assert "recommendations" in result_dict
        assert len(result_dict["dimensions"]) == 6

    def test_score_boundaries(self, service):
        """The overall score stays within the 0-100 range."""
        # Lower bound: default input.
        result = service.diagnose(GEODiagnosisInput())
        assert result.overall_score >= 0.0

        # Upper bound: best-case input with this test's citation values.
        input_data = self._best_case_input(
            citation_accuracy=0.95,
            competitor_gap=0.05,
        )
        result = service.diagnose(input_data)
        assert result.overall_score <= 100.0

    def test_health_levels(self, service):
        """A strong but not perfect input is rated "excellent"."""
        # excellent (>= 80)
        input_data = self._best_case_input(
            author_credentials_complete=0.9,
            certification_count=4,
            authoritative_source_ratio=0.9,
            endorsement_count=4,
            content_depth_score=0.9,
            topic_coverage_ratio=0.9,
            entity_consistency_score=0.9,
            cluster_completeness=0.8,
            citation_accuracy=0.95,
            competitor_gap=0.05,
        )
        result = service.diagnose(input_data)
        assert result.health_level == "excellent"

    def test_dimension_scores_sum(self, service):
        """The overall score equals the sum of the dimension scores."""
        input_data = GEODiagnosisInput(
            has_direct_answer=True,
            has_brand_definition=True,
        )
        result = service.diagnose(input_data)

        # Compared with a 1% relative tolerance, as in the original test.
        total = sum(dim.score for dim in result.dimensions)
        assert result.overall_score == pytest.approx(total, rel=0.01)

    def test_recommendations_generated(self, service):
        """Default input yields recommendations with known priorities only."""
        result = service.diagnose(GEODiagnosisInput())

        # With everything failing there must be at least one recommendation.
        assert len(result.recommendations) > 0
        assert all(r.priority in ["P0", "P1", "P2"] for r in result.recommendations)