geo/backend/tests/test_services/test_geo_diagnosis.py

606 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
GEO诊断服务单元测试
测试6大维度诊断逻辑、评分算法、推荐生成和服务类
"""
import pytest
from app.services.geo_diagnosis import (
GEODiagnosisService,
GEODiagnosisInput,
diagnose_content_extractability,
diagnose_entity_clarity,
diagnose_eeat_signals,
diagnose_schema_markup,
diagnose_topic_authority,
diagnose_citation_readiness,
generate_recommendations,
get_health_level,
get_health_level_label,
)
class TestContentExtractability:
    """Checks for the content-extractability dimension diagnosis."""

    def test_all_pass(self):
        """Every check enabled with a 10-day-old update scores the full 20."""
        kwargs = {
            "has_direct_answer": True,
            "has_qa_headings": True,
            "has_structured_data": True,
            "has_internal_links": True,
            "has_freshness_info": True,
            "update_days_ago": 10,
        }
        outcome = diagnose_content_extractability(**kwargs)
        assert outcome.score == 20.0
        assert outcome.max_score == 20.0
        assert outcome.status == "pass"
        assert outcome.percentage == 100.0

    def test_all_fail(self):
        """Every check disabled scores zero and reports a warning status."""
        disabled = dict.fromkeys(
            (
                "has_direct_answer",
                "has_qa_headings",
                "has_structured_data",
                "has_internal_links",
                "has_freshness_info",
            ),
            False,
        )
        outcome = diagnose_content_extractability(**disabled)
        assert outcome.score == 0.0
        assert outcome.status == "warning"

    def test_partial_pass(self):
        """Only the direct-answer and Q&A-heading checks are enabled."""
        kwargs = {
            "has_direct_answer": True,
            "has_qa_headings": True,
            "has_structured_data": False,
            "has_internal_links": False,
            "has_freshness_info": False,
        }
        outcome = diagnose_content_extractability(**kwargs)
        # 6 + 5 from the two enabled checks.
        assert outcome.score == 11.0

    def test_freshness_recent(self):
        """Freshness item when the content was updated 10 days ago."""
        outcome = diagnose_content_extractability(
            update_days_ago=10,
            has_freshness_info=True,
        )
        candidates = [entry for entry in outcome.items if entry.name == "内容新鲜度"]
        freshness = candidates[0]
        assert freshness.score == 2.0
        assert freshness.status == "pass"

    def test_freshness_old(self):
        """Freshness item when the content was updated 100 days ago."""
        outcome = diagnose_content_extractability(
            update_days_ago=100,
            has_freshness_info=True,
        )
        candidates = [entry for entry in outcome.items if entry.name == "内容新鲜度"]
        freshness = candidates[0]
        assert freshness.score == 0.5
        assert freshness.status == "warning"
class TestEntityClarity:
    """Checks for the entity-clarity dimension diagnosis."""

    def test_all_pass(self):
        """All four entity signals present scores the full 15 points."""
        enabled = dict.fromkeys(
            (
                "has_brand_definition",
                "has_target_audience",
                "has_unique_value",
                "has_industry_classification",
            ),
            True,
        )
        outcome = diagnose_entity_clarity(**enabled)
        assert outcome.score == 15.0
        assert outcome.max_score == 15.0
        assert outcome.status == "pass"

    def test_all_fail(self):
        """All four entity signals absent scores zero with a warning status."""
        disabled = dict.fromkeys(
            (
                "has_brand_definition",
                "has_target_audience",
                "has_unique_value",
                "has_industry_classification",
            ),
            False,
        )
        outcome = diagnose_entity_clarity(**disabled)
        assert outcome.score == 0.0
        assert outcome.status == "warning"

    def test_partial_pass(self):
        """Only brand definition and target audience are present."""
        kwargs = {
            "has_brand_definition": True,
            "has_target_audience": True,
            "has_unique_value": False,
            "has_industry_classification": False,
        }
        outcome = diagnose_entity_clarity(**kwargs)
        # 5 + 4 from the two present signals.
        assert outcome.score == 9.0
class TestEEATSignals:
    """Checks for the E-E-A-T signals dimension diagnosis."""

    @staticmethod
    def _item_named(result, name):
        """Return the first entry of ``result.items`` whose ``name`` matches."""
        return [entry for entry in result.items if entry.name == name][0]

    def test_all_pass(self):
        """Every E-E-A-T input at its top value scores the full 20 points."""
        kwargs = {
            "has_author_bio": True,
            "author_credentials_complete": 1.0,
            "has_certifications": True,
            "certification_count": 5,
            "has_data_sources": True,
            "authoritative_source_ratio": 1.0,
            "has_expert_endorsements": True,
            "endorsement_count": 5,
        }
        outcome = diagnose_eeat_signals(**kwargs)
        assert outcome.score == 20.0
        assert outcome.max_score == 20.0
        assert outcome.status == "pass"

    def test_all_fail(self):
        """Every E-E-A-T flag disabled scores zero with a warning status."""
        disabled = dict.fromkeys(
            (
                "has_author_bio",
                "has_certifications",
                "has_data_sources",
                "has_expert_endorsements",
            ),
            False,
        )
        outcome = diagnose_eeat_signals(**disabled)
        assert outcome.score == 0.0
        assert outcome.status == "warning"

    def test_author_partial(self):
        """Author credentials that are only half complete."""
        outcome = diagnose_eeat_signals(
            author_credentials_complete=0.5,
            has_author_bio=True,
        )
        author = self._item_named(outcome, "作者资质")
        # 0.5 * 6.0
        assert author.score == 3.0
        assert author.status == "warning"

    def test_certification_tiers(self):
        """Certification score steps with the number of certifications."""
        tiers = (
            (5, 5.0),  # 5 or more
            (3, 4.0),  # 3-4
            (1, 2.5),  # 1-2
        )
        for count, expected_score in tiers:
            outcome = diagnose_eeat_signals(
                has_certifications=True,
                certification_count=count,
            )
            certification = self._item_named(outcome, "专业认证")
            assert certification.score == expected_score
class TestSchemaMarkup:
    """Checks for the Schema-markup dimension diagnosis."""

    def test_all_pass(self):
        """All six schema types present scores the full 15 points."""
        enabled = dict.fromkeys(
            (
                "has_organization",
                "has_product",
                "has_article",
                "has_faq",
                "has_howto",
                "has_breadcrumb",
            ),
            True,
        )
        outcome = diagnose_schema_markup(**enabled)
        assert outcome.score == 15.0
        assert outcome.max_score == 15.0
        assert outcome.status == "pass"

    def test_all_fail(self):
        """All six schema types absent scores zero with a warning status."""
        disabled = dict.fromkeys(
            (
                "has_organization",
                "has_product",
                "has_article",
                "has_faq",
                "has_howto",
                "has_breadcrumb",
            ),
            False,
        )
        outcome = diagnose_schema_markup(**disabled)
        assert outcome.score == 0.0
        assert outcome.status == "warning"

    def test_p0_only(self):
        """Only the required (P0) schema types are present."""
        required = {
            "has_organization": True,
            "has_product": True,
            "has_article": True,
        }
        outcome = diagnose_schema_markup(**required)
        # 4 + 3 + 3
        assert outcome.score == 10.0
        assert outcome.status == "pass"

    def test_schema_count(self):
        """The detail payload reports how many schema types were found."""
        outcome = diagnose_schema_markup(has_product=True, has_organization=True)
        detected = outcome.detail["schema_count"]
        assert detected == 2
class TestTopicAuthority:
    """Checks for the topic-authority dimension diagnosis."""

    def test_all_pass(self):
        """All four ratios at 1.0 score the full 15 points."""
        maxed = dict.fromkeys(
            (
                "content_depth_score",
                "topic_coverage_ratio",
                "entity_consistency_score",
                "cluster_completeness",
            ),
            1.0,
        )
        outcome = diagnose_topic_authority(**maxed)
        assert outcome.score == 15.0
        assert outcome.max_score == 15.0
        assert outcome.status == "pass"

    def test_all_fail(self):
        """All four ratios at 0.0 score zero with a warning status."""
        zeroed = dict.fromkeys(
            (
                "content_depth_score",
                "topic_coverage_ratio",
                "entity_consistency_score",
                "cluster_completeness",
            ),
            0.0,
        )
        outcome = diagnose_topic_authority(**zeroed)
        assert outcome.score == 0.0
        assert outcome.status == "warning"

    def test_partial_scores(self):
        """Intermediate ratios produce the expected weighted sum."""
        ratios = {
            "content_depth_score": 0.8,
            "topic_coverage_ratio": 0.5,
            "entity_consistency_score": 0.7,
            "cluster_completeness": 0.4,
        }
        outcome = diagnose_topic_authority(**ratios)
        # 0.8*5 + 0.5*4 + 0.7*3 + 0.4*3 = 4 + 2 + 2.1 + 1.2 = 9.3
        expected_total = pytest.approx(9.3, rel=0.01)
        assert outcome.score == expected_total
class TestCitationReadiness:
    """Checks for the citation-readiness dimension diagnosis."""

    @staticmethod
    def _item_named(result, name):
        """Return the first entry of ``result.items`` whose ``name`` matches."""
        return [entry for entry in result.items if entry.name == name][0]

    def test_all_pass(self):
        """Strong citation metrics and no competitor gap score the full 15."""
        metrics = {
            "answer_ownership_rate": 0.6,
            "citation_accuracy": 1.0,
            "ai_sov": 0.35,
            "competitor_gap": 0.0,
        }
        outcome = diagnose_citation_readiness(**metrics)
        assert outcome.score == 15.0
        assert outcome.max_score == 15.0
        assert outcome.status == "pass"

    def test_all_fail(self):
        """Zeroed metrics with a 0.6 competitor gap score zero (warning)."""
        metrics = {
            "answer_ownership_rate": 0.0,
            "citation_accuracy": 0.0,
            "ai_sov": 0.0,
            "competitor_gap": 0.6,
        }
        outcome = diagnose_citation_readiness(**metrics)
        assert outcome.score == 0.0
        assert outcome.status == "warning"

    def test_aor_tiers(self):
        """Answer-ownership-rate (AOR) score steps with the rate."""
        tiers = (
            (0.5, 5.0),  # >= 50%
            (0.3, 3.5),  # 30-49%
            (0.1, 2.0),  # 10-29%
        )
        for rate, expected_score in tiers:
            outcome = diagnose_citation_readiness(answer_ownership_rate=rate)
            aor = self._item_named(outcome, "引用频率 (AOR)")
            assert aor.score == expected_score

    def test_competitor_gap_tiers(self):
        """Competitor-comparison score steps with the size of the gap."""
        tiers = (
            (0.05, 3.0),  # <= 10pp
            (0.15, 2.0),  # 10-20pp
        )
        for gap, expected_score in tiers:
            outcome = diagnose_citation_readiness(competitor_gap=gap)
            comparison = self._item_named(outcome, "竞品对比")
            assert comparison.score == expected_score
class TestRecommendations:
    """Checks for recommendation generation."""

    def test_generate_from_fail_items(self):
        """Failed items yield P0 recommendations."""
        dimension = diagnose_content_extractability(
            has_direct_answer=False,
            has_qa_headings=False,
        )
        produced = generate_recommendations([dimension])
        assert len(produced) >= 2
        # Check that P0 recommendations exist; not every one has to be P0.
        urgent = [rec for rec in produced if rec.priority == "P0"]
        assert len(urgent) >= 2

    def test_generate_from_warning_items(self):
        """Warning items yield P1 recommendations."""
        kwargs = {
            "has_direct_answer": True,
            "has_qa_headings": True,
            "has_structured_data": True,
            "has_internal_links": False,
            "has_freshness_info": True,
            "update_days_ago": 50,
        }
        dimension = diagnose_content_extractability(**kwargs)
        produced = generate_recommendations([dimension])
        secondary = [rec for rec in produced if rec.priority == "P1"]
        assert len(secondary) >= 1

    def test_priority_ordering(self):
        """Recommendations come back sorted by priority."""
        kwargs = {
            "has_direct_answer": False,
            "has_qa_headings": False,
            "has_structured_data": True,
            "has_internal_links": False,
            "has_freshness_info": True,
            "update_days_ago": 50,
        }
        dimension = diagnose_content_extractability(**kwargs)
        produced = generate_recommendations([dimension])
        observed = [rec.priority for rec in produced]
        expected_order = sorted(observed)
        assert observed == expected_order

    def test_empty_dimensions(self):
        """An empty dimension list produces no recommendations."""
        produced = generate_recommendations([])
        assert len(produced) == 0
class TestHealthLevel:
    """Checks for the score-to-health-level mapping and its labels."""

    def test_excellent(self):
        """Scores of 85 and 80 map to "excellent"."""
        for score in (85, 80):
            assert get_health_level(score) == "excellent"

    def test_good(self):
        """Scores of 70 and 60 map to "good"."""
        for score in (70, 60):
            assert get_health_level(score) == "good"

    def test_pass(self):
        """Scores of 50 and 40 map to "pass"."""
        for score in (50, 40):
            assert get_health_level(score) == "pass"

    def test_danger(self):
        """Scores of 30 and 0 map to "danger"."""
        for score in (30, 0):
            assert get_health_level(score) == "danger"

    def test_labels(self):
        """Each health level has the expected display label."""
        expected_labels = {
            "excellent": "优秀",
            "good": "良好",
            "pass": "及格",
            "danger": "危险",
        }
        for level, label in expected_labels.items():
            assert get_health_level_label(level) == label
class TestGEODiagnosisService:
    """Tests for the ``GEODiagnosisService`` class.

    The 32-field input that several tests need is built by the private
    ``_full_input`` helper, so each test states only the fields on which it
    differs from the baseline.
    """

    @pytest.fixture
    def service(self):
        """Provide a fresh ``GEODiagnosisService`` for each test."""
        return GEODiagnosisService()

    @staticmethod
    def _full_input(**overrides):
        """Build a ``GEODiagnosisInput`` from the all-pass baseline.

        The baseline is the input that ``test_full_diagnosis_all_pass``
        expects to score 100. Keyword arguments replace individual baseline
        fields. A new dict is built on every call, so tests cannot leak
        state into each other.
        """
        kwargs = {
            # Content extractability
            "has_direct_answer": True,
            "has_qa_headings": True,
            "has_structured_data": True,
            "has_internal_links": True,
            "has_freshness_info": True,
            "update_days_ago": 10,
            # Entity clarity
            "has_brand_definition": True,
            "has_target_audience": True,
            "has_unique_value": True,
            "has_industry_classification": True,
            # E-E-A-T
            "has_author_bio": True,
            "author_credentials_complete": 1.0,
            "has_certifications": True,
            "certification_count": 5,
            "has_data_sources": True,
            "authoritative_source_ratio": 1.0,
            "has_expert_endorsements": True,
            "endorsement_count": 5,
            # Schema
            "has_organization": True,
            "has_product": True,
            "has_article": True,
            "has_faq": True,
            "has_howto": True,
            "has_breadcrumb": True,
            # Topic authority
            "content_depth_score": 1.0,
            "topic_coverage_ratio": 1.0,
            "entity_consistency_score": 1.0,
            "cluster_completeness": 1.0,
            # Citation readiness
            "answer_ownership_rate": 0.6,
            "citation_accuracy": 1.0,
            "ai_sov": 0.35,
            "competitor_gap": 0.0,
        }
        kwargs.update(overrides)
        return GEODiagnosisInput(**kwargs)

    def test_full_diagnosis_all_pass(self, service):
        """Full diagnosis where every item passes."""
        result = service.diagnose(self._full_input())
        assert result.overall_score == 100.0
        assert result.health_level == "excellent"
        assert len(result.dimensions) == 6
        assert len(result.recommendations) == 0

    def test_full_diagnosis_all_fail(self, service):
        """Full diagnosis of a default (empty) input."""
        result = service.diagnose(GEODiagnosisInput())
        # Some items may earn a few points by default, so the total is not
        # necessarily exactly 0.
        assert result.overall_score < 10.0
        assert result.health_level == "danger"
        assert len(result.dimensions) == 6
        assert len(result.recommendations) > 0

    def test_diagnose_from_dict(self, service):
        """Run a diagnosis from a plain dictionary."""
        data = {
            "has_direct_answer": True,
            "has_brand_definition": True,
            "has_author_bio": True,
            "author_credentials_complete": 0.8,
            "has_organization": True,
            "content_depth_score": 0.8,
            "answer_ownership_rate": 0.5,
        }
        result = service.diagnose_from_dict(data)
        assert result.overall_score > 0
        assert len(result.dimensions) == 6

    def test_result_to_dict(self, service):
        """The result serialises to a dict with the expected keys."""
        input_data = GEODiagnosisInput(
            has_direct_answer=True,
            has_brand_definition=True,
        )
        result_dict = service.diagnose(input_data).to_dict()
        assert "overall_score" in result_dict
        assert "health_level" in result_dict
        assert "health_level_label" in result_dict
        assert "dimensions" in result_dict
        assert "recommendations" in result_dict
        assert len(result_dict["dimensions"]) == 6

    def test_score_boundaries(self, service):
        """The overall score stays within the 0-100 range."""
        # Lowest score
        result = service.diagnose(GEODiagnosisInput())
        assert result.overall_score >= 0.0
        # Highest score
        input_data = self._full_input(
            citation_accuracy=0.95,
            competitor_gap=0.05,
        )
        result = service.diagnose(input_data)
        assert result.overall_score <= 100.0

    def test_health_levels(self, service):
        """Health level reported by the service for strong and empty inputs."""
        # excellent (>= 80)
        input_data = self._full_input(
            author_credentials_complete=0.9,
            certification_count=4,
            authoritative_source_ratio=0.9,
            endorsement_count=4,
            content_depth_score=0.9,
            topic_coverage_ratio=0.9,
            entity_consistency_score=0.9,
            cluster_completeness=0.8,
            citation_accuracy=0.95,
            competitor_gap=0.05,
        )
        result = service.diagnose(input_data)
        assert result.health_level == "excellent"
        # danger: a default input, the same expectation that
        # test_full_diagnosis_all_fail asserts.
        result = service.diagnose(GEODiagnosisInput())
        assert result.health_level == "danger"

    def test_dimension_scores_sum(self, service):
        """The overall score equals the sum of the dimension scores."""
        input_data = GEODiagnosisInput(
            has_direct_answer=True,
            has_brand_definition=True,
        )
        result = service.diagnose(input_data)
        total = sum(dim.score for dim in result.dimensions)
        assert result.overall_score == pytest.approx(total, rel=0.01)

    def test_recommendations_generated(self, service):
        """A default input produces recommendations with valid priorities."""
        result = service.diagnose(GEODiagnosisInput())
        # With every item failing there should be recommendations.
        assert len(result.recommendations) > 0
        assert all(r.priority in ("P0", "P1", "P2") for r in result.recommendations)