# geo/tests/test_content_generation.py
#
# 431 lines
# 13 KiB
# Python

"""内容生成模块TDD测试
测试策略:
- 测试真实的内容规则校验
- 测试真实的敏感词过滤
- 不使用Mock进行业务逻辑测试
"""
import pytest
from app.services.content.rule_validator import (
ValidationIssue,
ValidationResult,
AI_Pattern,
RuleValidator,
)
from app.services.content.sensitive_filter import (
FoundWord,
FilterResult,
SensitiveFilter,
)
# ============================================================================
# 规则校验测试
# ============================================================================
class TestValidationIssue:
    """Tests for the ValidationIssue data structure."""

    def test_validation_issue_creation(self):
        """A ValidationIssue exposes the fields it was constructed with."""
        expected_fields = {
            "severity": "high",
            "message": "测试问题",
            "category": "test_category",
        }

        issue = ValidationIssue(**expected_fields)

        # Every keyword passed to the constructor must be readable back
        # as an attribute with the same value.
        for attr_name, expected_value in expected_fields.items():
            assert getattr(issue, attr_name) == expected_value
class TestValidationResult:
    """Tests for the ValidationResult data structure."""

    def test_validation_result_creation(self):
        """A ValidationResult keeps the validity flag, score, issues and passed checks."""
        issue_specs = (
            ("high", "问题1", "cat1"),
            ("medium", "问题2", "cat2"),
        )
        found_issues = [ValidationIssue(*spec) for spec in issue_specs]
        passed_checks = ["检查1", "检查2"]

        result = ValidationResult(
            is_valid=False,
            score=85,
            issues=found_issues,
            passed=passed_checks,
        )

        assert result.is_valid is False
        assert result.score == 85
        # Both collections are stored with their original sizes.
        assert len(result.issues) == 2
        assert len(result.passed) == 2
class TestRuleValidator:
    """Tests for RuleValidator: length checks, platform-specific rules, AI patterns and tips."""

    def test_validate_title_too_long(self):
        """A 100-character title on zhihu is expected to raise a title_length issue."""
        validator = RuleValidator()
        result = validator.validate(
            content="这是正文内容",
            title="A" * 100,  # longer than most platforms allow
            platform="zhihu",
        )
        # An over-limit issue should be reported in the title_length category.
        title_issues = [i for i in result.issues if i.category == "title_length"]
        assert any("超过限制" in i.message for i in title_issues)

    def test_validate_title_too_short(self):
        """An empty title on zhihu is expected to raise a title_length issue."""
        validator = RuleValidator()
        result = validator.validate(
            content="这是正文内容",
            title="",  # below the minimum requirement
            platform="zhihu",
        )
        title_issues = [i for i in result.issues if i.category == "title_length"]
        assert any("低于" in i.message for i in title_issues)

    def test_validate_content_too_long(self):
        """A 50000-character body on zhihu is expected to raise a content_length issue."""
        validator = RuleValidator()
        result = validator.validate(
            content="A" * 50000,  # over the limit
            title="测试标题",
            platform="zhihu",
        )
        content_issues = [i for i in result.issues if i.category == "content_length"]
        assert any("超过限制" in i.message for i in content_issues)

    def test_validate_valid_content(self):
        """Ordinary title and body are expected to record a passed title-length check."""
        validator = RuleValidator()
        result = validator.validate(
            content="这是一篇正常的文章内容,包含足够的文字来通过校验。",
            title="这是一个有效的标题",
            platform="zhihu",
        )
        # At least one check must have passed.
        assert len(result.passed) > 0
        # Look for the title-length entry anywhere in the list so the test
        # does not depend on the order in which the validator records checks.
        assert any("标题长度" in check for check in result.passed)

    def test_validate_unsupported_platform(self):
        """Validating for an unknown platform is expected to raise ValueError."""
        validator = RuleValidator()
        with pytest.raises(ValueError, match="不支持的平台"):
            validator.validate(
                content="内容",
                title="标题",
                platform="unsupported_platform",
            )

    def test_validate_wechat_inducing_share(self):
        """Share-for-reward wording on wechat is expected to yield a high-severity issue."""
        validator = RuleValidator()
        result = validator.validate(
            content="转发本文领取精美礼品",
            title="测试标题",
            platform="wechat",
        )
        inducing_issues = [i for i in result.issues if "诱导分享" in i.message]
        assert len(inducing_issues) > 0
        assert inducing_issues[0].severity == "high"

    def test_validate_wechat_external_link(self):
        """A body containing an https URL on wechat is expected to yield an external-link issue."""
        validator = RuleValidator()
        result = validator.validate(
            content="请访问 https://example.com 了解更多信息",
            title="测试标题",
            platform="wechat",
        )
        link_issues = [i for i in result.issues if "外部链接" in i.message]
        assert len(link_issues) > 0

    def test_validate_xiaohongshu_cross_platform(self):
        """Mentioning a WeChat account on xiaohongshu is expected to yield a traffic-diversion issue."""
        validator = RuleValidator()
        result = validator.validate(
            content="欢迎关注我的微信公众号: test123",
            title="测试标题",
            platform="xiaohongshu",
        )
        cross_issues = [i for i in result.issues if "引流" in i.message]
        assert len(cross_issues) > 0

    def test_validate_baijiahao_clickbait(self):
        """A sensational title on baijiahao is expected to yield a clickbait issue."""
        validator = RuleValidator()
        result = validator.validate(
            content="这是一篇正常的文章内容",
            title="震惊!这个产品竟然...",
            platform="baijiahao",
        )
        clickbait_issues = [i for i in result.issues if "标题党" in i.message]
        assert len(clickbait_issues) > 0

    def test_validate_douyin_watermark(self):
        """A body mentioning a watermark on douyin is expected to yield a watermark issue."""
        validator = RuleValidator()
        result = validator.validate(
            content="视频来自抖音水印",
            title="测试标题",
            platform="douyin",
        )
        watermark_issues = [i for i in result.issues if "水印" in i.message]
        assert len(watermark_issues) > 0

    def test_detect_ai_patterns(self):
        """detect_ai_patterns is expected to return a list for a zhihu text."""
        validator = RuleValidator()
        patterns = validator.detect_ai_patterns(
            content="随着时代的不断发展,我们可以发现...",
            platform="zhihu",
        )
        # AI-sensitive platforms should be able to detect some patterns.
        # NOTE(review): only the return type is asserted here, so an empty
        # list also passes -- confirm whether a non-empty result is intended.
        assert isinstance(patterns, list)

    def test_get_optimization_tips(self):
        """get_optimization_tips is expected to return a list for zhihu."""
        validator = RuleValidator()
        tips = validator.get_optimization_tips("zhihu")
        assert isinstance(tips, list)

    def test_get_optimization_tips_unsupported_platform(self):
        """get_optimization_tips is expected to return an empty list for an unknown platform."""
        validator = RuleValidator()
        tips = validator.get_optimization_tips("unsupported")
        assert tips == []
# ============================================================================
# 敏感词过滤测试
# ============================================================================
class TestFoundWord:
    """Tests for the FoundWord data structure."""

    def test_found_word_creation(self):
        """A FoundWord exposes the fields it was constructed with."""
        expected_fields = {
            "word": "测试",
            "category": "test",
            "position": 10,
            "replacement": "**",
        }

        found = FoundWord(**expected_fields)

        # Each constructor keyword must round-trip through its attribute.
        for attr_name, expected_value in expected_fields.items():
            assert getattr(found, attr_name) == expected_value
class TestFilterResult:
    """Tests for the FilterResult data structure."""

    def test_filter_result_creation(self):
        """A FilterResult keeps the filtered text and the replacement mapping."""
        masked_text = "这是**内容"
        replacement_map = {"测试": "**"}

        result = FilterResult(
            filtered_content=masked_text,
            found_words=[],
            replacements=replacement_map,
        )

        assert result.filtered_content == "这是**内容"
        assert len(result.replacements) == 1
class TestSensitiveFilter:
    """Tests for SensitiveFilter: built-in word lists, custom words and edge cases."""

    def test_filter_politics_words(self):
        """A political term is expected to be masked and reported under 'politics'."""
        sensitive_filter = SensitiveFilter()

        outcome = sensitive_filter.filter(content="这是关于台湾的消息", platform="zhihu")

        assert "**" in outcome.filtered_content
        assert len(outcome.found_words) > 0
        first_hit = outcome.found_words[0]
        assert first_hit.category == "politics"

    def test_filter_medical_words(self):
        """A medical claim is expected to be masked and reported."""
        sensitive_filter = SensitiveFilter()

        outcome = sensitive_filter.filter(content="这种药品可以治愈疾病", platform="zhihu")

        assert "**" in outcome.filtered_content
        assert len(outcome.found_words) > 0

    def test_filter_no_sensitive_words(self):
        """Text without sensitive words is expected to come back unchanged."""
        sensitive_filter = SensitiveFilter()

        outcome = sensitive_filter.filter(content="这是一篇正常的文章", platform="zhihu")

        assert outcome.filtered_content == "这是一篇正常的文章"
        # Nothing found, nothing replaced.
        assert len(outcome.found_words) == 0
        assert len(outcome.replacements) == 0

    def test_filter_multiple_words(self):
        """Several sensitive words in one text are all expected to be reported."""
        sensitive_filter = SensitiveFilter()

        outcome = sensitive_filter.filter(
            content="台湾和西藏都是中国的一部分",
            platform="zhihu",
        )

        # Every sensitive word should have been replaced.
        assert "**" in outcome.filtered_content
        assert len(outcome.found_words) >= 2

    def test_filter_replacement_preserves_length(self):
        """The masked text is expected to have the same length as the input."""
        sensitive_filter = SensitiveFilter()
        source_text = "台湾"

        outcome = sensitive_filter.filter(content=source_text, platform="zhihu")

        # Length before and after replacement should match.
        assert len(outcome.filtered_content) == len(source_text)

    def test_add_custom_words(self):
        """add_custom_words is expected to register the words under the given category."""
        sensitive_filter = SensitiveFilter()

        sensitive_filter.add_custom_words("custom", ["自定义词1", "自定义词2"])

        registered = sensitive_filter.custom_words
        assert "custom" in registered
        assert len(registered["custom"]) == 2

    def test_filter_custom_words(self):
        """A custom word added to the filter is expected to be masked."""
        sensitive_filter = SensitiveFilter()
        sensitive_filter.add_custom_words("custom", ["品牌名"])

        outcome = sensitive_filter.filter(content="这是一个品牌名的产品", platform="zhihu")

        assert "**" in outcome.filtered_content

    def test_filter_empty_content(self):
        """Empty input is expected to yield empty output and no hits."""
        sensitive_filter = SensitiveFilter()

        outcome = sensitive_filter.filter(content="", platform="zhihu")

        assert outcome.filtered_content == ""
        assert len(outcome.found_words) == 0
class TestSensitiveWordsData:
    """Tests for the SENSITIVE_WORDS dictionary data."""

    def test_sensitive_words_exist(self):
        """The dictionary is expected to contain the four core categories."""
        from app.services.content.sensitive_filter import SENSITIVE_WORDS

        # Checked in the same order as before so the first missing
        # category is the one reported.
        for required_category in ("politics", "medical", "finance", "adult"):
            assert required_category in SENSITIVE_WORDS

    def test_sensitive_words_not_empty(self):
        """Every category in the dictionary is expected to hold at least one word."""
        from app.services.content.sensitive_filter import SENSITIVE_WORDS

        for category in SENSITIVE_WORDS:
            word_list = SENSITIVE_WORDS[category]
            assert len(word_list) > 0, f"Category {category} is empty"
# ============================================================================
# 校验和过滤集成测试
# ============================================================================
class TestContentValidationIntegration:
    """Integration tests that combine SensitiveFilter with RuleValidator."""

    def test_full_validation_workflow(self):
        """Filter a sensitive text, then validate the filtered text."""
        rule_validator = RuleValidator()
        sensitive_filter = SensitiveFilter()

        # Raw input containing a sensitive term.
        raw_text = "这是一篇关于台湾的文章,内容涉及敏感政治话题。"

        # Step 1: mask sensitive words.
        filter_outcome = sensitive_filter.filter(raw_text, platform="zhihu")

        # Step 2: validate the masked text.
        validation = rule_validator.validate(
            content=filter_outcome.filtered_content,
            title="测试标题",
            platform="zhihu",
        )

        # The sensitive word should have been replaced.
        assert "**" in filter_outcome.filtered_content
        # Only the types of the validation outcome are checked here.
        assert isinstance(validation.is_valid, bool)
        assert isinstance(validation.score, int)

    def test_platform_specific_filtering(self):
        """Filtering the same text for zhihu and wechat is expected to return FilterResult objects."""
        sensitive_filter = SensitiveFilter()

        # Different platforms may have different sensitive-word configurations.
        zhihu_outcome = sensitive_filter.filter("测试内容", platform="zhihu")
        wechat_outcome = sensitive_filter.filter("测试内容", platform="wechat")

        # Both calls should return a result object.
        assert isinstance(zhihu_outcome, FilterResult)
        assert isinstance(wechat_outcome, FilterResult)