"""TDD tests for the content generation module.

Test strategy:
- Exercise the real content rule validation
- Exercise the real sensitive-word filtering
- Do not use mocks for business-logic tests
"""
|
|
import pytest
|
|
|
|
from app.services.content.rule_validator import (
|
|
ValidationIssue,
|
|
ValidationResult,
|
|
AI_Pattern,
|
|
RuleValidator,
|
|
)
|
|
from app.services.content.sensitive_filter import (
|
|
FoundWord,
|
|
FilterResult,
|
|
SensitiveFilter,
|
|
)
|
|
|
|
|
|
# ============================================================================
# Rule validation tests
# ============================================================================
|
|
|
|
class TestValidationIssue:
    """Tests for the ValidationIssue data structure."""

    def test_validation_issue_creation(self):
        """Constructor keyword arguments are readable back as attributes."""
        expected_fields = {
            "severity": "high",
            "message": "测试问题",
            "category": "test_category",
        }

        issue = ValidationIssue(**expected_fields)

        # Check the attributes in the same order they were supplied.
        for attr_name, expected in expected_fields.items():
            assert getattr(issue, attr_name) == expected
|
|
|
|
|
|
class TestValidationResult:
    """Tests for the ValidationResult data structure."""

    def test_validation_result_creation(self):
        """A result keeps the validity flag, score, issues and passed checks."""
        found_issues = [
            ValidationIssue("high", "问题1", "cat1"),
            ValidationIssue("medium", "问题2", "cat2"),
        ]
        passed_checks = ["检查1", "检查2"]

        result = ValidationResult(
            is_valid=False,
            score=85,
            issues=found_issues,
            passed=passed_checks,
        )

        assert result.is_valid is False
        assert result.score == 85
        # Both collections were built with exactly two entries.
        assert (len(result.issues), len(result.passed)) == (2, 2)
|
|
|
|
|
|
class TestRuleValidator:
    """Tests for RuleValidator."""

    def test_validate_title_too_long(self):
        """A 100-character title on zhihu yields an over-limit title issue."""
        checker = RuleValidator()
        # Original author's note: longer than most platforms allow.
        oversized_title = "A" * 100

        outcome = checker.validate(
            content="这是正文内容",
            title=oversized_title,
            platform="zhihu",
        )

        # A problem should be reported in the title-length category.
        messages = [
            issue.message
            for issue in outcome.issues
            if issue.category == "title_length"
        ]
        assert any("超过限制" in text for text in messages)

    def test_validate_title_too_short(self):
        """A one-character title on zhihu yields a below-minimum title issue."""
        checker = RuleValidator()
        # Original author's note: below the minimum requirement.
        tiny_title = "短"

        outcome = checker.validate(
            content="这是正文内容",
            title=tiny_title,
            platform="zhihu",
        )

        messages = [
            issue.message
            for issue in outcome.issues
            if issue.category == "title_length"
        ]
        assert any("低于" in text for text in messages)

    def test_validate_content_too_long(self):
        """A 50001-character body on zhihu yields an over-limit content issue."""
        checker = RuleValidator()
        oversized_body = "A" * 50001

        outcome = checker.validate(
            content=oversized_body,
            title="测试标题",
            platform="zhihu",
        )

        messages = [
            issue.message
            for issue in outcome.issues
            if issue.category == "content_length"
        ]
        assert any("超过限制" in text for text in messages)

    def test_validate_valid_content(self):
        """Ordinary content records passed checks, including title length."""
        checker = RuleValidator()

        outcome = checker.validate(
            content="这是一篇正常的文章内容,包含足够的文字来通过校验。",
            title="这是一个有效的标题啊",
            platform="zhihu",
        )

        # At least one check passed, and one of them is the title-length check.
        passed_checks = outcome.passed
        assert len(passed_checks) > 0
        assert any("标题长度" in label for label in passed_checks)

    def test_validate_unsupported_platform(self):
        """Validating for an unknown platform raises ValueError."""
        checker = RuleValidator()
        request = {
            "content": "内容",
            "title": "标题",
            "platform": "unsupported_platform",
        }

        with pytest.raises(ValueError, match="不支持的平台"):
            checker.validate(**request)

    def test_validate_wechat_inducing_share(self):
        """WeChat content that induces sharing is flagged with high severity."""
        checker = RuleValidator()

        outcome = checker.validate(
            content="转发本文领取精美礼品",
            title="测试标题",
            platform="wechat",
        )

        flagged = [
            issue for issue in outcome.issues if "诱导分享" in issue.message
        ]
        assert flagged
        assert flagged[0].severity == "high"

    def test_validate_wechat_external_link(self):
        """WeChat content containing an external URL is flagged."""
        checker = RuleValidator()

        outcome = checker.validate(
            content="请访问 https://example.com 了解更多信息",
            title="测试标题",
            platform="wechat",
        )

        flagged = [
            issue for issue in outcome.issues if "外部链接" in issue.message
        ]
        assert flagged

    def test_validate_xiaohongshu_cross_platform(self):
        """Xiaohongshu content steering readers to another platform is flagged."""
        checker = RuleValidator()

        outcome = checker.validate(
            content="欢迎关注我的微信公众号: test123",
            title="测试标题",
            platform="xiaohongshu",
        )

        flagged = [
            issue for issue in outcome.issues if "引流" in issue.message
        ]
        assert flagged

    def test_validate_baijiahao_clickbait(self):
        """A clickbait-style title on Baijiahao is flagged."""
        checker = RuleValidator()

        outcome = checker.validate(
            content="这是一篇正常的文章内容",
            title="震惊!这个产品竟然...",
            platform="baijiahao",
        )

        flagged = [
            issue for issue in outcome.issues if "标题党" in issue.message
        ]
        assert flagged

    def test_validate_douyin_watermark(self):
        """Douyin content mentioning a watermark is flagged."""
        checker = RuleValidator()

        outcome = checker.validate(
            content="视频来自抖音水印",
            title="测试标题",
            platform="douyin",
        )

        flagged = [
            issue for issue in outcome.issues if "水印" in issue.message
        ]
        assert flagged

    def test_detect_ai_patterns(self):
        """AI-pattern detection returns a list."""
        checker = RuleValidator()

        detected = checker.detect_ai_patterns(
            content="随着时代的不断发展,我们可以发现...",
            platform="zhihu",
        )

        # Original author's note: AI-sensitive platforms should be able to
        # detect some patterns. Only the return type is checked here.
        assert isinstance(detected, list)

    def test_get_optimization_tips(self):
        """Optimization tips for a known platform come back as a list."""
        checker = RuleValidator()

        suggestions = checker.get_optimization_tips("zhihu")

        assert isinstance(suggestions, list)

    def test_get_optimization_tips_unsupported_platform(self):
        """Optimization tips for an unknown platform are an empty list."""
        checker = RuleValidator()

        suggestions = checker.get_optimization_tips("unsupported")

        assert suggestions == []
|
|
|
|
|
|
# ============================================================================
# Sensitive-word filtering tests
# ============================================================================
|
|
|
|
class TestFoundWord:
    """Tests for the FoundWord data structure."""

    def test_found_word_creation(self):
        """Constructor keyword arguments are readable back as attributes."""
        expected_fields = {
            "word": "测试",
            "category": "test",
            "position": 10,
            "replacement": "**",
        }

        found = FoundWord(**expected_fields)

        # Check the attributes in the same order they were supplied.
        for attr_name, expected in expected_fields.items():
            assert getattr(found, attr_name) == expected
|
|
|
|
|
|
class TestFilterResult:
    """Tests for the FilterResult data structure."""

    def test_filter_result_creation(self):
        """A result keeps its filtered text and its replacement mapping."""
        masked_text = "这是**内容"
        substitutions = {"测试": "**"}

        result = FilterResult(
            filtered_content=masked_text,
            found_words=[],
            replacements=substitutions,
        )

        assert result.filtered_content == "这是**内容"
        assert len(result.replacements) == 1
|
|
|
|
|
|
class TestSensitiveFilter:
    """Tests for SensitiveFilter."""

    def test_filter_politics_words(self):
        """A political term is masked and reported under the politics category."""
        word_filter = SensitiveFilter()

        outcome = word_filter.filter(
            content="这是关于台湾的消息",
            platform="zhihu",
        )

        assert "**" in outcome.filtered_content
        hits = outcome.found_words
        assert len(hits) > 0
        assert hits[0].category == "politics"

    def test_filter_medical_words(self):
        """A medical claim is masked and at least one word is reported."""
        word_filter = SensitiveFilter()

        outcome = word_filter.filter(
            content="这种药品可以治愈疾病",
            platform="zhihu",
        )

        assert "**" in outcome.filtered_content
        assert len(outcome.found_words) > 0

    def test_filter_no_sensitive_words(self):
        """Clean text passes through unchanged with nothing reported."""
        word_filter = SensitiveFilter()
        clean_text = "这是一篇正常的文章"

        outcome = word_filter.filter(
            content=clean_text,
            platform="zhihu",
        )

        assert outcome.filtered_content == "这是一篇正常的文章"
        assert len(outcome.found_words) == 0
        assert len(outcome.replacements) == 0

    def test_filter_multiple_words(self):
        """Text with two sensitive terms reports at least two found words."""
        word_filter = SensitiveFilter()

        outcome = word_filter.filter(
            content="台湾和西藏都是中国的一部分",
            platform="zhihu",
        )

        # Every sensitive term should have been replaced.
        assert "**" in outcome.filtered_content
        assert len(outcome.found_words) >= 2

    def test_filter_replacement_preserves_length(self):
        """Masking a word keeps the text the same length."""
        word_filter = SensitiveFilter()
        original = "台湾"

        outcome = word_filter.filter(
            content=original,
            platform="zhihu",
        )

        # The masked text must be exactly as long as the input.
        masked_length = len(outcome.filtered_content)
        assert masked_length == len(original)

    def test_add_custom_words(self):
        """Custom words are stored under the category they were added to."""
        word_filter = SensitiveFilter()

        word_filter.add_custom_words("custom", ["自定义词1", "自定义词2"])

        assert "custom" in word_filter.custom_words
        assert len(word_filter.custom_words["custom"]) == 2

    def test_filter_custom_words(self):
        """A custom word added at runtime is masked by the filter."""
        word_filter = SensitiveFilter()
        word_filter.add_custom_words("custom", ["品牌名"])

        outcome = word_filter.filter(
            content="这是一个品牌名的产品",
            platform="zhihu",
        )

        assert "**" in outcome.filtered_content

    def test_filter_empty_content(self):
        """Filtering an empty string returns an empty string and no words."""
        word_filter = SensitiveFilter()

        outcome = word_filter.filter(
            content="",
            platform="zhihu",
        )

        assert outcome.filtered_content == ""
        assert len(outcome.found_words) == 0
|
|
|
|
|
|
class TestSensitiveWordsData:
    """Tests for the sensitive-word dictionary data."""

    def test_sensitive_words_exist(self):
        """The dictionary contains the four expected categories."""
        from app.services.content.sensitive_filter import SENSITIVE_WORDS

        required_categories = ("politics", "medical", "finance", "adult")
        for category_name in required_categories:
            assert category_name in SENSITIVE_WORDS

    def test_sensitive_words_not_empty(self):
        """Every category in the dictionary holds at least one word."""
        from app.services.content.sensitive_filter import SENSITIVE_WORDS

        for category, words in SENSITIVE_WORDS.items():
            word_count = len(words)
            assert word_count > 0, f"Category {category} is empty"
|
|
|
|
|
|
# ============================================================================
# Validation and filtering integration tests
# ============================================================================
|
|
|
|
class TestContentValidationIntegration:
    """Integration tests combining content filtering and rule validation."""

    def test_full_validation_workflow(self):
        """Filter sensitive words first, then validate the filtered text."""
        checker = RuleValidator()
        word_filter = SensitiveFilter()

        # Raw input containing a sensitive term.
        raw_text = "这是一篇关于台湾的文章,内容涉及敏感政治话题。"

        # Step 1: mask sensitive words.
        filter_outcome = word_filter.filter(raw_text, platform="zhihu")

        # Step 2: validate the masked text.
        validation = checker.validate(
            content=filter_outcome.filtered_content,
            title="测试标题",
            platform="zhihu",
        )

        # The sensitive term should have been replaced.
        assert "**" in filter_outcome.filtered_content

        # Only the result's field types are checked here; the original note
        # says the result should be valid once sensitive words are replaced.
        assert isinstance(validation.is_valid, bool)
        assert isinstance(validation.score, int)

    def test_platform_specific_filtering(self):
        """Filtering the same text for zhihu and wechat returns FilterResults."""
        word_filter = SensitiveFilter()

        # Different platforms may have different sensitive-word configurations.
        outcomes = [
            word_filter.filter("测试内容", platform=platform_name)
            for platform_name in ("zhihu", "wechat")
        ]

        # Both calls should return a result object.
        for outcome in outcomes:
            assert isinstance(outcome, FilterResult)
|