"""TDD tests for the content generation module.

Test strategy:
- Exercise the real content rule validation
- Exercise the real sensitive-word filtering
- Do not mock business logic
"""

import pytest

from app.services.content.rule_validator import (
    ValidationIssue,
    ValidationResult,
    AI_Pattern,
    RuleValidator,
)
from app.services.content.sensitive_filter import (
    FoundWord,
    FilterResult,
    SensitiveFilter,
)


def _issues_in_category(result, category):
    """Return the issues of *result* whose ``category`` equals *category*."""
    return [issue for issue in result.issues if issue.category == category]


def _issues_mentioning(result, keyword):
    """Return the issues of *result* whose ``message`` contains *keyword*."""
    return [issue for issue in result.issues if keyword in issue.message]


# ============================================================================
# Rule validation tests
# ============================================================================


class TestValidationIssue:
    """Tests for the ValidationIssue data structure."""

    def test_validation_issue_creation(self):
        """A ValidationIssue exposes the values it was constructed with."""
        created = ValidationIssue(
            severity="high",
            message="测试问题",
            category="test_category",
        )

        assert created.severity == "high"
        assert created.message == "测试问题"
        assert created.category == "test_category"


class TestValidationResult:
    """Tests for the ValidationResult data structure."""

    def test_validation_result_creation(self):
        """A ValidationResult exposes the values it was constructed with."""
        outcome = ValidationResult(
            is_valid=False,
            score=85,
            issues=[
                ValidationIssue("high", "问题1", "cat1"),
                ValidationIssue("medium", "问题2", "cat2"),
            ],
            passed=["检查1", "检查2"],
        )

        assert outcome.is_valid is False
        assert outcome.score == 85
        assert len(outcome.issues) == 2
        assert len(outcome.passed) == 2


class TestRuleValidator:
    """Tests for RuleValidator."""

    def test_validate_title_too_long(self):
        """An over-long title should yield a title_length issue."""
        overlong_title = "A" * 100  # longer than most platforms allow
        outcome = RuleValidator().validate(
            content="这是正文内容",
            title=overlong_title,
            platform="zhihu",
        )

        # A problem is expected to be reported for the title.
        length_issues = _issues_in_category(outcome, "title_length")
        assert any("超过限制" in issue.message for issue in length_issues)

    def test_validate_title_too_short(self):
        """A too-short title should yield a title_length issue."""
        outcome = RuleValidator().validate(
            content="这是正文内容",
            title="短",  # below the minimum requirement
            platform="zhihu",
        )

        length_issues = _issues_in_category(outcome, "title_length")
        assert any("低于" in issue.message for issue in length_issues)

    def test_validate_content_too_long(self):
        """Over-long content should yield a content_length issue."""
        oversized_body = "A" * 50001
        outcome = RuleValidator().validate(
            content=oversized_body,
            title="测试标题",
            platform="zhihu",
        )

        length_issues = _issues_in_category(outcome, "content_length")
        assert any("超过限制" in issue.message for issue in length_issues)

    def test_validate_valid_content(self):
        """Valid content should record passed checks, including title length."""
        outcome = RuleValidator().validate(
            content="这是一篇正常的文章内容,包含足够的文字来通过校验。",
            title="这是一个有效的标题啊",
            platform="zhihu",
        )

        # Verify the passed checks.
        assert len(outcome.passed) > 0
        assert any("标题长度" in check for check in outcome.passed)

    def test_validate_unsupported_platform(self):
        """Validating for an unknown platform should raise ValueError."""
        checker = RuleValidator()

        with pytest.raises(ValueError, match="不支持的平台"):
            checker.validate(
                content="内容",
                title="标题",
                platform="unsupported_platform",
            )

    def test_validate_wechat_inducing_share(self):
        """WeChat content inducing shares should yield a high-severity issue."""
        outcome = RuleValidator().validate(
            content="转发本文领取精美礼品",
            title="测试标题",
            platform="wechat",
        )

        share_issues = _issues_mentioning(outcome, "诱导分享")
        assert len(share_issues) > 0
        assert share_issues[0].severity == "high"

    def test_validate_wechat_external_link(self):
        """WeChat content with an external link should yield an issue."""
        outcome = RuleValidator().validate(
            content="请访问 https://example.com 了解更多信息",
            title="测试标题",
            platform="wechat",
        )

        assert len(_issues_mentioning(outcome, "外部链接")) > 0

    def test_validate_xiaohongshu_cross_platform(self):
        """Xiaohongshu cross-platform traffic diversion should yield an issue."""
        outcome = RuleValidator().validate(
            content="欢迎关注我的微信公众号: test123",
            title="测试标题",
            platform="xiaohongshu",
        )

        assert len(_issues_mentioning(outcome, "引流")) > 0

    def test_validate_baijiahao_clickbait(self):
        """A clickbait title on Baijiahao should yield an issue."""
        outcome = RuleValidator().validate(
            content="这是一篇正常的文章内容",
            title="震惊!这个产品竟然...",
            platform="baijiahao",
        )

        assert len(_issues_mentioning(outcome, "标题党")) > 0

    def test_validate_douyin_watermark(self):
        """Douyin content mentioning a watermark should yield an issue."""
        outcome = RuleValidator().validate(
            content="视频来自抖音水印",
            title="测试标题",
            platform="douyin",
        )

        assert len(_issues_mentioning(outcome, "水印")) > 0

    def test_detect_ai_patterns(self):
        """detect_ai_patterns should return a list."""
        detected = RuleValidator().detect_ai_patterns(
            content="随着时代的不断发展,我们可以发现...",
            platform="zhihu",
        )

        # AI-sensitive platforms are expected to detect some patterns;
        # only the return type is checked here.
        assert isinstance(detected, list)

    def test_get_optimization_tips(self):
        """get_optimization_tips should return a list for a known platform."""
        suggestions = RuleValidator().get_optimization_tips("zhihu")

        assert isinstance(suggestions, list)

    def test_get_optimization_tips_unsupported_platform(self):
        """get_optimization_tips should return an empty list for an unknown platform."""
        suggestions = RuleValidator().get_optimization_tips("unsupported")

        assert suggestions == []


# ============================================================================
# Sensitive-word filtering tests
# ============================================================================


class TestFoundWord:
    """Tests for the FoundWord data structure."""

    def test_found_word_creation(self):
        """A FoundWord exposes the values it was constructed with."""
        hit = FoundWord(
            word="测试",
            category="test",
            position=10,
            replacement="**",
        )

        assert hit.word == "测试"
        assert hit.category == "test"
        assert hit.position == 10
        assert hit.replacement == "**"


class TestFilterResult:
    """Tests for the FilterResult data structure."""

    def test_filter_result_creation(self):
        """A FilterResult exposes the values it was constructed with."""
        outcome = FilterResult(
            filtered_content="这是**内容",
            found_words=[],
            replacements={"测试": "**"},
        )

        assert outcome.filtered_content == "这是**内容"
        assert len(outcome.replacements) == 1


class TestSensitiveFilter:
    """Tests for SensitiveFilter."""

    def test_filter_politics_words(self):
        """A politically sensitive word should be masked and categorised."""
        outcome = SensitiveFilter().filter(
            content="这是关于台湾的消息",
            platform="zhihu",
        )

        assert "**" in outcome.filtered_content
        assert len(outcome.found_words) > 0
        assert outcome.found_words[0].category == "politics"

    def test_filter_medical_words(self):
        """A medically sensitive word should be masked."""
        outcome = SensitiveFilter().filter(
            content="这种药品可以治愈疾病",
            platform="zhihu",
        )

        assert "**" in outcome.filtered_content
        assert len(outcome.found_words) > 0

    def test_filter_no_sensitive_words(self):
        """Content without sensitive words should pass through unchanged."""
        clean_text = "这是一篇正常的文章"
        outcome = SensitiveFilter().filter(
            content=clean_text,
            platform="zhihu",
        )

        assert outcome.filtered_content == "这是一篇正常的文章"
        assert len(outcome.found_words) == 0
        assert len(outcome.replacements) == 0

    def test_filter_multiple_words(self):
        """Several sensitive words in one text should all be found."""
        outcome = SensitiveFilter().filter(
            content="台湾和西藏都是中国的一部分",
            platform="zhihu",
        )

        # Every sensitive word is expected to be replaced.
        assert "**" in outcome.filtered_content
        assert len(outcome.found_words) >= 2

    def test_filter_replacement_preserves_length(self):
        """Masking should keep the text length unchanged."""
        original = "台湾"
        outcome = SensitiveFilter().filter(
            content=original,
            platform="zhihu",
        )

        # The filtered text should be as long as the input.
        assert len(outcome.filtered_content) == len(original)

    def test_add_custom_words(self):
        """add_custom_words should register the words under their category."""
        word_filter = SensitiveFilter()

        word_filter.add_custom_words("custom", ["自定义词1", "自定义词2"])

        assert "custom" in word_filter.custom_words
        assert len(word_filter.custom_words["custom"]) == 2

    def test_filter_custom_words(self):
        """A registered custom word should be masked by filter()."""
        word_filter = SensitiveFilter()
        word_filter.add_custom_words("custom", ["品牌名"])

        outcome = word_filter.filter(
            content="这是一个品牌名的产品",
            platform="zhihu",
        )

        assert "**" in outcome.filtered_content

    def test_filter_empty_content(self):
        """Filtering an empty string should return it with no findings."""
        outcome = SensitiveFilter().filter(
            content="",
            platform="zhihu",
        )

        assert outcome.filtered_content == ""
        assert len(outcome.found_words) == 0


class TestSensitiveWordsData:
    """Tests for the sensitive-word dictionary data."""

    def test_sensitive_words_exist(self):
        """The dictionary should contain the expected categories."""
        from app.services.content.sensitive_filter import SENSITIVE_WORDS

        for expected in ("politics", "medical", "finance", "adult"):
            assert expected in SENSITIVE_WORDS

    def test_sensitive_words_not_empty(self):
        """Every category in the dictionary should hold at least one word."""
        from app.services.content.sensitive_filter import SENSITIVE_WORDS

        for category, words in SENSITIVE_WORDS.items():
            assert len(words) > 0, f"Category {category} is empty"


# ============================================================================
# Validation and filtering integration tests
# ============================================================================


class TestContentValidationIntegration:
    """Integration tests combining content validation and filtering."""

    def test_full_validation_workflow(self):
        """Filter sensitive words first, then validate the filtered content."""
        checker = RuleValidator()
        word_filter = SensitiveFilter()

        # Raw content.
        original_content = "这是一篇关于台湾的文章,内容涉及敏感政治话题。"

        # Step 1: filter sensitive words.
        filtered = word_filter.filter(original_content, platform="zhihu")

        # Step 2: validate the filtered content.
        outcome = checker.validate(
            content=filtered.filtered_content,
            title="测试标题",
            platform="zhihu",
        )

        # The sensitive word should have been replaced.
        assert "**" in filtered.filtered_content

        # Only the result types are checked here (the sensitive word has
        # already been replaced before validation).
        assert isinstance(outcome.is_valid, bool)
        assert isinstance(outcome.score, int)

    def test_platform_specific_filtering(self):
        """filter() should return a FilterResult for each platform."""
        word_filter = SensitiveFilter()

        # Different platforms may have different sensitive-word configurations.
        zhihu_outcome = word_filter.filter("测试内容", platform="zhihu")
        wechat_outcome = word_filter.filter("测试内容", platform="wechat")

        # Both calls should produce a result object.
        for outcome in (zhihu_outcome, wechat_outcome):
            assert isinstance(outcome, FilterResult)