"""内容规则校验服务""" import re from dataclasses import dataclass from typing import Optional from app.services.distribution.platform_rules import PLATFORM_RULES @dataclass class ValidationIssue: """校验问题""" severity: str # high, medium, low message: str category: str @dataclass class ValidationResult: """校验结果""" is_valid: bool score: int issues: list # list of ValidationIssue passed: list # list of str @dataclass class AI_Pattern: """AI写作特征""" pattern: str type: str # banned_word, banned_structure severity: str # medium, high class RuleValidator: """内容规则校验器""" def validate(self, content: str, title: str, platform: str) -> ValidationResult: """ 校验内容是否符合平台规则 Args: content: 内容正文 title: 标题 platform: 平台标识 Returns: ValidationResult: 校验结果 """ rules = PLATFORM_RULES.get(platform) if not rules: raise ValueError(f"不支持的平台: {platform}") issues: list[ValidationIssue] = [] passed: list[str] = [] # 标题长度校验 title_len = len(title) title_rules = rules.get("title_rules", {}) max_title = title_rules.get("max_length", 30) min_title = title_rules.get("min_length", 5) if title_len > max_title: issues.append(ValidationIssue( "high", f"标题长度 {title_len} 超过限制 {max_title}", "title_length" )) elif title_len < min_title: issues.append(ValidationIssue( "medium", f"标题长度 {title_len} 低于最低要求 {min_title}", "title_length" )) else: passed.append(f"标题长度合规({title_len}/{max_title})") # 内容长度校验 content_len = len(content) content_rules = rules.get("content_length", {}) max_content = content_rules.get("max", 20000) min_content = content_rules.get("min", 0) if content_len > max_content: issues.append(ValidationIssue( "high", f"内容长度 {content_len} 超过限制 {max_content}", "content_length" )) elif min_content > 0 and content_len < min_content: issues.append(ValidationIssue( "medium", f"内容长度 {content_len} 低于建议最低 {min_content}", "content_length" )) else: passed.append(f"内容长度合规({content_len}/{max_content})") # AI模式检测 ai_sensitivity = rules.get("ai_sensitivity", {}) if ai_sensitivity.get("humanization_required", False): ai_results = self.detect_ai_patterns(content, platform) for result in ai_results: issues.append(ValidationIssue( "medium", f"发现AI写作特征: {result.pattern}", "ai_pattern" )) # 平台特定规则 platform_issues, platform_passed = self._validate_platform_specific(content, title, platform) issues.extend(platform_issues) passed.extend(platform_passed) # 计算分数 penalty = sum( 15 if i.severity == "high" else 8 if i.severity == "medium" else 3 for i in issues ) score = max(0, 100 - penalty) # 判断是否有效(无high级别问题) is_valid = all(i.severity != "high" for i in issues) return ValidationResult(is_valid, score, issues, passed) def detect_ai_patterns(self, content: str, platform: str) -> list[AI_Pattern]: """ 检测AI写作模式 Args: content: 内容正文 platform: 平台标识 Returns: list[AI_Pattern]: 检测到的AI特征列表 """ rules = PLATFORM_RULES.get(platform) if not rules: return [] results: list[AI_Pattern] = [] ai_config = rules.get("ai_sensitivity", {}) banned_patterns = ai_config.get("banned_patterns", []) banned_structures = ai_config.get("banned_structures", []) # 检测禁用词汇 for pattern in banned_patterns: if pattern in content: results.append(AI_Pattern(pattern, "banned_word", "medium")) # 检测禁用结构 for structure in banned_structures: if re.search(structure, content): results.append(AI_Pattern(structure, "banned_structure", "medium")) break return results def get_optimization_tips(self, platform: str) -> list[str]: """ 获取平台优化建议 Args: platform: 平台标识 Returns: list[str]: 优化建议列表 """ rules = PLATFORM_RULES.get(platform) if not rules: return [] return rules.get("seo_tips", []) def _validate_platform_specific( self, content: str, title: str, platform: str ) -> tuple: """平台特定规则校验""" issues: list[ValidationIssue] = [] passed: list[str] = [] # 诱导分享/关注检测 inducing_patterns = re.compile( r"(转发|分享|关注|点赞|收藏).{0,4}(领|获|得|拿|解锁|免费)", re.IGNORECASE, ) # 连续特殊符号 consecutive_symbols = re.compile(r"[!!??]{3,}") # 外部链接(排除公众号和小程序链接) external_link = re.compile( r"https?://(?!mp\.weixin\.qq\.com|wx\.qq\.com|weixin://)[^\s<>))]+", re.IGNORECASE, ) # 标题党词汇 clickbait_words = {"震惊", "惊呆", "吓死", "笑死", "疯传", "刷屏", "出大事", "不敢相信"} # 水印检测 watermark_patterns = re.compile( r"(抖音|快手|小红书|微博|B站|bilibili).*(水印|logo)", re.IGNORECASE, ) if platform == "wechat": # 诱导分享/关注 if inducing_patterns.search(title) or inducing_patterns.search(content): issues.append(ValidationIssue( "high", "包含诱导分享/关注语句", "platform_rule" )) else: passed.append("无诱导分享/关注语句") # 连续特殊符号 if consecutive_symbols.search(title): issues.append(ValidationIssue( "medium", "标题包含连续特殊符号", "title_format" )) else: passed.append("标题无连续特殊符号") # 外部链接 if external_link.search(content): issues.append(ValidationIssue( "high", "正文包含外部链接(仅支持公众号链接和小程序)", "platform_rule" )) else: passed.append("无外部链接") # 营销用语检测 marketing_words = ["购买", "下单", "优惠价", "限时折扣", "点击购买"] found_marketing = [w for w in marketing_words if w in content] if found_marketing: issues.append(ValidationIssue( "medium", f"疑似营销用语: {', '.join(found_marketing)}", "platform_rule" )) else: passed.append("未检测到过度营销用语") elif platform == "zhihu": # 营销内容检测 marketing_words = ["购买", "下单", "优惠价", "限时折扣", "点击购买"] found_marketing = [w for w in marketing_words if w in content] if found_marketing: issues.append(ValidationIssue( "medium", f"疑似营销用语: {', '.join(found_marketing)}", "platform_rule" )) else: passed.append("未检测到过度营销用语") elif platform == "xiaohongshu": # 字数建议 content_len = len(content) if content_len > 800: issues.append(ValidationIssue( "medium", f"正文建议300-800字,当前 {content_len} 字", "content_length" )) elif content_len < 300: issues.append(ValidationIssue( "low", f"正文建议300-800字,当前仅 {content_len} 字", "content_length" )) else: passed.append(f"正文字数适宜({content_len}字)") # 其他平台引流 cross_platform_keywords = ["微信", "公众号", "抖音号", "微博"] found_cross = [p for p in cross_platform_keywords if p in content] if found_cross: issues.append(ValidationIssue( "high", f"疑似其他平台引流: {', '.join(found_cross)}", "platform_rule" )) else: passed.append("未检测到其他平台引流信息") elif platform in ("baijiahao", "toutiao"): # 标题党检测 found_clickbait = clickbait_words & set(title) if found_clickbait: issues.append(ValidationIssue( "high", f"标题含标题党词汇: {', '.join(found_clickbait)}", "title_content" )) else: passed.append("标题无标题党词汇") elif platform == "douyin": # 水印检测 if watermark_patterns.search(content): issues.append(ValidationIssue( "high", "内容包含其他平台水印信息", "platform_rule" )) else: passed.append("未检测到其他平台水印") return issues, passed # 导出单例 validator = RuleValidator()