"""平台规则引擎 - 各内容平台的规则和最佳实践""" import re from typing import Optional # AI写作典型特征模式 AI_PATTERNS = { "banned_transitions": [ "总之", "综上所述", "值得注意的是", "让我们", "总而言之", "不可否认", "毋庸置疑", "首先", "其次", "最后", "最后但同样重要", "换句话说", "也就是说", "更重要的是", "可以说", ], "banned_modifiers": [ "至关重要", "不可或缺", "举足轻重", "蓬勃发展", "日新月异", "深远影响", "全面提升", "显著成效", "重大突破", "核心要素", ], "banned_structures": [ r"第一[,、].*第二[,、].*第三", # 对称三段式 r"一方面[,、].*另一方面", # 一方面...另一方面 ], "safe_patterns": [ "根据研究表明", "调研数据显示", "经验告诉我们", "事实上", "说白了", "说实话", "说真的", ], } # 敏感词分类 SENSITIVE_CATEGORIES = { "politics": ["政治敏感词库"], "medical": ["医疗敏感词库"], "finance": ["金融敏感词库"], "adult": ["低俗敏感词库"], } PLATFORM_RULES: dict[str, dict] = { # ============================================================ # P0 优先级平台 # ============================================================ "zhihu": { "name": "知乎", "platform_type": "内容社区", "priority": "P0", "enabled": True, "content_style": "专业/深度/逻辑严谨", "content_length": {"min": 500, "max": 50000, "recommended": 2000}, "structure_preference": { "has_intro": True, "has_conclusion": True, "has_toc": True, }, "title_rules": { "min_length": 10, "max_length": 30, "avoid_patterns": ["emoji", "标题党", "过度营销"], "required_patterns": ["关键词"], "case_style": "normal", }, "tag_rules": { "min_tags": 3, "max_tags": 10, "tag_style": "plain", }, "ai_sensitivity": { "detection_level": "high", "banned_patterns": AI_PATTERNS["banned_transitions"] + AI_PATTERNS["banned_modifiers"], "banned_structures": AI_PATTERNS["banned_structures"], "safe_patterns": AI_PATTERNS["safe_patterns"], "humanization_required": True, }, "sensitive_words": { "check_required": True, "categories": ["politics", "medical", "finance", "adult"], "max_tolerance": 0, "auto_filter": True, }, "seo_rules": { "keyword_density": {"min": 1, "max": 3, "recommended": 2}, "keyword_position": ["title", "first_para", "headings"], "internal_links": {"min": 0, "max": 3}, }, "geo_rules": { "citation_format": "academic", "source_attribution": True, "reference_style": "academic", }, "html_rules": { "supported_tags": ["p", "h1", "h2", "h3", "h4", "ul", "ol", "blockquote", "code", "pre"], "banned_tags": ["script", "iframe", "form", "style"], "image_support": True, "video_support": True, "code_block_support": True, }, "publish_rules": { "auto_publish": False, "require_review": False, "publish_timing": "immediate", }, "best_publish_times": ["09:00-10:00", "14:00-15:00", "20:00-22:00"], "best_publish_days": ["周一", "周三", "周五"], "max_images": 50, "platform_rules": [ "回答需针对问题本身", "不得过度营销或硬广", "引用需标注来源", "不得使用AI水文(内容需有信息增量)", "专业背书内容需有据可查", ], "seo_tips": [ "回答开头直接给出结论", "使用数据和案例支撑", "合理设置二级标题", "文末引导关注专栏", ], }, "wechat": { "name": "微信公众号", "platform_type": "内容平台", "priority": "P0", "enabled": True, "content_style": "深度/品牌调性", "content_length": {"min": 300, "max": 20000, "recommended": 1500}, "structure_preference": { "has_intro": True, "has_conclusion": True, "has_toc": False, }, "title_rules": { "min_length": 5, "max_length": 22, "avoid_patterns": ["emoji", "连续特殊符号", "标题党"], "required_patterns": [], "case_style": "normal", }, "tag_rules": { "min_tags": 1, "max_tags": 3, "tag_style": "plain", }, "ai_sensitivity": { "detection_level": "medium", "banned_patterns": [ "首先", "其次", "最后", "总的来说", "综上所述", "想必大家都有所了解", "相信大家都不陌生", ], "banned_structures": AI_PATTERNS["banned_structures"], "safe_patterns": ["根据后台数据", "从运营角度来说", "结合我们的经验"], "humanization_required": True, }, "sensitive_words": { "check_required": True, "categories": ["politics", "medical", "finance", "adult"], "max_tolerance": 0, "auto_filter": True, }, "seo_rules": { "keyword_density": {"min": 0.5, "max": 2, "recommended": 1}, "keyword_position": ["title", "first_para"], "internal_links": {"min": 0, "max": 5}, }, "geo_rules": { "citation_format": "plain", "source_attribution": True, "reference_style": "informal", }, "html_rules": { "supported_tags": ["p", "h1", "h2", "h3", "section", "blockquote", "img"], "banned_tags": ["script", "iframe", "form", "a"], "image_support": True, "video_support": True, "code_block_support": False, }, "publish_rules": { "auto_publish": False, "require_review": True, "publish_timing": "scheduled", }, "best_publish_times": ["07:30-08:30", "11:30-12:30", "17:30-18:30", "20:30-22:00"], "best_publish_days": ["周二", "周四", "周六"], "max_images": 20, "platform_rules": [ "不得使用诱导分享/关注语句", "图片单张不超过10MB", "标题不含连续特殊符号(如!!!)", "正文不含外部链接(仅支持公众号链接和小程序)", "不得包含未经授权的商标/品牌名称", ], "seo_tips": [ "首段包含核心关键词", "使用小标题分段(适配搜一搜)", "文末设置话题标签", "摘要控制在120字内", ], }, "baijiahao": { "name": "百家号", "platform_type": "内容平台", "priority": "P0", "enabled": True, "content_style": "资讯/SEO友好", "content_length": {"min": 800, "max": 30000, "recommended": 1500}, "structure_preference": { "has_intro": True, "has_conclusion": True, "has_toc": False, }, "title_rules": { "min_length": 10, "max_length": 40, "avoid_patterns": ["emoji", "标题党", "夸张词汇"], "required_patterns": ["核心关键词"], "case_style": "normal", }, "tag_rules": { "min_tags": 3, "max_tags": 5, "tag_style": "plain", }, "ai_sensitivity": { "detection_level": "high", "banned_patterns": AI_PATTERNS["banned_transitions"] + AI_PATTERNS["banned_modifiers"], "banned_structures": AI_PATTERNS["banned_structures"], "safe_patterns": ["据悉", "从某处获悉", "数据显示"], "humanization_required": True, }, "sensitive_words": { "check_required": True, "categories": ["politics", "medical", "finance", "adult"], "max_tolerance": 0, "auto_filter": True, }, "seo_rules": { "keyword_density": {"min": 1.5, "max": 4, "recommended": 2.5}, "keyword_position": ["title", "first_para", "headings"], "internal_links": {"min": 1, "max": 5}, }, "geo_rules": { "citation_format": "link", "source_attribution": True, "reference_style": "informal", }, "html_rules": { "supported_tags": ["p", "h1", "h2", "h3", "ul", "ol", "blockquote", "img"], "banned_tags": ["script", "iframe", "form", "video"], "image_support": True, "video_support": False, "code_block_support": False, }, "publish_rules": { "auto_publish": True, "require_review": False, "publish_timing": "immediate", }, "best_publish_times": ["08:00-09:00", "11:00-12:00", "17:00-18:00"], "best_publish_days": ["工作日"], "max_images": 30, "platform_rules": [ "原创内容优先推荐", "标题不含夸张/标题党词汇", "正文需含至少1张配图", "不得搬运/洗稿", ], "seo_tips": [ "标题包含百度搜索热词", "文章结构化(H2小标题)", "适当添加内链", "发布后及时答复评论", ], }, # ============================================================ # P1 优先级平台 # ============================================================ "toutiao": { "name": "今日头条", "platform_type": "内容平台", "priority": "P1", "enabled": True, "content_style": "资讯/简洁明了", "content_length": {"min": 500, "max": 30000, "recommended": 1500}, "structure_preference": { "has_intro": True, "has_conclusion": False, "has_toc": False, }, "title_rules": { "min_length": 10, "max_length": 30, "avoid_patterns": ["emoji", "标题党", "夸张"], "required_patterns": ["悬念元素"], "case_style": "normal", }, "tag_rules": { "min_tags": 1, "max_tags": 5, "tag_style": "hashtag", }, "ai_sensitivity": { "detection_level": "high", "banned_patterns": AI_PATTERNS["banned_transitions"] + AI_PATTERNS["banned_modifiers"], "banned_structures": AI_PATTERNS["banned_structures"], "safe_patterns": ["据悉", "报道称", "记者了解到"], "humanization_required": True, }, "sensitive_words": { "check_required": True, "categories": ["politics", "medical", "finance", "adult"], "max_tolerance": 0, "auto_filter": True, }, "seo_rules": { "keyword_density": {"min": 1, "max": 3, "recommended": 2}, "keyword_position": ["title", "first_para"], "internal_links": {"min": 0, "max": 3}, }, "geo_rules": { "citation_format": "link", "source_attribution": True, "reference_style": "informal", }, "html_rules": { "supported_tags": ["p", "h1", "h2", "h3", "ul", "ol", "blockquote", "img"], "banned_tags": ["script", "iframe", "form", "video"], "image_support": True, "video_support": True, "code_block_support": False, }, "publish_rules": { "auto_publish": True, "require_review": False, "publish_timing": "immediate", }, "best_publish_times": ["07:00-08:00", "12:00-13:00", "18:00-19:00", "21:00-22:00"], "best_publish_days": ["每天"], "max_images": 30, "platform_rules": [ "标题不得标题党", "内容需有信息价值", "首发原创优先推荐", "配图清晰不模糊", ], "seo_tips": [ "标题含核心关键词", "文章1500字以上推荐更高", "合理使用头条话题", "发布频率稳定提升权重", ], }, "weibo": { "name": "微博", "platform_type": "社交媒体", "priority": "P1", "enabled": True, "content_style": "轻松/即时/互动性强", "content_length": {"min": 50, "max": 2000, "recommended": 280}, "structure_preference": { "has_intro": False, "has_conclusion": False, "has_toc": False, }, "title_rules": { "min_length": 5, "max_length": 50, "avoid_patterns": ["长链接"], "required_patterns": [], "case_style": "normal", }, "tag_rules": { "min_tags": 1, "max_tags": 10, "tag_style": "hashtag", }, "ai_sensitivity": { "detection_level": "low", "banned_patterns": ["首先", "其次", "最后", "总的来说"], "banned_structures": [], "safe_patterns": ["我觉得", "说实话", "求证中", "据说"], "humanization_required": False, }, "sensitive_words": { "check_required": True, "categories": ["politics", "adult"], "max_tolerance": 2, "auto_filter": True, }, "seo_rules": { "keyword_density": {"min": 0.5, "max": 2, "recommended": 1}, "keyword_position": ["title", "first_para"], "internal_links": {"min": 0, "max": 2}, }, "geo_rules": { "citation_format": "link", "source_attribution": False, "reference_style": "informal", }, "html_rules": { "supported_tags": ["p", "br"], "banned_tags": ["script", "iframe", "form"], "image_support": True, "video_support": True, "code_block_support": False, }, "publish_rules": { "auto_publish": True, "require_review": False, "publish_timing": "immediate", }, "best_publish_times": ["07:00-09:00", "12:00-13:00", "18:00-20:00", "21:00-23:00"], "best_publish_days": ["每天"], "max_images": 9, "platform_rules": [ "不得发布虚假信息", "不得过度营销", "话题标签有助于曝光", "配图有助于转发", ], "seo_tips": [ "热门话题可增加曝光", "短句更易阅读", "互动有助于上热门", ], }, "xiaohongshu": { "name": "小红书", "platform_type": "种草平台", "priority": "P1", "enabled": True, "content_style": "种草/亲身体验/生活化", "content_length": {"min": 100, "max": 1000, "recommended": 500}, "structure_preference": { "has_intro": True, "has_conclusion": True, "has_toc": False, }, "title_rules": { "min_length": 5, "max_length": 20, "avoid_patterns": ["标题党"], "required_patterns": ["emoji可用"], "case_style": "normal", }, "tag_rules": { "min_tags": 3, "max_tags": 10, "tag_style": "hashtag", }, "ai_sensitivity": { "detection_level": "low", "banned_patterns": ["首先", "其次", "最后", "综上所述"], "banned_structures": [], "safe_patterns": ["我用过", "亲测", "真实体验", "分享一下"], "humanization_required": False, }, "sensitive_words": { "check_required": True, "categories": ["adult"], "max_tolerance": 0, "auto_filter": True, }, "seo_rules": { "keyword_density": {"min": 0.5, "max": 2, "recommended": 1}, "keyword_position": ["title", "first_para"], "internal_links": {"min": 0, "max": 0}, }, "geo_rules": { "citation_format": "plain", "source_attribution": False, "reference_style": "informal", }, "html_rules": { "supported_tags": ["p", "br"], "banned_tags": ["script", "iframe", "form", "h1", "h2", "h3"], "image_support": True, "video_support": True, "code_block_support": False, }, "publish_rules": { "auto_publish": True, "require_review": False, "publish_timing": "immediate", }, "best_publish_times": ["07:00-09:00", "12:00-13:00", "18:00-20:00", "21:00-23:00"], "best_publish_days": ["周末", "周三"], "max_images": 18, "platform_rules": [ "首图质量决定点击率", "正文控制在300-800字", "话题标签3-10个", "不得出现其他平台引流信息", "图片不含水印", ], "seo_tips": [ "标题含数字更吸引点击", "正文用短句+emoji分段", "话题标签放文末", "首句即核心观点", ], }, # ============================================================ # P2 优先级平台 # ============================================================ "bilibili": { "name": "B站", "platform_type": "视频/图文", "priority": "P2", "enabled": True, "content_style": "年轻化/专业/趣味性", "content_length": {"min": 200, "max": 5000, "recommended": 1000}, "structure_preference": { "has_intro": True, "has_conclusion": True, "has_toc": True, }, "title_rules": { "min_length": 5, "max_length": 80, "avoid_patterns": ["标题党", "过度夸张"], "required_patterns": [], "case_style": "normal", }, "tag_rules": { "min_tags": 1, "max_tags": 10, "tag_style": "hashtag", }, "ai_sensitivity": { "detection_level": "medium", "banned_patterns": ["首先", "其次", "最后", "总的来说", "综上所述"], "banned_structures": [], "safe_patterns": ["兄弟们", "家人们", "懂的都懂", "感谢一键三连"], "humanization_required": True, }, "sensitive_words": { "check_required": True, "categories": ["politics", "adult"], "max_tolerance": 0, "auto_filter": True, }, "seo_rules": { "keyword_density": {"min": 0.5, "max": 2, "recommended": 1}, "keyword_position": ["title"], "internal_links": {"min": 0, "max": 0}, }, "geo_rules": { "citation_format": "plain", "source_attribution": True, "reference_style": "informal", }, "html_rules": { "supported_tags": ["p", "h1", "h2", "h3", "ul", "ol", "blockquote", "code", "pre"], "banned_tags": ["script", "iframe", "form"], "image_support": True, "video_support": True, "code_block_support": True, }, "publish_rules": { "auto_publish": True, "require_review": False, "publish_timing": "immediate", }, "best_publish_times": ["16:00-18:00", "20:00-22:00"], "best_publish_days": ["周五", "周六", "周日"], "max_images": 50, "platform_rules": [ "稿件需要过审", "不得搬运他人内容", "封面和标题很重要", "互动有助于推荐", ], "seo_tips": [ "标题包含关键词", "封面吸引人", "标签有助于分类", ], }, "jianshu": { "name": "简书", "platform_type": "内容平台", "priority": "P2", "enabled": True, "content_style": "文艺/真实/个人表达", "content_length": {"min": 500, "max": 50000, "recommended": 2000}, "structure_preference": { "has_intro": True, "has_conclusion": True, "has_toc": False, }, "title_rules": { "min_length": 5, "max_length": 50, "avoid_patterns": ["emoji", "标题党"], "required_patterns": [], "case_style": "normal", }, "tag_rules": { "min_tags": 2, "max_tags": 5, "tag_style": "plain", }, "ai_sensitivity": { "detection_level": "medium", "banned_patterns": ["首先", "其次", "最后", "总的来说"], "banned_structures": [], "safe_patterns": ["我手写我心", "分享一下", "记录一下"], "humanization_required": True, }, "sensitive_words": { "check_required": True, "categories": ["politics", "adult"], "max_tolerance": 0, "auto_filter": True, }, "seo_rules": { "keyword_density": {"min": 0.5, "max": 2, "recommended": 1}, "keyword_position": ["title", "first_para"], "internal_links": {"min": 0, "max": 3}, }, "geo_rules": { "citation_format": "plain", "source_attribution": True, "reference_style": "informal", }, "html_rules": { "supported_tags": ["p", "h1", "h2", "h3", "ul", "ol", "blockquote", "code"], "banned_tags": ["script", "iframe", "form"], "image_support": True, "video_support": False, "code_block_support": True, }, "publish_rules": { "auto_publish": True, "require_review": False, "publish_timing": "immediate", }, "best_publish_times": ["08:00-10:00", "14:00-16:00", "20:00-22:00"], "best_publish_days": ["每天"], "max_images": 30, "platform_rules": [ "鼓励原创", "文艺风格更受欢迎", "配图有助于阅读", ], "seo_tips": [ "标题包含关键词", "合理使用专题", "互动有助于曝光", ], }, "juejin": { "name": "掘金", "platform_type": "技术社区", "priority": "P2", "enabled": True, "content_style": "技术/专业/深度", "content_length": {"min": 500, "max": 50000, "recommended": 3000}, "structure_preference": { "has_intro": True, "has_conclusion": True, "has_toc": True, }, "title_rules": { "min_length": 5, "max_length": 50, "avoid_patterns": ["emoji", "标题党"], "required_patterns": ["技术关键词"], "case_style": "normal", }, "tag_rules": { "min_tags": 3, "max_tags": 5, "tag_style": "hashtag", }, "ai_sensitivity": { "detection_level": "high", "banned_patterns": AI_PATTERNS["banned_transitions"] + AI_PATTERNS["banned_modifiers"], "banned_structures": AI_PATTERNS["banned_structures"], "safe_patterns": ["项目中实际用到", "经过调研发现", "踩坑记录"], "humanization_required": True, }, "sensitive_words": { "check_required": True, "categories": ["politics"], "max_tolerance": 0, "auto_filter": True, }, "seo_rules": { "keyword_density": {"min": 1, "max": 3, "recommended": 2}, "keyword_position": ["title", "first_para", "headings"], "internal_links": {"min": 0, "max": 3}, }, "geo_rules": { "citation_format": "link", "source_attribution": True, "reference_style": "academic", }, "html_rules": { "supported_tags": ["p", "h1", "h2", "h3", "h4", "ul", "ol", "blockquote", "code", "pre"], "banned_tags": ["script", "iframe", "form"], "image_support": True, "video_support": False, "code_block_support": True, }, "publish_rules": { "auto_publish": False, "require_review": True, "publish_timing": "immediate", }, "best_publish_times": ["09:00-11:00", "14:00-16:00", "20:00-22:00"], "best_publish_days": ["周二", "周四", "周六"], "max_images": 30, "platform_rules": [ "技术内容优先", "代码示例有助于理解", "鼓励原创技术文章", "禁止低质量搬运", ], "seo_tips": [ "标题包含技术关键词", "代码块有助于阅读", "标签精准有助于推荐", ], }, "douyin": { "name": "抖音", "platform_type": "短视频平台", "priority": "P1", "enabled": True, "content_style": "短平快/视觉冲击", "content_length": {"min": 0, "max": 5000, "recommended": 0}, "structure_preference": { "has_intro": True, "has_conclusion": False, "has_toc": False, }, "title_rules": { "min_length": 5, "max_length": 55, "avoid_patterns": ["标题党"], "required_patterns": ["悬念"], "case_style": "normal", }, "tag_rules": { "min_tags": 2, "max_tags": 5, "tag_style": "hashtag", }, "ai_sensitivity": { "detection_level": "low", "banned_patterns": [], "banned_structures": [], "safe_patterns": [], "humanization_required": False, }, "sensitive_words": { "check_required": True, "categories": ["politics", "adult"], "max_tolerance": 0, "auto_filter": True, }, "seo_rules": { "keyword_density": {"min": 0.5, "max": 2, "recommended": 1}, "keyword_position": ["title"], "internal_links": {"min": 0, "max": 0}, }, "geo_rules": { "citation_format": "plain", "source_attribution": False, "reference_style": "informal", }, "html_rules": { "supported_tags": [], "banned_tags": [], "image_support": False, "video_support": False, "code_block_support": False, }, "publish_rules": { "auto_publish": True, "require_review": False, "publish_timing": "immediate", }, "best_publish_times": ["06:00-08:00", "12:00-13:00", "18:00-20:00", "22:00-23:00"], "best_publish_days": ["每天"], "max_images": 35, "platform_rules": [ "视频/图文需原创", "不得含其他平台水印", "话题标签2-5个", "文案简短有吸引力", ], "seo_tips": [ "前3秒决定完播率", "标题含热点关键词", "评论区互动提升权重", "合适的话题+POI定位", ], }, } # 诱导分享/关注关键词 _INDUCING_PATTERNS = re.compile( r"(转发|分享|关注|点赞|收藏).{0,4}(领|获|得|拿|解锁|免费)", re.IGNORECASE, ) # 连续特殊符号 _CONSECUTIVE_SYMBOLS = re.compile(r"[!!??]{3,}") # 外部链接(排除公众号和小程序链接) _EXTERNAL_LINK = re.compile( r"https?://(?!mp\.weixin\.qq\.com|wx\.qq\.com|weixin://)[^\s<>))]+", re.IGNORECASE, ) # 标题党关键词 _CLICKBAIT_WORDS = {"震惊", "惊呆", "吓死", "笑死", "疯传", "刷屏", "出大事", "不敢相信"} # 水印检测(简化:检测常见平台水印文本) _WATERMARK_PATTERNS = re.compile( r"(抖音|快手|小红书|微博|B站|bilibili).*(水印|logo)", re.IGNORECASE, ) class PlatformRuleEngine: """平台规则引擎""" def get_platforms(self, enabled_only: bool = True) -> list[dict]: """获取所有支持平台列表 Args: enabled_only: 是否只返回启用的平台 """ platforms = [] for key, val in PLATFORM_RULES.items(): if enabled_only and not val.get("enabled", True): continue platforms.append({ "id": key, "name": val["name"], "platform_type": val.get("platform_type", ""), "priority": val.get("priority", "P2"), "enabled": val.get("enabled", True), "max_title_length": val["title_rules"]["max_length"], "min_title_length": val["title_rules"]["min_length"], "max_content_length": val["content_length"]["max"], "min_content_length": val["content_length"]["min"], "recommended_content_length": val["content_length"]["recommended"], "supported_media": val.get("supported_tags", []), "ai_sensitivity": val.get("ai_sensitivity", {}), "best_publish_times": val.get("best_publish_times", []), "best_publish_days": val.get("best_publish_days", []), }) return platforms def get_platform_rules(self, platform: str) -> dict | None: """获取指定平台的完整规则""" return PLATFORM_RULES.get(platform) def get_platform_rule(self, platform: str, rule_category: str) -> dict | None: """获取指定平台特定类别的规则 Args: platform: 平台标识 rule_category: 规则类别 (title_rules, tag_rules, ai_sensitivity, etc.) """ rules = PLATFORM_RULES.get(platform) if rules is None: return None return rules.get(rule_category) def get_all_rule_categories(self) -> list[str]: """获取所有规则类别""" return [ "content_length", "title_rules", "tag_rules", "ai_sensitivity", "sensitive_words", "seo_rules", "geo_rules", "html_rules", "publish_rules", ] def validate_content(self, content: str, title: str, platform: str) -> dict: """ 校验内容是否符合平台规则 返回: { "is_valid": bool, "score": int (0-100), "issues": [{"severity": "high|medium|low", "message": "...", "category": "..."}], "passed": ["规则1", "规则2"] } """ rules = PLATFORM_RULES.get(platform) if rules is None: return { "is_valid": False, "score": 0, "issues": [{"severity": "high", "message": f"不支持的平台: {platform}", "category": "platform"}], "passed": [], } issues: list[dict] = [] passed: list[str] = [] # --- 标题长度 --- title_len = len(title) title_rules = rules.get("title_rules", {}) max_title = title_rules.get("max_length", 30) min_title = title_rules.get("min_length", 5) if title_len > max_title: issues.append({ "severity": "high", "message": f"标题长度 {title_len} 超过限制 {max_title}", "category": "title_length", }) elif title_len < min_title: issues.append({ "severity": "medium", "message": f"标题长度 {title_len} 低于最低要求 {min_title}", "category": "title_length", }) else: passed.append(f"标题长度合规({title_len}/{max_title})") # --- 内容长度 --- content_len = len(content) content_rules = rules.get("content_length", {}) max_content = content_rules.get("max", 20000) min_content = content_rules.get("min", 0) recommended_content = content_rules.get("recommended", 0) if content_len > max_content: issues.append({ "severity": "high", "message": f"内容长度 {content_len} 超过限制 {max_content}", "category": "content_length", }) elif min_content > 0 and content_len < min_content: issues.append({ "severity": "medium", "message": f"内容长度 {content_len} 低于建议最低 {min_content}", "category": "content_length", }) else: passed.append(f"内容长度合规({content_len}/{max_content})") # --- 标签规则 --- tag_rules = rules.get("tag_rules", {}) min_tags = tag_rules.get("min_tags", 0) max_tags = tag_rules.get("max_tags", 10) # TODO: 需要传入标签数据进行验证 # --- AI敏感度检测 --- ai_sensitivity = rules.get("ai_sensitivity", {}) if ai_sensitivity.get("humanization_required", False): ai_issues = self._check_ai_patterns(content, ai_sensitivity) issues.extend(ai_issues) # --- 平台特有规则 --- platform_specific = self._validate_platform_specific(content, title, platform) issues.extend(platform_specific["issues"]) passed.extend(platform_specific["passed"]) # --- 计算分数 --- total_checks = len(issues) + len(passed) if total_checks == 0: score = 100 else: # high=扣15分, medium=扣8分, low=扣3分 penalty = sum( 15 if i["severity"] == "high" else 8 if i["severity"] == "medium" else 3 for i in issues ) score = max(0, 100 - penalty) return { "is_valid": len([i for i in issues if i["severity"] == "high"]) == 0, "score": score, "issues": issues, "passed": passed, } def _check_ai_patterns(self, content: str, ai_sensitivity: dict) -> list[dict]: """检测AI写作模式""" issues: list[dict] = [] banned_patterns = ai_sensitivity.get("banned_patterns", []) banned_structures = ai_sensitivity.get("banned_structures", []) # 检测禁用词汇 found_banned = [p for p in banned_patterns if p in content] if found_banned: issues.append({ "severity": "medium", "message": f"发现AI写作特征词: {', '.join(found_banned[:3])}", "category": "ai_pattern", }) # 检测禁用结构 for pattern in banned_structures: if re.search(pattern, content): issues.append({ "severity": "medium", "message": "发现AI典型对称结构", "category": "ai_pattern", }) break return issues def _validate_platform_specific( self, content: str, title: str, platform: str ) -> dict: """平台特定规则校验""" issues: list[dict] = [] passed: list[str] = [] if platform == "wechat": # 诱导分享/关注 if _INDUCING_PATTERNS.search(title) or _INDUCING_PATTERNS.search(content): issues.append({ "severity": "high", "message": "包含诱导分享/关注语句", "category": "platform_rule", }) else: passed.append("无诱导分享/关注语句") # 连续特殊符号 if _CONSECUTIVE_SYMBOLS.search(title): issues.append({ "severity": "medium", "message": "标题包含连续特殊符号", "category": "title_format", }) else: passed.append("标题无连续特殊符号") # 外部链接 if _EXTERNAL_LINK.search(content): issues.append({ "severity": "high", "message": "正文包含外部链接(仅支持公众号链接和小程序)", "category": "platform_rule", }) else: passed.append("无外部链接") elif platform == "zhihu": # 营销内容检测(简化) marketing_words = {"购买", "下单", "优惠价", "限时折扣", "点击购买"} found_marketing = marketing_words & set(content) if found_marketing: issues.append({ "severity": "medium", "message": f"疑似营销用语: {', '.join(found_marketing)}", "category": "platform_rule", }) else: passed.append("未检测到过度营销用语") elif platform == "xiaohongshu": # 字数建议 content_len = len(content) if content_len > 800: issues.append({ "severity": "medium", "message": f"正文建议300-800字,当前 {content_len} 字", "category": "content_length", }) elif content_len < 300: issues.append({ "severity": "low", "message": f"正文建议300-800字,当前仅 {content_len} 字", "category": "content_length", }) else: passed.append(f"正文字数适宜({content_len}字)") # 其他平台引流 cross_platform_keywords = ["微信", "公众号", "抖音号", "微博"] found_cross = [p for p in cross_platform_keywords if p in content] if found_cross: issues.append({ "severity": "high", "message": f"疑似其他平台引流: {', '.join(found_cross)}", "category": "platform_rule", }) else: passed.append("未检测到其他平台引流信息") elif platform in ("baijiahao", "toutiao"): # 标题党检测 found_clickbait = _CLICKBAIT_WORDS & set(title) if found_clickbait: issues.append({ "severity": "high", "message": f"标题含标题党词汇: {', '.join(found_clickbait)}", "category": "title_content", }) else: passed.append("标题无标题党词汇") elif platform == "douyin": # 水印检测 if _WATERMARK_PATTERNS.search(content): issues.append({ "severity": "high", "message": "内容包含其他平台水印信息", "category": "platform_rule", }) else: passed.append("未检测到其他平台水印") return {"issues": issues, "passed": passed} def get_optimization_tips(self, platform: str) -> list[str]: """获取平台优化建议""" rules = PLATFORM_RULES.get(platform) if rules is None: return [] return rules.get("seo_tips", []) def get_ai_humanization_config(self, platform: str) -> dict | None: """获取平台去AI化配置""" rules = PLATFORM_RULES.get(platform) if rules is None: return None return rules.get("ai_sensitivity", {}) def get_sensitive_words_config(self, platform: str) -> dict | None: """获取平台敏感词配置""" rules = PLATFORM_RULES.get(platform) if rules is None: return None return rules.get("sensitive_words", {}) def get_seo_config(self, platform: str) -> dict | None: """获取平台SEO配置""" rules = PLATFORM_RULES.get(platform) if rules is None: return None return rules.get("seo_rules", {}) def get_html_config(self, platform: str) -> dict | None: """获取平台HTML规则配置""" rules = PLATFORM_RULES.get(platform) if rules is None: return None return rules.get("html_rules", {}) # 导出单例 rule_engine = PlatformRuleEngine()