# geo/backend/app/services/distribution/publish_strategy.py

"""发布策略服务"""

import random
from datetime import datetime, timedelta

from app.services.distribution.platform_rules import PLATFORM_RULES


# Industry -> platform priority mapping. Each value lists platform keys from
# highest to lowest priority; generate_strategy sorts the requested platforms
# by their position in this list. "通用" (generic) is the fallback entry used
# when the requested industry has no list of its own.
_INDUSTRY_PRIORITY: dict[str, list[str]] = {
    "科技": ["zhihu", "wechat", "baijiahao", "toutiao"],
    "美妆": ["xiaohongshu", "douyin", "wechat"],
    "教育": ["wechat", "zhihu", "baijiahao"],
    "金融": ["wechat", "baijiahao", "toutiao", "zhihu"],
    "餐饮": ["xiaohongshu", "douyin", "wechat"],
    "通用": ["wechat", "zhihu", "xiaohongshu", "baijiahao", "toutiao", "douyin"],
}

# Per-platform tag style templates. Every template contains a "{keyword}"
# placeholder that is filled in with str.format(keyword=...); platforms that
# are missing here are rendered with the bare "{keyword}" template by callers.
_TAG_TEMPLATES: dict[str, list[str]] = {
    "wechat": ["#{keyword}研究", "#{keyword}洞察", "#{keyword}实践"],
    "zhihu": ["#{keyword}", "#{keyword}领域", "#{keyword}深度解析"],
    "xiaohongshu": ["{keyword}", "{keyword}推荐", "{keyword}分享", "{keyword}攻略"],
    "baijiahao": ["#{keyword}#", "#{keyword}话题"],
    "douyin": ["#{keyword}", "#{keyword}热门", "#{keyword}话题"],
    "toutiao": ["#{keyword}", "#{keyword}热点", "#{keyword}关注"],
}

# Generic strategy tips. _pick_tips draws from this pool at random to pad the
# tip list when fewer than three platform-specific tips apply.
_STRATEGY_TIPS: list[str] = [
    "知乎首发可获取原创保护标识",
    "各平台间隔24h发布避免被判定为搬运",
    "公众号适合深度长文，建议优先发布",
    "小红书首图决定点击率，提前准备视觉素材",
    "抖音内容建议配合短视频，纯文字效果有限",
    "百家号和头条文章同步时，标题需差异化",
    "周末流量分布与工作日不同，注意错峰",
    "多平台发布时调整发布时间，避免同一时段密集推送",
]


class PublishStrategyService:
    """发布策略服务"""

    def generate_strategy(
        self,
        content_title: str,
        platforms: list[str],
        industry: str = "通用",
    ) -> dict:
        """Build a multi-platform publishing strategy for one piece of content.

        Args:
            content_title: Title of the content; tag keywords are derived
                from it.
            platforms: Requested platform keys. Keys that are not in
                ``PLATFORM_RULES`` are ignored, and repeated keys are
                collapsed to their first occurrence. ``None`` is treated as
                an empty list.
            industry: Industry name used to look up the platform priority
                order; unknown industries fall back to the "通用" order.

        Returns:
            A dict of the form::

                {
                    "schedule": [
                        {"platform": "zhihu", "platform_name": "...",
                         "suggested_time": "...", "reason": "..."},
                        ...
                    ],
                    "tags": {"zhihu": ["#AI营销", ...], ...},
                    "tips": ["...", ...],
                }

            When no requested platform is valid, ``schedule`` and ``tags``
            are empty and ``tips`` holds a single message asking for a valid
            platform.
        """
        # Keep only known platforms. dict.fromkeys drops repeats while
        # preserving first-seen order, so a platform is never scheduled twice
        # and a single repeated key does not count as "multi-platform".
        valid_platforms = list(
            dict.fromkeys(p for p in (platforms or []) if p in PLATFORM_RULES)
        )
        if not valid_platforms:
            return {"schedule": [], "tags": {}, "tips": ["请选择至少一个有效平台"]}

        # Rank platforms by their position in the industry's priority list;
        # platforms the list does not mention sort after all listed ones.
        priority_order = _INDUSTRY_PRIORITY.get(industry, _INDUSTRY_PRIORITY["通用"])
        rank: dict[str, int] = {}
        for position, name in enumerate(priority_order):
            rank.setdefault(name, position)
        sorted_platforms = sorted(
            valid_platforms,
            key=lambda p: rank.get(p, len(priority_order)),
        )

        # The schedule follows priority order; tags and tips keep the
        # caller's original platform order.
        return {
            "schedule": self._build_schedule(sorted_platforms),
            "tags": self._build_tags(content_title, valid_platforms),
            "tips": self._pick_tips(valid_platforms),
        }

    def _build_schedule(self, platforms: list[str]) -> list[dict]:
        """Build one schedule entry per platform, in the order given.

        Each platform is placed on the next occurrence of its recommended
        start time that is at least four hours after the previous platform's
        suggested time. Anchoring on the previous slot (rather than on a
        fixed offset from "now") guarantees that two platforms sharing the
        same recommended hour never receive the same timestamp and that the
        schedule is chronological in the given order.

        Args:
            platforms: Platform keys; every key must exist in
                ``PLATFORM_RULES``.

        Returns:
            A list of dicts with the keys ``platform``, ``platform_name``,
            ``suggested_time`` (formatted as ``YYYY-MM-DD HH:MM``) and
            ``reason``.
        """
        min_gap = timedelta(hours=4)
        earliest = datetime.now()
        schedule: list[dict] = []

        for platform_key in platforms:
            rules = PLATFORM_RULES[platform_key]
            best_times = rules.get("best_publish_times", [])
            best_days = rules.get("best_publish_days", [])

            # The slot finder never returns a time before its base time, so
            # passing `earliest` as the base keeps the minimum gap intact.
            suggested_time = self._find_next_publish_slot(
                earliest, 0, best_times, best_days
            )

            # Human-readable justification built from the first recommended
            # day/time of the platform.
            time_desc = best_times[0] if best_times else "任意时间"
            day_desc = best_days[0] if best_days else "任意日期"
            reason = f"{rules['name']}{day_desc}{time_desc}流量高峰"

            schedule.append({
                "platform": platform_key,
                "platform_name": rules["name"],
                "suggested_time": suggested_time.strftime("%Y-%m-%d %H:%M"),
                "reason": reason,
            })

            # The next platform may not be published earlier than this.
            earliest = suggested_time + min_gap

        return schedule

    def _find_next_publish_slot(
        self,
        now: datetime,
        offset_hours: int,
        best_times: list[str],
        best_days: list[str],
    ) -> datetime:
        """Return the next suggested publish time at or after ``now + offset_hours``.

        Only the start of the first entry in ``best_times`` is considered.
        The expected shape is ``"HH:MM-HH:MM"``; hours without zero padding
        (``"8:00-9:30"``), a bare hour (``"8"``) and the compact form
        ``"0800"`` are accepted as well. A start time that cannot be parsed
        or is out of range falls back to 10:00.

        Args:
            now: Reference time.
            offset_hours: Hours added to ``now`` to obtain the earliest
                allowed time (the base time).
            best_times: Recommended time ranges; may be empty.
            best_days: Recommended days. Currently unused: the result is not
                aligned to any particular day.

        Returns:
            With ``best_times``: the first occurrence (today or tomorrow,
            relative to the base time) of the slot's start time that is not
            earlier than the base time. Without: the base time truncated to
            the hour, plus one hour.
        """
        base = now + timedelta(hours=offset_hours)

        if not best_times:
            # No recommended slot: round up to the next full hour.
            return base.replace(minute=0, second=0, microsecond=0) + timedelta(hours=1)

        target_hour, target_minute = 10, 0  # fallback when parsing fails
        try:
            # Start of the first range; tolerate spaces and a full-width colon.
            start_str = str(best_times[0]).split("-")[0].strip().replace("：", ":")
            if ":" in start_str:
                fields = start_str.split(":")
                hour, minute = int(fields[0]), int(fields[1])
            elif len(start_str) == 4 and start_str.isdigit():
                hour, minute = int(start_str[:2]), int(start_str[2:])
            else:
                hour, minute = int(start_str), 0
        except (ValueError, IndexError):
            pass
        else:
            # datetime.replace raises ValueError for out-of-range fields, so
            # only accept values that form a valid wall-clock time.
            if 0 <= hour <= 23 and 0 <= minute <= 59:
                target_hour, target_minute = hour, minute

        candidate = base.replace(
            hour=target_hour, minute=target_minute, second=0, microsecond=0
        )
        # The slot start has already passed for the base day: use the next day.
        if candidate < base:
            candidate += timedelta(days=1)
        return candidate

    def _build_tags(self, title: str, platforms: list[str]) -> dict[str, list[str]]:
        """Suggest tags for every requested platform.

        Keywords are extracted from the title and at most the first five are
        used. For each platform the keywords are rendered through that
        platform's templates (keyword by keyword, template by template) and
        the first eight distinct tags are kept. A platform without its own
        templates gets the bare keyword.

        Args:
            title: Content title the keywords are taken from.
            platforms: Platform keys to build tags for.

        Returns:
            Mapping of platform key to its list of suggested tags.
        """
        max_tags = 8
        top_keywords = self._extract_keywords(title)[:5]

        suggestions: dict[str, list[str]] = {}
        for key in platforms:
            patterns = _TAG_TEMPLATES.get(key, ["{keyword}"])
            # Lazily render keyword x template combinations in order.
            rendered = (
                pattern.format(keyword=word)
                for word in top_keywords
                for pattern in patterns
            )
            unique: list[str] = []
            for tag in rendered:
                if tag in unique:
                    continue
                unique.append(tag)
                if len(unique) >= max_tags:
                    break
            suggestions[key] = unique

        return suggestions

    def _extract_keywords(self, title: str) -> list[str]:
        """Extract up to six keyword candidates from a title (simplified).

        The title is split on common separators and fragments of at least two
        characters are kept. If no fragment qualifies, the first four
        characters of the stripped title are used as a single keyword.

        Args:
            title: Text to extract keywords from.

        Returns:
            Up to six keywords. Empty when the title is empty, whitespace
            only, or made up of separators only, so callers never render tags
            from an empty keyword (e.g. a bare "#研究").
        """
        if not title or not title.strip():
            return []

        fragments = [fragment.strip() for fragment in _split_title(title)]
        keywords = [fragment for fragment in fragments if len(fragment) >= 2]
        if keywords:
            return keywords[:6]

        # Nothing but separators: there is no usable text at all.
        if not any(fragments):
            return []

        # Only very short fragments: fall back to the head of the title.
        return [title.strip()[:4]]

    def _pick_tips(self, platforms: list[str]) -> list[str]:
        """Choose strategy tips for the selected platforms.

        Tips are collected in a fixed order: a multi-platform spacing tip
        (when more than one platform is given), one tip per selected platform
        that has a dedicated tip, and a title tip when both baijiahao and
        toutiao are selected. If fewer than three tips result, random entries
        from the generic pool are appended until there are three or the pool
        is exhausted.

        NOTE(review): pool entries are skipped only when their text is
        identical to a tip already chosen, so a differently worded tip on the
        same subject can still be added.

        Args:
            platforms: Selected platform keys.

        Returns:
            At most five tips.
        """
        per_platform = (
            ("zhihu", "知乎首发可获取原创保护标识，建议优先发布"),
            ("wechat", "公众号适合深度长文，摘要控制在120字内"),
            ("xiaohongshu", "小红书首图决定点击率，提前准备高质量视觉素材"),
            ("douyin", "抖音内容建议配合短视频，纯文字效果有限"),
        )

        chosen: list[str] = []
        if len(platforms) > 1:
            chosen.append("多平台发布时，建议间隔4-24小时避免判定搬运")
        chosen.extend(tip for key, tip in per_platform if key in platforms)
        if "baijiahao" in platforms and "toutiao" in platforms:
            chosen.append("百家号和头条同步时，标题需差异化处理")

        # Pad from the generic pool, drawing without replacement.
        pool = [tip for tip in _STRATEGY_TIPS if tip not in chosen]
        while pool and len(chosen) < 3:
            index = random.randint(0, len(pool) - 1)
            chosen.append(pool.pop(index))

        return chosen[:5]

    def suggest_tags(self, content: str, platform: str, count: int = 5) -> list[str]:
        """Suggest topic tags for one platform.

        Keywords are extracted from the first 200 characters of ``content``
        and rendered through the platform's tag templates; a platform without
        its own templates gets the bare keyword.

        Args:
            content: Text to derive keywords from.
            platform: Platform key used to select the tag templates.
            count: Maximum number of tags to return.

        Returns:
            Up to ``count`` distinct tags in keyword-then-template order.
            Empty when ``count`` is not positive or the inspected part of
            ``content`` is empty or whitespace only.
        """
        if count <= 0:
            return []

        snippet = (content or "")[:200]
        if not snippet.strip():
            # No text to work with; avoid rendering templates with an empty keyword.
            return []

        templates = _TAG_TEMPLATES.get(platform, ["{keyword}"])
        keywords = self._extract_keywords(snippet)

        tags: list[str] = []
        for kw in keywords:
            for tpl in templates:
                tag = tpl.format(keyword=kw)
                if tag in tags:
                    continue
                tags.append(tag)
                if len(tags) >= count:
                    return tags

        return tags


def _split_title(title: str) -> list[str]:
    """Split a title into fragments on common separators.

    A run of one or more of "|", "-", "·", "—", whitespace, ",", "，", "、",
    "：" and ":" counts as a single delimiter. A title that starts or ends
    with a delimiter produces empty-string fragments at that end.
    """
    import re

    separator_run = re.compile(r"[|\-·—\s,，、：:]+")
    return separator_run.split(title)