fischer-agentkit/src/agentkit/chat/skill_routing.py

"""Shared skill routing logic for GUI and CLI chat.

Extracts the duplicated skill routing, @skill: prefix parsing,
and prompt assembly into a single module used by both chat routes.
"""

from __future__ import annotations

import enum
import json
import logging
import re
from dataclasses import dataclass, field
from typing import Any

from agentkit.marketplace.auction import AuctionHouse, Bid
from agentkit.telemetry.tracer import get_tracer

logger = logging.getLogger(__name__)

# Strict validation: only lowercase alphanumeric, hyphens, underscores.
# The first character must be a lowercase letter or digit, and the whole name
# is 1 to 64 characters long (one leading character plus up to 63 more).
_SKILL_NAME_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$")


class ExecutionMode(enum.Enum):
    """How the downstream should execute this routing result.

    This is the single source of truth for execution path selection.
    The transport layer (portal.py, chat.py) should branch on this
    field instead of string-matching match_method.

    Each member's value is the lowercase string form of its name.
    """

    DIRECT_CHAT = "direct_chat"  # Zero-cost: direct LLM call, no ReAct loop
    REACT = "react"  # Default agent ReAct loop with default tools
    SKILL_REACT = "skill_react"  # Skill-matched ReAct with skill tools + prompt
    REWOO = "rewoo"  # Plan-without-observation mode
    REFLEXION = "reflexion"  # Reflection-driven mode
    PLAN_EXEC = "plan_exec"  # Plan-then-execute mode
    TEAM_COLLAB = "team_collab"  # Expert Team collaborative mode


# Mapping from skill config execution_mode string to ExecutionMode enum.
# Note that a skill configured as "react" maps to SKILL_REACT (not REACT), and
# that "custom", "llm_generate" and "tool_call" all collapse to SKILL_REACT too.
# No key in this table maps to REACT or TEAM_COLLAB.
_SKILL_EXECUTION_MODE_MAP: dict[str, ExecutionMode] = {
    "direct": ExecutionMode.DIRECT_CHAT,
    "react": ExecutionMode.SKILL_REACT,
    "rewoo": ExecutionMode.REWOO,
    "reflexion": ExecutionMode.REFLEXION,
    "plan_exec": ExecutionMode.PLAN_EXEC,
    "custom": ExecutionMode.SKILL_REACT,
    "llm_generate": ExecutionMode.SKILL_REACT,
    "tool_call": ExecutionMode.SKILL_REACT,
}


def _resolve_execution_mode(skill_config: Any) -> ExecutionMode:
    """Translate a skill config's ``execution_mode`` string into an ExecutionMode.

    A missing or falsy ``execution_mode`` is treated as ``"react"``; any string
    that is not a key of the mapping table resolves to ``SKILL_REACT``.
    """
    configured = getattr(skill_config, "execution_mode", None)
    if not configured:
        configured = "react"
    return _SKILL_EXECUTION_MODE_MAP.get(configured, ExecutionMode.SKILL_REACT)


def validate_skill_name(name: str) -> str:
    """Return *name* trimmed and lowercased if it is a well-formed skill name.

    Raises:
        ValueError: if the normalized name does not match the skill-name pattern.
    """
    candidate = name.strip().lower()
    if _SKILL_NAME_RE.match(candidate):
        return candidate
    # The error reports the caller's original input, not the normalized form.
    raise ValueError(f"Invalid skill name '{name}': must match [a-z0-9][a-z0-9_-]{{0,63}}")


@dataclass
class SkillRoutingResult:
    """Result of skill routing for a user message.

    Carries both the match outcome (which skill, how it was matched, with what
    confidence) and the execution parameters the caller should use (prompt,
    tools, model, agent name, execution mode).
    """

    # Name of the matched skill; None when no skill was selected.
    skill_name: str | None = None
    # The matched skill's ``config`` object as returned by the skill registry.
    skill_config: Any = None
    # Tools contributed by the matched skill itself (empty when it has none).
    skill_tools: list = field(default_factory=list)
    # The user message with any leading "@skill:<name>" prefix removed.
    clean_content: str = ""
    # Final system prompt: the skill prompt or the default prompt, with the
    # tool-usage section appended when tools are available.
    system_prompt: str | None = None
    # Tools to expose for this turn: skill tools merged with agent tools when a
    # skill matched, otherwise the default tools.
    tools: list = field(default_factory=list)
    # Model identifier; taken from the skill's ``llm`` settings when present.
    model: str = "default"
    # Agent name to run under: the skill name on a match, else the default agent.
    agent_name: str | None = None
    # True when a skill was selected by any routing step.
    matched: bool = False
    # How the match was made, e.g. "explicit", "semantic_force", "merged_llm",
    # or the method reported by the intent router.
    match_method: str | None = None
    # Confidence of the match (1.0 for explicit / forced matches).
    match_confidence: float = 0.0
    # NOTE(review): not assigned by the routing code visible in this module;
    # semantics are presumably defined by the consumer — confirm.
    transparency_level: str = "SILENT"
    # NOTE(review): not populated by the routing code visible in this module.
    execution_trace: list[dict] = field(default_factory=list)
    # Complexity estimate in [0.0, 1.0]; set by the cost-aware router paths.
    complexity: float = 0.0
    # Execution path the transport layer should take for this result.
    execution_mode: ExecutionMode = ExecutionMode.DIRECT_CHAT


def parse_skill_prefix(content: str) -> tuple[str | None, str]:
    """Parse @skill:name prefix from user message.

    Returns (skill_name_or_None, clean_content).
    """
    if not content.startswith("@skill:"):
        return None, content

    parts = content.split(" ", 1)
    skill_ref = parts[0][7:]  # strip "@skill:"
    explicit_skill = skill_ref.strip()
    clean = parts[1].strip() if len(parts) > 1 else content[7 + len(skill_ref) :].strip()
    return explicit_skill, clean


def build_skill_system_prompt(skill_config) -> str | None:
    """Build system prompt from skill config's prompt section."""
    if not skill_config or not skill_config.prompt:
        return None
    prompt_parts = []
    for key in ("identity", "context", "instructions", "constraints", "output_format"):
        val = skill_config.prompt.get(key)
        if val:
            prompt_parts.append(val)
    return "\n\n".join(prompt_parts) if prompt_parts else None


def _task_hints_at_tool_use(content: str) -> bool:
    """Return True when *content* contains a keyword suggesting tool use."""
    tool_hints = (
        "执行",
        "运行",
        "命令",
        "终端",
        "shell",
        "bash",
        "搜索",
        "查找",
        "联网",
        "search",
        "安装",
        "部署",
        "启动",
        "停止",
        "重启",
        "文件",
        "目录",
        "创建",
        "删除",
        "修改",
        "run",
        "execute",
        "install",
        "deploy",
        "start",
        "stop",
        "restart",
        "file",
    )
    lowered = content.lower()
    return any(hint in lowered for hint in tool_hints)


def _adopt_skill(
    result: SkillRoutingResult,
    skill_name: str,
    skill: Any,
    method: str | None,
    confidence: float,
) -> None:
    """Record *skill* as the match on *result*.

    The skill's attributes are read before any field is written, so a failure
    while reading them leaves *result* untouched instead of half-populated.
    """
    config = skill.config
    tools = skill.tools or []
    result.skill_name = skill_name
    result.skill_config = config
    result.skill_tools = tools
    result.matched = True
    result.match_method = method
    result.match_confidence = confidence


def _merge_tools_by_name(*tool_groups: Any) -> list:
    """Concatenate tool collections, keeping the first tool seen for each name."""
    seen_names: set = set()
    merged: list = []
    for group in tool_groups:
        for tool in group or ():
            if tool.name not in seen_names:
                seen_names.add(tool.name)
                merged.append(tool)
    return merged


async def resolve_skill_routing(
    content: str,
    skill_registry: Any,
    intent_router: Any,
    default_tools: list,
    default_system_prompt: str | None,
    default_model: str = "default",
    default_agent_name: str = "default",
    agent_tool_registry: Any = None,
    session_id: str = "",
    force_skill: str | None = None,
) -> SkillRoutingResult:
    """Resolve skill routing for a user message.

    This is the shared entry point used by both GUI WebSocket chat and CLI chat.
    Matching is attempted in order and stops at the first success:

    1. an explicit ``@skill:<name>`` prefix in *content*;
    2. *force_skill* (supplied by the caller);
    3. the intent router, over skills that declare intent keywords, accepted
       only at confidence >= 0.5.

    Args:
        content: Raw user message, possibly starting with ``@skill:<name>``.
        skill_registry: Object exposing ``get(name)`` and ``list_skills()``;
            routing is skipped when falsy.
        intent_router: Object exposing an awaitable ``route(input_data=, skills=)``;
            step 3 is skipped when falsy.
        default_tools: Tools used when no skill matches (and as agent tools
            when *agent_tool_registry* is not given).
        default_system_prompt: Prompt used when the skill provides none.
        default_model: Model used when the skill does not configure one.
        default_agent_name: Agent name used when no skill matches.
        agent_tool_registry: Optional object exposing ``list_tools()``; when
            given, its tools are merged with the skill's tools.
        session_id: Only used to tag log messages.
        force_skill: Skill name to use when there is no explicit match.

    Returns:
        A SkillRoutingResult with all execution parameters set. Lookup and
        router failures are logged and fall through to the default agent.
    """
    result = SkillRoutingResult()

    # Parse @skill: prefix
    explicit_skill, clean_content = parse_skill_prefix(content)
    result.clean_content = clean_content

    if explicit_skill:
        logger.info(f"Session {session_id}: explicit skill reference: {explicit_skill}")

    # 1. Explicit skill match
    if explicit_skill and skill_registry:
        try:
            _adopt_skill(
                result, explicit_skill, skill_registry.get(explicit_skill), "explicit", 1.0
            )
        except Exception as e:
            logger.warning(
                f"Session {session_id}: explicit skill '{explicit_skill}' not found: {e}"
            )
        else:
            logger.info(f"Session {session_id}: using explicit skill '{explicit_skill}'")

    # 2. force_skill match (from semantic router high confidence)
    if not result.matched and force_skill and skill_registry:
        try:
            _adopt_skill(
                result, force_skill, skill_registry.get(force_skill), "semantic_force", 1.0
            )
        except Exception as e:
            logger.warning(f"Session {session_id}: force skill '{force_skill}' not found: {e}")
        else:
            logger.info(f"Session {session_id}: using force-matched skill '{force_skill}'")

    # 3. IntentRouter, only over skills that declare intent keywords
    if not result.matched and skill_registry and intent_router:
        # Guard against skills without an intent section, as the merged
        # classifier in this module does when it builds its skill list.
        routable_skills = [
            s
            for s in skill_registry.list_skills()
            if s.config.intent and s.config.intent.keywords
        ]
        if routable_skills:
            try:
                routing_result = await intent_router.route(
                    input_data={"content": clean_content},
                    skills=routable_skills,
                )
                if routing_result and routing_result.confidence >= 0.5:
                    skill_name = routing_result.matched_skill
                    try:
                        matched_skill = skill_registry.get(skill_name)
                        skill_config = matched_skill.config
                        skill_tools = matched_skill.tools or []
                        execution_mode = getattr(skill_config, "execution_mode", "react")

                        # A direct-mode skill without tools cannot execute tasks
                        # that need tool use (shell, search, ...). When the
                        # message hints at such a task, skip the match so the
                        # default agent, which has full tool access, handles it.
                        toolless_direct = execution_mode == "direct" and not skill_tools
                        if toolless_direct and _task_hints_at_tool_use(clean_content):
                            logger.info(
                                f"Session {session_id}: skill '{skill_name}' is direct-mode "
                                f"but task may need tools, falling through to default agent"
                            )
                        else:
                            _adopt_skill(
                                result,
                                skill_name,
                                matched_skill,
                                routing_result.method,
                                routing_result.confidence,
                            )
                            logger.info(
                                f"Session {session_id}: routed to skill '{skill_name}' "
                                f"via {routing_result.method} (confidence={routing_result.confidence})"
                            )
                    except Exception as e:
                        logger.warning(
                            f"Session {session_id}: skill '{skill_name}' found by router but not in registry: {e}"
                        )
            except Exception as e:
                logger.warning(f"Skill routing failed for session {session_id}: {e}")

    # Determine execution parameters
    if result.matched and result.skill_config:
        result.system_prompt = (
            build_skill_system_prompt(result.skill_config) or default_system_prompt
        )

        # Skill tools come first so they win over same-named agent tools.
        agent_tools = agent_tool_registry.list_tools() if agent_tool_registry else default_tools
        result.tools = _merge_tools_by_name(result.skill_tools, agent_tools)

        llm_settings = result.skill_config.llm
        result.model = llm_settings.get("model", default_model) if llm_settings else default_model
        result.agent_name = result.skill_name
        # Map skill.config.execution_mode to ExecutionMode enum
        result.execution_mode = _resolve_execution_mode(result.skill_config)
    else:
        result.system_prompt = default_system_prompt
        result.tools = default_tools
        result.model = default_model
        result.agent_name = default_agent_name
        # No skill matched — if we have tools, use ReAct; otherwise direct chat
        result.execution_mode = ExecutionMode.REACT if default_tools else ExecutionMode.DIRECT_CHAT

    # Append available tools to system prompt so LLM knows what it can call
    if result.tools:
        tools_desc = _build_tools_description(result.tools)
        tool_instruction = (
            "\n\n## Tool Usage\n"
            "You have access to the following tools. When you need to use a tool, "
            "respond with a tool call in the format specified by the system.\n"
            "Never make up information or guess answers when you can use a tool to find the answer.\n"
            "Always prefer using tools over guessing.\n"
        )
        tool_section = f"{tool_instruction}\n## Available Tools\n{tools_desc}"
        result.system_prompt = (result.system_prompt or "") + tool_section

    return result


def _build_tools_description(tools: list) -> str:
    """Build a text description of tools for the system prompt."""
    lines = []
    for tool in tools:
        desc = getattr(tool, "description", "")
        lines.append(f"- **{tool.name}**: {desc}")
        schema = getattr(tool, "input_schema", None)
        if schema and "properties" in schema:
            params = list(schema["properties"].keys())
            if params:
                lines[-1] += f" (parameters: {', '.join(params)})"
    return "\n".join(lines)


# ---------------------------------------------------------------------------
# CostAwareRouter - three-layer cost-aware routing
# ---------------------------------------------------------------------------

# Whole-message greetings (Chinese or English). The pattern is anchored at both
# ends, allows optional whitespace and trailing punctuation after the greeting,
# and is matched case-insensitively.
_GREETING_RE = re.compile(
    r"^(你好|hi|hello|hey|嗨|哈喽|早上好|下午好|晚上好|good morning|good afternoon|good evening)\s*[!！.。?？]*$",
    re.IGNORECASE,
)

# Whole-message acknowledgements / thanks / farewells, with the same anchoring
# and trailing-punctuation allowance as the greeting pattern.
_CHAT_MODE_RE = re.compile(
    r"^(谢谢|感谢|thanks|thank you|ok|好的|嗯|对|是|不是|没关系|再见|bye|goodbye)\s*[!！.。?？]*$",
    re.IGNORECASE,
)

# Simple identity/meta questions — zero-cost direct chat, no skill routing needed
_IDENTITY_RE = re.compile(
    r"^(你是谁|你叫什么|你是什么|你是哪个|who are you|what are you|what's your name"
    r"|介绍一下你自己|自我介绍|你叫啥|你叫什么名字|你的名字)"
    r"\s*[?？!！.。]*$",
    re.IGNORECASE,
)

# Clause/sentence delimiters: Chinese and ASCII commas, periods, exclamation
# marks, question marks and semicolons, plus newline.
_SENTENCE_SPLIT_RE = re.compile(r"[，。！？；\n,.!?;]")


def _tokenize_content(content: str) -> list[str]:
    """Tokenize content for capability matching. Supports Chinese and English."""
    # 1. Split by punctuation and whitespace
    segments = re.split(r"[\s,，。！？、；：\n]+", content)

    # 2. For long Chinese segments, add 2-gram supplements
    tokens = []
    for seg in segments:
        if len(seg) <= 4:
            tokens.append(seg)
        else:
            tokens.append(seg)
            # Add 2-grams for Chinese compound words
            for i in range(len(seg) - 1):
                bigram = seg[i : i + 2]
                if all("\u4e00" <= c <= "\u9fff" for c in bigram):
                    tokens.append(bigram)

    # 3. Filter stopwords
    stopwords = {
        "的",
        "了",
        "是",
        "在",
        "和",
        "与",
        "也",
        "都",
        "就",
        "要",
        "会",
        "我",
        "你",
        "他",
        "这",
        "那",
        "有",
        "没",
        "不",
    }
    tokens = [t for t in tokens if t not in stopwords and len(t) > 1][:10]

    return tokens


class HeuristicClassifier:
    """Zero-cost local heuristic classifier, used in place of the LLM ``quick_classify``.

    Estimates message complexity in the range 0.0-1.0 from keyword hits,
    message length, sentence count and code-like punctuation. It makes no LLM
    call and keeps no per-call state.
    """

    # High-complexity hints (task needs tools or multi-step reasoning).
    # Chinese keywords use substring matching (Chinese has no natural word boundaries).
    _HIGH_COMPLEXITY_HINTS_CN = {
        "执行", "运行", "命令", "终端", "安装", "部署", "启动", "停止", "重启", "配置",
        "搜索", "查找", "联网", "文件", "目录", "创建", "删除", "修改", "编辑", "分析",
        "比较", "对比", "评估", "调研", "研究", "设计", "规划", "方案", "架构", "实现",
        "开发", "代码", "编程", "函数", "接口", "调试", "重构",
    }

    # English keywords use word-boundary matching (so "profile" does not match "file").
    _HIGH_COMPLEXITY_HINTS_EN = {
        "shell", "bash", "script", "search", "query", "directory", "execute", "install",
        "deploy", "restart", "modify", "analyze", "compare", "evaluate", "research",
        "design", "implement", "develop", "refactor", "debug", "python", "javascript",
        "typescript", "sql",
    }

    # Short English words that must match as whole words.
    _HIGH_COMPLEXITY_EXACT_EN = {
        "run", "find", "start", "stop", "file", "create", "delete", "plan", "build",
        "code", "program", "function", "class", "interface", "api",
    }

    # Medium-complexity hints (simple question that still needs some thought).
    # "怎么" is deliberately absent because "怎么样" is small talk, not a tool need.
    _MEDIUM_COMPLEXITY_HINTS_CN = {
        "如何", "怎样", "为什么", "什么原因", "区别", "推荐", "建议", "选择", "哪个",
    }

    _MEDIUM_COMPLEXITY_HINTS_EN = {
        "difference", "explain", "recommend", "suggest", "choose",
    }

    # Short English question words, matched as whole words.
    _MEDIUM_COMPLEXITY_EXACT_EN = {
        "how", "why", "what", "which",
    }

    # Low-complexity hints (greetings / small talk / identity questions; no tools needed).
    _LOW_COMPLEXITY_HINTS_CN = {
        "你好", "嗨", "早上好", "下午好", "晚上好", "再见", "谢谢", "辛苦", "你是谁",
        "你叫什么", "你是什么", "自我介绍", "天气", "今天", "怎么样", "闲聊", "聊天",
    }

    # NOTE(review): "what are you" / "how are you" also contain the medium-level
    # words "what" / "how", and low signals only apply when no medium or high
    # signal is present, so those two phrases never take effect on their own.
    _LOW_COMPLEXITY_HINTS_EN = {
        "hello", "hi", "hey", "good morning", "good afternoon", "good evening",
        "goodbye", "thanks", "who are you", "what are you", "your name",
        "introduce yourself", "how are you", "chat",
    }

    # Negation context ("don't X": X must not count as a high-complexity hit).
    # Captures 1-4 Chinese characters or one English word after the negation,
    # which keeps the capture from swallowing a whole following clause.
    _NEGATION_PATTERNS = re.compile(
        r"(?:不要|无需|不用|不需要|别|don'?t|no need|without|not)\s*"
        r"([\u4e00-\u9fff]{1,4}|[a-zA-Z]+)",
        re.IGNORECASE,
    )

    # Message ends with a question mark (the length limit is applied in classify()).
    _SHORT_QUESTION_RE = re.compile(r"[？?]\s*$")

    # Precompiled word-boundary patterns for the English keyword sets.
    _HIGH_EN_RE = re.compile(
        r"\b("
        + "|".join(re.escape(w) for w in sorted(_HIGH_COMPLEXITY_HINTS_EN, key=len, reverse=True))
        + r")\b",
        re.IGNORECASE,
    )
    _HIGH_EXACT_RE = re.compile(
        r"\b("
        + "|".join(re.escape(w) for w in sorted(_HIGH_COMPLEXITY_EXACT_EN, key=len, reverse=True))
        + r")\b",
        re.IGNORECASE,
    )
    _MEDIUM_EN_RE = re.compile(
        r"\b("
        + "|".join(re.escape(w) for w in sorted(_MEDIUM_COMPLEXITY_HINTS_EN, key=len, reverse=True))
        + r")\b",
        re.IGNORECASE,
    )
    _MEDIUM_EXACT_RE = re.compile(
        r"\b("
        + "|".join(re.escape(w) for w in sorted(_MEDIUM_COMPLEXITY_EXACT_EN, key=len, reverse=True))
        + r")\b",
        re.IGNORECASE,
    )
    # Low-complexity English hints also need word boundaries: with plain
    # substring matching "hi" would fire inside "this" or "history".
    _LOW_EN_RE = re.compile(
        r"\b("
        + "|".join(re.escape(w) for w in sorted(_LOW_COMPLEXITY_HINTS_EN, key=len, reverse=True))
        + r")\b",
        re.IGNORECASE,
    )

    def classify(self, content: str) -> float:
        """Estimate the complexity of a message as a score in [0.0, 1.0].

        Scoring rules:
        - empty / whitespace-only input -> 0.0
        - low-complexity signal (greeting, small talk, identity question) with
          no medium or high keyword -> 0.05 (slightly more for long messages)
        - no keyword at all -> base 0.10
        - at least one medium keyword -> base 0.35
        - one high keyword -> base 0.65; two or more -> base 0.80
        - high keywords that directly follow a negation are not counted
        - bonuses for message length, multiple clauses and code-like characters
        - a deduction for a short message that ends with a question mark

        Args:
            content: The user message.

        Returns:
            The score, clamped to [0.0, 1.0].
        """
        if not content or not content.strip():
            return 0.0

        content_lower = content.lower()
        length = len(content)

        # 0. Low-complexity signal (only honored when nothing stronger is found).
        has_low_signal = any(
            hint in content_lower for hint in self._LOW_COMPLEXITY_HINTS_CN
        ) or bool(self._LOW_EN_RE.search(content_lower))

        # 1. Collect the words / phrases that directly follow a negation.
        negated_words: set[str] = {
            match.group(1).lower()
            for match in self._NEGATION_PATTERNS.finditer(content_lower)
        }

        # 2. Keyword matching, skipping negated high-complexity hints.
        # Chinese: substring match. The negation capture may run past the hint
        # itself (e.g. "分析这个" for "不要分析这个"), so a hint counts as negated
        # when a captured phrase starts with it.
        high_hits = sum(
            1
            for hint in self._HIGH_COMPLEXITY_HINTS_CN
            if hint in content_lower
            and not any(phrase.startswith(hint) for phrase in negated_words)
        )
        medium_hits = sum(
            1 for hint in self._MEDIUM_COMPLEXITY_HINTS_CN if hint in content_lower
        )

        # English: word-boundary match; every occurrence counts.
        high_en_matches = self._HIGH_EN_RE.findall(content) + self._HIGH_EXACT_RE.findall(content)
        high_hits += sum(1 for word in high_en_matches if word.lower() not in negated_words)
        medium_hits += len(self._MEDIUM_EN_RE.findall(content)) + len(
            self._MEDIUM_EXACT_RE.findall(content)
        )

        # Greetings / small talk get a very low score, but only when no medium
        # or high keyword was found.
        if has_low_signal and high_hits == 0 and medium_hits == 0:
            score = 0.05
            if length > 200:
                score += 0.05
            elif length > 100:
                score += 0.03
            return max(0.0, min(1.0, score))

        if high_hits >= 2:
            score = 0.80
        elif high_hits == 1:
            score = 0.65
        elif medium_hits >= 1:
            score = 0.35
        else:
            score = 0.10

        # 3. Length bonus.
        if length > 200:
            score += 0.15
        elif length > 100:
            score += 0.10
        elif length > 50:
            score += 0.05

        # 4. Multi-clause bonus. Empty pieces (produced by trailing or repeated
        # punctuation) are ignored so a single terminated sentence is not
        # counted as two.
        sentence_count = sum(1 for part in _SENTENCE_SPLIT_RE.split(content) if part.strip())
        if sentence_count >= 4:
            score += 0.10
        elif sentence_count >= 2:
            score += 0.05

        # 5. Code-pattern bonus (a backtick also covers fenced ``` blocks).
        if "`" in content:
            score += 0.15
        if re.search(r"[\{\}\[\]\(\)]", content):
            score += 0.05

        # 6. Short-question deduction (ends with a question mark, under 30 chars).
        if self._SHORT_QUESTION_RE.search(content) and length < 30:
            score -= 0.10

        return max(0.0, min(1.0, score))


class CostAwareRouter:
    """三层成本感知路由器。

    Layer 0: 规则匹配（零成本）— @skill: 前缀 / 问候 / 简单对话
    Layer 1: 复杂度分类 — heuristic（零成本）或 LLM（~100 tokens）
    Layer 2: 能力匹配 / 拍卖（可选）— 高复杂度任务委派给最佳 Agent
    """

    def __init__(
        self,
        llm_gateway: Any = None,
        model: str = "default",
        org_context: Any = None,
        auction_enabled: bool = False,
        classifier: str = "heuristic",
        merged_llm_classify: bool = True,
        semantic_router: Any = None,  # SemanticRouter | None
        expert_team_router: Any = None,  # ExpertTeamRouter | None
    ) -> None:
        """Store the router's collaborators and configuration.

        Args:
            llm_gateway: Object with an awaitable ``chat(messages=, model=)``;
                LLM-based classification falls back to defaults when None.
            model: Model name passed to the gateway for classification calls.
            org_context: Stored as-is for use by other methods.
            auction_enabled: When True, an ``AuctionHouse`` is created.
            classifier: Either ``"heuristic"`` or ``"llm"``.
            merged_llm_classify: Whether the merged classify call may be used.
            semantic_router: Optional semantic router, stored as-is.
            expert_team_router: Optional expert-team router, stored as-is.

        Raises:
            ValueError: if *classifier* is not ``"heuristic"`` or ``"llm"``.
        """
        # Validate first so a bad configuration fails before anything is built.
        if classifier not in ("heuristic", "llm"):
            raise ValueError(f"Invalid classifier: {classifier!r}, must be 'heuristic' or 'llm'")

        self._llm_gateway = llm_gateway
        self._model = model
        self._org_context = org_context
        self._auction_enabled = auction_enabled
        self._classifier = classifier
        self._merged_llm_classify = merged_llm_classify
        self._semantic_router = semantic_router
        self._expert_team_router = expert_team_router
        self._auction_house = AuctionHouse() if auction_enabled else None
        self._heuristic = HeuristicClassifier()

    # -- Layer 0: Rule-based (zero cost) ------------------------------------

    def _match_layer0(self, content: str) -> tuple[str | None, str]:
        """Apply the Layer 0 rules to *content*.

        Returns:
            ``(match_type, clean_content)``. ``match_type`` is one of
            ``"explicit_skill"``, ``"greeting"``, ``"chat_mode"``, ``"identity"``,
            or None when no rule fired. For an explicit skill the second item is
            the message without its ``@skill:`` prefix; otherwise it is the
            stripped message.
        """
        # An explicit @skill: prefix takes precedence over every pattern rule.
        skill_ref, remainder = parse_skill_prefix(content)
        if skill_ref:
            return "explicit_skill", remainder

        stripped = content.strip()
        # Checked in order: greeting, simple chat, identity/meta question.
        rule_table = (
            ("greeting", _GREETING_RE),
            ("chat_mode", _CHAT_MODE_RE),
            ("identity", _IDENTITY_RE),
        )
        for label, pattern in rule_table:
            if pattern.match(stripped):
                return label, stripped
        return None, stripped

    # -- Layer 1: LLM quick classify (~100 tokens) -------------------------

    async def quick_classify(self, content: str) -> float:
        """Ask the LLM for a quick complexity rating of the request (0.0-1.0).

        The reply is expected to be a JSON object with a ``complexity`` field.
        If the reply as a whole is not valid JSON (for example because it is
        wrapped in a markdown code fence or surrounded by extra text), the
        outermost ``{...}`` span is parsed instead.

        Args:
            content: The user request to rate.

        Returns:
            The rating clamped to [0.0, 1.0]. Returns the default medium value
            0.5 when no LLM gateway is configured or when the call or the
            parsing fails (the failure is logged as a warning).
        """
        if self._llm_gateway is None:
            return 0.5

        prompt = (
            "You are a complexity classifier. Rate the complexity of the user request on a scale of 0.0 to 1.0.\n"
            "0.0 = trivial greeting, 0.3 = simple question, 0.5 = moderate task, "
            "0.7 = complex multi-step task, 1.0 = very complex research task.\n\n"
            "---BEGIN USER REQUEST---\n"
            f"{content}\n"
            "---END USER REQUEST---\n\n"
            'Respond ONLY with a JSON object: {"complexity": <float>}'
        )
        try:
            response = await self._llm_gateway.chat(
                messages=[{"role": "user", "content": prompt}],
                model=self._model,
            )
            raw = response.content.strip()
            try:
                data = json.loads(raw)
            except json.JSONDecodeError:
                # Tolerate fenced or chatty replies: retry on the outermost
                # brace-delimited span, and give up if there is none.
                start, end = raw.find("{"), raw.rfind("}")
                if start == -1 or end <= start:
                    raise
                data = json.loads(raw[start : end + 1])
            complexity = float(data.get("complexity", 0.5))
            return max(0.0, min(1.0, complexity))
        except Exception as e:
            logger.warning(f"CostAwareRouter quick_classify failed: {e}")
            return 0.5

    # -- Layer 1.5: Merged LLM classify (complexity + intent in one call) ---

    async def _classify_merged(
        self,
        content: str,
        skill_registry: Any,
        intent_router: Any,
        default_tools: list,
        default_system_prompt: str | None,
        default_model: str,
        default_agent_name: str,
        agent_tool_registry: Any = None,
        session_id: str = "",
        complexity: float = 0.5,
    ) -> SkillRoutingResult:
        """合并 LLM 调用：单次 LLM 同时输出 complexity + intent + skill_hint。

        当 HeuristicClassifier 返回不确定区间 (0.3-0.7) 时使用，
        替代分别调用 quick_classify() 和 IntentRouter._classify_with_llm()，
        节省 1 次 LLM 调用 (~1-3s)。
        """
        if self._llm_gateway is None or not self._merged_llm_classify:
            # Fallback: 使用独立的 IntentRouter 路由
            return await resolve_skill_routing(
                content=content,
                skill_registry=skill_registry,
                intent_router=intent_router,
                default_tools=default_tools,
                default_system_prompt=default_system_prompt,
                default_model=default_model,
                default_agent_name=default_agent_name,
                agent_tool_registry=agent_tool_registry,
                session_id=session_id,
            )

        # Build skill list for the prompt
        skill_hints = []
        if skill_registry:
            try:
                for s in skill_registry.list_skills():
                    if s.config.intent and s.config.intent.keywords:
                        skill_hints.append(s.name)
            except Exception:
                pass

        skill_list_str = ", ".join(skill_hints) if skill_hints else "none"

        prompt = (
            "You are a routing classifier. Analyze the user request and output:\n"
            "1. complexity (0.0-1.0): how complex is this request\n"
            "2. intent: the primary intent category\n"
            "3. skill_hint: the best matching skill name, or null if none match\n\n"
            f"Available skills: [{skill_list_str}]\n\n"
            "---BEGIN USER REQUEST---\n"
            f"{content}\n"
            "---END USER REQUEST---\n\n"
            'Respond ONLY with a JSON object: {"complexity": <float>, "intent": <string>, "skill_hint": <string|null>}'
        )

        try:
            response = await self._llm_gateway.chat(
                messages=[{"role": "user", "content": prompt}],
                model=self._model,
            )
            data = json.loads(response.content.strip())
            merged_complexity = float(data.get("complexity", 0.5))
            merged_complexity = max(0.0, min(1.0, merged_complexity))
            skill_hint = data.get("skill_hint")

            # If skill_hint provided and valid, route directly to that skill
            if skill_hint and skill_registry:
                try:
                    matched_skill = skill_registry.get(skill_hint)
                    result = SkillRoutingResult(
                        clean_content=content,
                        skill_name=skill_hint,
                        skill_config=matched_skill.config,
                        skill_tools=matched_skill.tools or [],
                        matched=True,
                        match_method="merged_llm",
                        match_confidence=0.7,
                        complexity=merged_complexity,
                        execution_mode=ExecutionMode.SKILL_REACT,
                    )
                    # Merge tools
                    agent_tools = (
                        agent_tool_registry.list_tools() if agent_tool_registry else default_tools
                    )
                    seen_names = set()
                    merged_tools = []
                    for tool in result.skill_tools + agent_tools:
                        if tool.name not in seen_names:
                            seen_names.add(tool.name)
                            merged_tools.append(tool)
                    result.tools = merged_tools
                    result.model = (
                        result.skill_config.llm.get("model", default_model)
                        if result.skill_config.llm
                        else default_model
                    )
                    result.agent_name = skill_hint
                    result.system_prompt = (
                        build_skill_system_prompt(result.skill_config) or default_system_prompt
                    )
                    # Append available tools to system prompt so LLM knows what it can call
                    if result.tools:
                        tools_desc = _build_tools_description(result.tools)
                        tool_instruction = (
                            "\n\n## Tool Usage\n"
                            "You have access to the following tools. When you need to use a tool, "
                            "respond with a tool call in the format specified by the system.\n"
                            "Never make up information or guess answers when you can use a tool to find the answer.\n"
                            "Always prefer using tools over guessing.\n"
                        )
                        if result.system_prompt:
                            result.system_prompt += (
                                f"{tool_instruction}\n## Available Tools\n{tools_desc}"
                            )
                    logger.info(
                        f"Session {session_id}: merged LLM classify routed to skill '{skill_hint}' "
                        f"(complexity={merged_complexity:.2f})"
                    )
                    return result
                except Exception as e:
                    logger.warning(
                        f"Session {session_id}: merged LLM skill_hint '{skill_hint}' not found: {e}"
                    )

            # No valid skill_hint — use complexity to decide routing
            if merged_complexity < 0.3:
                return SkillRoutingResult(
                    clean_content=content,
                    system_prompt=default_system_prompt,
                    tools=default_tools,
                    model=default_model,
                    agent_name=default_agent_name,
                    matched=False,
                    match_method="merged_llm_low",
                    match_confidence=1.0 - merged_complexity,
                    complexity=merged_complexity,
                    execution_mode=ExecutionMode.DIRECT_CHAT,
                )
            elif merged_complexity > 0.7:
                # High complexity — delegate to Layer 2
                return SkillRoutingResult(
                    clean_content=content,
                    system_prompt=default_system_prompt,
                    tools=default_tools,
                    model=default_model,
                    agent_name=default_agent_name,
                    matched=False,
                    match_method="merged_llm_high",
                    match_confidence=merged_complexity,
                    complexity=merged_complexity,
                    execution_mode=ExecutionMode.REACT,
                )
            else:
                # Medium complexity, no skill match — default agent
                return SkillRoutingResult(
                    clean_content=content,
                    system_prompt=default_system_prompt,
                    tools=default_tools,
                    model=default_model,
                    agent_name=default_agent_name,
                    matched=False,
                    match_method="merged_llm_medium",
                    match_confidence=0.5,
                    complexity=merged_complexity,
                    execution_mode=ExecutionMode.REACT,
                )
        except (json.JSONDecodeError, TypeError, ValueError) as e:
            logger.warning(
                f"CostAwareRouter _classify_merged parse failed: {e}, falling back to default"
            )
            return SkillRoutingResult(
                clean_content=content,
                system_prompt=default_system_prompt,
                tools=default_tools,
                model=default_model,
                agent_name=default_agent_name,
                matched=False,
                match_method="merged_llm_fallback",
                match_confidence=0.5,
                complexity=0.5,
                execution_mode=ExecutionMode.REACT,
            )
        except Exception as e:
            logger.warning(f"CostAwareRouter _classify_merged failed: {e}, falling back to default")
            return SkillRoutingResult(
                clean_content=content,
                system_prompt=default_system_prompt,
                tools=default_tools,
                model=default_model,
                agent_name=default_agent_name,
                matched=False,
                match_method="merged_llm_fallback",
                match_confidence=0.5,
                complexity=0.5,
                execution_mode=ExecutionMode.REACT,
            )

    # -- Layer 2: Capability matching / Auction (optional) -----------------

    def _try_team_upgrade(
        self,
        result: SkillRoutingResult,
        content: str,
        complexity: float,
        trace: list[dict] | None,
    ) -> SkillRoutingResult:
        """Promote a REACT routing result to TEAM_COLLAB when an expert team applies.

        The promotion happens only when all of the following hold: the result's
        execution mode is REACT, ``complexity`` is at least 0.7, an expert team
        router is configured, that router reports it can handle ``content``, and
        its resolution has ``team_mode`` set.

        Args:
            result: Routing result to inspect; mutated in place on promotion.
            content: User input handed to the expert team router.
            complexity: Complexity score used for the threshold check and
                forwarded to the expert team router.
            trace: Optional trace list; a ``team_upgrade`` entry is appended when
                the promotion happens.

        Returns:
            The same ``result`` object, promoted or untouched.
        """
        # Guards are checked in the same order as a short-circuiting ``and`` chain.
        if result.execution_mode != ExecutionMode.REACT:
            return result
        if not complexity >= 0.7:
            return result
        router = self._expert_team_router
        if router is None:
            return result

        try:
            if not router.can_handle(content):
                return result
            team_result = router.resolve(content, complexity)
            if not team_result.team_mode:
                return result

            result.execution_mode = ExecutionMode.TEAM_COLLAB
            if trace is not None:
                upgrade_event = {
                    "layer": 2,
                    "method": "team_upgrade",
                    "from_mode": "REACT",
                    "to_mode": "TEAM_COLLAB",
                    "team_match_method": team_result.match_method,
                    "complexity": complexity,
                }
                trace.append(upgrade_event)
        except Exception as e:
            # Best effort: a failing team check must never break routing.
            logger.warning(f"CostAwareRouter team upgrade check failed: {e}")
        return result

    async def _route_layer2(
        self,
        content: str,
        skill_registry: Any,
        intent_router: Any,
        default_tools: list,
        default_system_prompt: str | None,
        default_model: str,
        default_agent_name: str,
        agent_tool_registry: Any = None,
        session_id: str = "",
        complexity: float = 0.0,
        trace: list[dict] | None = None,
    ) -> SkillRoutingResult:
        """Layer 2: route a high-complexity task via auction or capability matching.

        Strategies are tried in this order, each falling through to the next on
        failure or when it produces no agent:

        1. Vickrey auction, when the auction is enabled and both an auction
           house and an org context are configured.
        2. ``org_context.find_best_agent`` capability matching.
        3. ``resolve_skill_routing`` (IntentRouter) as the final fallback.

        Whatever strategy produces the result, it is passed through
        ``_try_team_upgrade`` before being returned.

        Args:
            content: User input to route.
            skill_registry: Skill registry forwarded to the fallback resolver.
            intent_router: IntentRouter forwarded to the fallback resolver.
            default_tools: Tools attached to agent-matched results.
            default_system_prompt: System prompt attached to agent-matched results.
            default_model: Model attached to agent-matched results.
            default_agent_name: Agent name forwarded to the fallback resolver.
            agent_tool_registry: Tool registry forwarded to the fallback resolver.
            session_id: Session identifier forwarded to the fallback resolver.
            complexity: Complexity score recorded on the result and in the trace.
            trace: Optional list that receives one entry for the strategy used.

        Returns:
            The routing result of the first strategy that succeeds.
        """
        # Words extracted from the content double as the required capabilities.
        content_words = _tokenize_content(content)

        def _agent_result(method: str, agent_name: str) -> SkillRoutingResult:
            # Auction and capability matches differ only in method and agent.
            return SkillRoutingResult(
                clean_content=content,
                matched=True,
                match_method=method,
                match_confidence=0.8,
                agent_name=agent_name,
                model=default_model,
                system_prompt=default_system_prompt,
                tools=default_tools,
                complexity=complexity,
                execution_mode=ExecutionMode.REACT,
            )

        # --- Vickrey auction path (when enabled) ---
        if (
            self._auction_enabled
            and self._auction_house is not None
            and self._org_context is not None
        ):
            org_context = self._org_context
            try:
                all_agents = (
                    org_context.list_agents() if hasattr(org_context, "list_agents") else []
                )

                # Lower-case the wanted words once rather than once per agent.
                wanted_caps = {word.lower() for word in content_words}

                # Keep available agents sharing at least one capability with the content.
                candidate_agents = []
                for agent_profile in all_agents:
                    if not agent_profile.availability:
                        continue
                    agent_caps = {cap.lower() for cap in agent_profile.capabilities}
                    if wanted_caps & agent_caps:
                        candidate_agents.append(agent_profile)

                # Also include the find_best_agent pick. The lookup is guarded like
                # the other org_context calls so that a context without the method
                # does not abort the auction for the candidates gathered above.
                best = (
                    org_context.find_best_agent(required_capabilities=content_words)
                    if hasattr(org_context, "find_best_agent")
                    else None
                )
                if best is not None:
                    best_name = best if isinstance(best, str) else getattr(best, "name", str(best))
                    existing_names = {agent.name for agent in candidate_agents}
                    if best_name not in existing_names:
                        profile = (
                            org_context.get_agent_profile(best_name)
                            if hasattr(org_context, "get_agent_profile")
                            else best
                        )
                        # A bare string (or None) has no ``name`` and cannot bid.
                        if hasattr(profile, "name"):
                            candidate_agents.append(profile)

                if candidate_agents:
                    bids = []
                    for agent_profile in candidate_agents:
                        # Use current_load as a proxy for cost (higher load → higher cost).
                        estimated_cost = (
                            float(agent_profile.current_load + 1)
                            if hasattr(agent_profile, "current_load")
                            else 1.0
                        )
                        bids.append(
                            Bid(
                                agent_name=getattr(agent_profile, "name", str(agent_profile)),
                                architecture=getattr(agent_profile, "agent_type", "react"),
                                estimated_steps=1,
                                estimated_cost=estimated_cost,
                                confidence=0.8,
                                payment_offer=estimated_cost,
                                capabilities=getattr(agent_profile, "capabilities", []),
                            )
                        )

                    auction_result = await self._auction_house.run_vickrey_auction(
                        task_description=content,
                        bidders=bids,
                        required_capabilities=content_words,
                    )

                    if auction_result.winner is not None:
                        winner_name = auction_result.winner.agent_name
                        result = _agent_result("vickrey_auction", winner_name)
                        if trace is not None:
                            trace.append(
                                {
                                    "layer": 2,
                                    "method": "vickrey_auction",
                                    "agent_name": winner_name,
                                    "complexity": complexity,
                                    "selection_reason": auction_result.selection_reason,
                                }
                            )
                        return self._try_team_upgrade(result, content, complexity, trace)
                    # No winner from auction → fall through to capability matching
            except Exception as e:
                logger.warning(f"CostAwareRouter Layer 2 Vickrey auction failed: {e}")

        # --- Capability matching path (default) ---
        if self._org_context is not None and hasattr(self._org_context, "find_best_agent"):
            try:
                best_agent = self._org_context.find_best_agent(required_capabilities=content_words)
                if best_agent is not None:
                    agent_name = (
                        best_agent
                        if isinstance(best_agent, str)
                        else getattr(best_agent, "name", str(best_agent))
                    )
                    result = _agent_result("capability", agent_name)
                    if trace is not None:
                        trace.append(
                            {
                                "layer": 2,
                                "method": "capability",
                                "agent_name": agent_name,
                                "complexity": complexity,
                            }
                        )
                    return self._try_team_upgrade(result, content, complexity, trace)
            except Exception as e:
                logger.warning(f"CostAwareRouter Layer 2 org_context.find_best_agent failed: {e}")

        # Fallback: use the IntentRouter-based resolver
        result = await resolve_skill_routing(
            content=content,
            skill_registry=skill_registry,
            intent_router=intent_router,
            default_tools=default_tools,
            default_system_prompt=default_system_prompt,
            default_model=default_model,
            default_agent_name=default_agent_name,
            agent_tool_registry=agent_tool_registry,
            session_id=session_id,
        )
        result.complexity = complexity
        if trace is not None:
            trace.append(
                {
                    "layer": 2,
                    "method": "intent_router_fallback",
                    "complexity": complexity,
                }
            )
        return self._try_team_upgrade(result, content, complexity, trace)

    # -- Main entry point ---------------------------------------------------

    async def route(
        self,
        content: str,
        skill_registry: Any,
        intent_router: Any,
        default_tools: list,
        default_system_prompt: str | None,
        default_model: str = "default",
        default_agent_name: str = "default",
        agent_tool_registry: Any = None,
        session_id: str = "",
        transparency: str = "SILENT",
    ) -> SkillRoutingResult:
        """Main entry point of the layered, cost-aware router.

        Layers are tried from cheapest to most expensive; the first one that
        produces a decision returns:

        * Layer 0 - rule match (explicit skill, greeting, chat mode, identity).
        * Layer 1 - complexity classification; below 0.3 goes to direct chat.
        * Layer 1.5 - semantic router; a high-confidence hit resolves the skill.
        * Medium complexity (<= 0.7) - merged LLM classify or IntentRouter.
        * Layer 2 - capability matching / auction for high complexity.

        Every return path stamps the result with the execution trace, the
        transparency level, and the ``route.layer`` / ``route.target`` span
        attributes.

        Args:
            content: User input.
            skill_registry: Skill registry.
            intent_router: IntentRouter instance.
            default_tools: Default tool list.
            default_system_prompt: Default system prompt.
            default_model: Default model.
            default_agent_name: Default agent name.
            agent_tool_registry: Agent tool registry.
            session_id: Session ID.
            transparency: Transparency level (SILENT / VERBOSE / TRACE). With
                ``"SILENT"`` the returned ``execution_trace`` is empty.

        Returns:
            A SkillRoutingResult carrying the routing decision and trace info.
        """
        trace: list[dict] = []

        # Collaborators and defaults forwarded unchanged to every downstream resolver.
        routing_kwargs: dict[str, Any] = {
            "skill_registry": skill_registry,
            "intent_router": intent_router,
            "default_tools": default_tools,
            "default_system_prompt": default_system_prompt,
            "default_model": default_model,
            "default_agent_name": default_agent_name,
            "agent_tool_registry": agent_tool_registry,
            "session_id": session_id,
        }

        tracer = get_tracer()
        with tracer.start_span("router.route") as span:
            span.set_attribute("input.length", len(content))

            def _finalize(
                routed: SkillRoutingResult, layer: str, target: str
            ) -> SkillRoutingResult:
                # Single exit point so no return path can skip trace or telemetry.
                routed.execution_trace = trace if transparency != "SILENT" else []
                routed.transparency_level = transparency
                span.set_attribute("route.layer", layer)
                span.set_attribute("route.target", target)
                return routed

            # ---- Layer 0: Rule-based (zero cost) ----
            match_type, clean_content = self._match_layer0(content)

            if match_type == "explicit_skill":
                # The raw content is passed on to the resolver, not clean_content.
                result = await resolve_skill_routing(content=content, **routing_kwargs)
                result.match_method = result.match_method or "explicit_skill"
                result.complexity = 0.0
                trace.append(
                    {
                        "layer": 0,
                        "method": "explicit_skill",
                        "matched": result.matched,
                        "cost": "zero",
                    }
                )
                return _finalize(
                    result,
                    result.match_method or "explicit_skill",
                    result.skill_name or "default",
                )

            if match_type in ("greeting", "chat_mode", "identity"):
                result = SkillRoutingResult(
                    clean_content=clean_content,
                    system_prompt=default_system_prompt,
                    tools=default_tools,
                    model=default_model,
                    agent_name=default_agent_name,
                    matched=False,
                    match_method=match_type,
                    match_confidence=1.0,
                    complexity=0.0,
                    execution_mode=ExecutionMode.DIRECT_CHAT,
                )
                trace.append(
                    {
                        "layer": 0,
                        "method": match_type,
                        "matched": False,
                        "cost": "zero",
                    }
                )
                return _finalize(result, match_type, "default")

            # ---- Layer 1: Complexity classification ----
            if self._classifier == "heuristic":
                complexity = self._heuristic.classify(clean_content)
                classify_method = "heuristic_classify"
            else:
                complexity = await self.quick_classify(clean_content)
                classify_method = "quick_classify"
            trace.append(
                {
                    "layer": 1,
                    "method": classify_method,
                    "complexity": complexity,
                }
            )

            # Low complexity → direct chat
            if complexity < 0.3:
                result = SkillRoutingResult(
                    clean_content=clean_content,
                    system_prompt=default_system_prompt,
                    tools=default_tools,
                    model=default_model,
                    agent_name=default_agent_name,
                    matched=False,
                    match_method="low_complexity",
                    match_confidence=1.0 - complexity,
                    complexity=complexity,
                    execution_mode=ExecutionMode.DIRECT_CHAT,
                )
                trace.append(
                    {
                        "layer": 1,
                        "method": "low_complexity",
                        "complexity": complexity,
                        "routed_to": "default",
                    }
                )
                return _finalize(result, "low_complexity", "default")

            # ---- Layer 1.5: Semantic Router (zero LLM cost) ----
            # NOTE(review): skill_hint is recorded in the trace below but is not
            # forwarded to _classify_merged or _route_layer2 by this method.
            skill_hint = None
            if self._semantic_router is not None and complexity >= 0.3:
                try:
                    semantic_result = await self._semantic_router.route(clean_content)
                    if semantic_result.confidence == "high" and semantic_result.skill_name:
                        # Direct skill match — skip Layer 2
                        trace.append(
                            {
                                "layer": 1.5,
                                "method": "semantic_high",
                                "skill": semantic_result.skill_name,
                                "similarity": round(semantic_result.similarity, 3),
                                "cost": "zero",
                            }
                        )
                        result = await resolve_skill_routing(
                            content=content,
                            force_skill=semantic_result.skill_name,
                            **routing_kwargs,
                        )
                        result.match_method = "semantic_high"
                        result.match_confidence = semantic_result.similarity
                        result.complexity = complexity
                        if result.matched:
                            result.execution_mode = ExecutionMode.SKILL_REACT
                        return _finalize(
                            result, "semantic_high", result.skill_name or "default"
                        )
                    elif semantic_result.confidence == "medium" and semantic_result.skill_name:
                        skill_hint = semantic_result.skill_name
                        trace.append(
                            {
                                "layer": 1.5,
                                "method": "semantic_medium",
                                "skill_hint": skill_hint,
                                "similarity": round(semantic_result.similarity, 3),
                            }
                        )
                except Exception as e:
                    # Semantic routing is an optimisation; fall through on failure.
                    logger.warning(f"Semantic routing failed, falling through: {e}")
                    trace.append(
                        {
                            "layer": 1.5,
                            "method": "semantic_error",
                            "error": str(e),
                        }
                    )

            # Medium complexity → merged LLM classify or IntentRouter
            if complexity <= 0.7:
                if self._merged_llm_classify and self._llm_gateway is not None:
                    # Use merged LLM call: complexity + intent in one call
                    result = await self._classify_merged(
                        content=content,
                        complexity=complexity,
                        **routing_kwargs,
                    )
                    # If merged classify returned high complexity, delegate to Layer 2
                    if (
                        result.complexity > 0.7
                        and result.match_method
                        and result.match_method.startswith("merged_llm_high")
                    ):
                        trace.append(
                            {
                                "layer": 1,
                                "method": "merged_llm_high",
                                "complexity": result.complexity,
                                "delegated_to_layer2": True,
                            }
                        )
                        layer2_result = await self._route_layer2(
                            content=content,
                            complexity=result.complexity,
                            trace=trace,
                            **routing_kwargs,
                        )
                        # Report the same span attributes as the direct Layer 2
                        # path so this delegation is visible in telemetry too.
                        return _finalize(
                            layer2_result,
                            layer2_result.match_method or "capability",
                            layer2_result.skill_name or layer2_result.agent_name or "default",
                        )
                else:
                    # Fallback: use separate IntentRouter
                    result = await resolve_skill_routing(content=content, **routing_kwargs)
                # Keep a positive complexity set by the resolver; otherwise use Layer 1's.
                result.complexity = result.complexity if result.complexity > 0 else complexity
                trace.append(
                    {
                        "layer": 1,
                        "method": result.match_method or "merged_llm",
                        "complexity": result.complexity,
                        "matched": result.matched,
                    }
                )
                return _finalize(
                    result,
                    result.match_method or "merged_llm",
                    result.skill_name or "default",
                )

            # ---- Layer 2: Capability matching / Auction (high complexity) ----
            trace.append(
                {
                    "layer": 2,
                    "method": "capability_or_auction",
                    "complexity": complexity,
                    "auction_enabled": self._auction_enabled,
                }
            )
            result = await self._route_layer2(
                content=content,
                complexity=complexity,
                trace=trace,
                **routing_kwargs,
            )
            return _finalize(
                result,
                result.match_method or "capability",
                result.skill_name or result.agent_name or "default",
            )