fischer-agentkit/src/agentkit/chat/skill_routing.py

1514 lines
60 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Shared skill routing logic for GUI and CLI chat.
Extracts the duplicated skill routing, @skill: prefix parsing,
and prompt assembly into a single module used by both chat routes.
"""
from __future__ import annotations
import enum
import json
import logging
import re
from dataclasses import dataclass, field
from typing import Any
from agentkit.marketplace.auction import AuctionHouse, Bid
from agentkit.telemetry.tracer import get_tracer
logger = logging.getLogger(__name__)
# Strict validation: a skill name is 1-64 characters long, starts with a
# lowercase letter or digit, and continues with lowercase letters, digits,
# hyphens or underscores only.
_SKILL_NAME_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$")
class ExecutionMode(enum.Enum):
    """How the downstream should execute this routing result.

    This is the single source of truth for execution path selection.
    The transport layer (portal.py, chat.py) should branch on this
    field instead of string-matching match_method.
    """

    DIRECT_CHAT = "direct_chat"  # Zero-cost: direct LLM call, no ReAct loop
    REACT = "react"  # Default agent ReAct loop with default tools
    SKILL_REACT = "skill_react"  # Skill-matched ReAct with skill tools + prompt
    REWOO = "rewoo"  # Plan-without-observation mode
    REFLEXION = "reflexion"  # Reflection-driven mode
    PLAN_EXEC = "plan_exec"  # Plan-then-execute mode
    TEAM_COLLAB = "team_collab"  # Expert Team collaborative mode
# Mapping from skill config execution_mode string to ExecutionMode enum.
# "react", "custom", "llm_generate" and "tool_call" all resolve to SKILL_REACT;
# only "direct" resolves to DIRECT_CHAT. Strings missing from this table are
# handled by the lookup's fallback in _resolve_execution_mode.
_SKILL_EXECUTION_MODE_MAP: dict[str, ExecutionMode] = {
    "direct": ExecutionMode.DIRECT_CHAT,
    "react": ExecutionMode.SKILL_REACT,
    "rewoo": ExecutionMode.REWOO,
    "reflexion": ExecutionMode.REFLEXION,
    "plan_exec": ExecutionMode.PLAN_EXEC,
    "custom": ExecutionMode.SKILL_REACT,
    "llm_generate": ExecutionMode.SKILL_REACT,
    "tool_call": ExecutionMode.SKILL_REACT,
}
def _resolve_execution_mode(skill_config: Any) -> ExecutionMode:
    """Translate a skill config's ``execution_mode`` string into an ExecutionMode.

    A missing or falsy ``execution_mode`` is treated as ``"react"``; any string
    that is not a key of ``_SKILL_EXECUTION_MODE_MAP`` resolves to
    ``ExecutionMode.SKILL_REACT``.
    """
    configured = getattr(skill_config, "execution_mode", "react")
    if not configured:
        configured = "react"
    if configured in _SKILL_EXECUTION_MODE_MAP:
        return _SKILL_EXECUTION_MODE_MAP[configured]
    return ExecutionMode.SKILL_REACT
def validate_skill_name(name: str) -> str:
    """Return the normalized (stripped, lower-cased) form of a skill name.

    Raises:
        ValueError: if the normalized name does not match ``_SKILL_NAME_RE``.
    """
    candidate = name.strip().lower()
    if _SKILL_NAME_RE.match(candidate):
        return candidate
    raise ValueError(f"Invalid skill name '{name}': must match [a-z0-9][a-z0-9_-]{{0,63}}")
@dataclass
class SkillRoutingResult:
    """Result of skill routing for a user message.

    Bundles the match outcome (which skill was chosen and how) with the
    execution parameters (prompt, tools, model, execution mode) that the
    caller should use for this turn.
    """

    # Name of the matched skill, or None when no skill was selected.
    skill_name: str | None = None
    # Config object of the matched skill (taken from ``matched_skill.config``).
    skill_config: Any = None
    # Tools contributed by the matched skill itself.
    skill_tools: list = field(default_factory=list)
    # Message text to process; resolve_skill_routing stores the content with
    # any "@skill:name" prefix removed here.
    clean_content: str = ""
    # System prompt for the turn (skill prompt or default, plus tool listing).
    system_prompt: str | None = None
    # Final tool list for the turn.
    tools: list = field(default_factory=list)
    model: str = "default"
    agent_name: str | None = None
    # True when routing selected a skill or a specific agent.
    matched: bool = False
    # How the match was made, e.g. "explicit", "semantic_force", "merged_llm".
    match_method: str | None = None
    match_confidence: float = 0.0
    # Transparency level supplied by the caller of the router.
    transparency_level: str = "SILENT"
    # Per-layer routing trace entries (dicts) collected by the router.
    execution_trace: list[dict] = field(default_factory=list)
    # Complexity estimate for the request; classifiers clamp it to 0.0-1.0.
    complexity: float = 0.0
    # Execution path the transport layer should take for this result.
    execution_mode: ExecutionMode = ExecutionMode.DIRECT_CHAT
def parse_skill_prefix(content: str) -> tuple[str | None, str]:
    """Parse an ``@skill:name`` prefix from a user message.

    The skill name is everything between ``@skill:`` and the first whitespace
    character; the rest of the message (stripped) is the clean content.

    Args:
        content: Raw user message.

    Returns:
        ``(skill_name, clean_content)``. When the message does not start with
        ``@skill:`` the result is ``(None, content)`` with the content
        untouched. A prefix with no name (``"@skill: hi"``) yields an empty
        string as the skill name, which callers treat as "no explicit skill".
    """
    prefix = "@skill:"
    if not content.startswith(prefix):
        return None, content
    # split() with no separator breaks on any run of whitespace, so a newline,
    # tab or full-width (ideographic) space after the name ends the skill
    # reference just like an ASCII space does.
    head, *tail = content.split(maxsplit=1)
    skill_name = head[len(prefix):].strip()
    remainder = tail[0].strip() if tail else ""
    return skill_name, remainder
def build_skill_system_prompt(skill_config) -> str | None:
    """Assemble a system prompt from the ``prompt`` mapping of a skill config.

    The sections ``identity``, ``context``, ``instructions``, ``constraints``
    and ``output_format`` are concatenated in that order, separated by blank
    lines; empty or missing sections are skipped.

    Returns:
        The joined prompt, or None when there is no config, no prompt mapping,
        or no non-empty section.
    """
    if not skill_config:
        return None
    prompt_cfg = skill_config.prompt
    if not prompt_cfg:
        return None
    section_order = ("identity", "context", "instructions", "constraints", "output_format")
    sections = [prompt_cfg.get(section) for section in section_order]
    present = [text for text in sections if text]
    if not present:
        return None
    return "\n\n".join(present)
async def resolve_skill_routing(
    content: str,
    skill_registry: Any,
    intent_router: Any,
    default_tools: list,
    default_system_prompt: str | None,
    default_model: str = "default",
    default_agent_name: str = "default",
    agent_tool_registry: Any = None,
    session_id: str = "",
    force_skill: str | None = None,
) -> SkillRoutingResult:
    """Resolve skill routing for a user message.

    This is the shared entry point used by both GUI WebSocket chat and CLI chat.
    Matching is attempted in order and stops at the first success:

    1. an explicit ``@skill:name`` prefix,
    2. ``force_skill`` (from a high-confidence semantic router),
    3. the IntentRouter, over skills that declare intent keywords.

    Args:
        content: Raw user message, possibly carrying an ``@skill:`` prefix.
        skill_registry: Registry exposing ``get(name)`` and ``list_skills()``.
        intent_router: Router exposing an async ``route(input_data, skills)``.
        default_tools: Tools used when no agent tool registry is given.
        default_system_prompt: Prompt used when the skill provides none.
        default_model: Model used when the skill config names none.
        default_agent_name: Agent name used when no skill matched.
        agent_tool_registry: Optional registry exposing ``list_tools()``.
        session_id: Session identifier, used only in log messages.
        force_skill: Optional skill name to use when no explicit skill matched.

    Returns:
        A SkillRoutingResult with all execution parameters set.
    """
    result = SkillRoutingResult()

    # Parse @skill: prefix
    explicit_skill, clean_content = parse_skill_prefix(content)
    result.clean_content = clean_content
    if explicit_skill:
        logger.info(f"Session {session_id}: explicit skill reference: {explicit_skill}")

    # Try explicit skill match
    if explicit_skill and skill_registry:
        _match_registry_skill(
            result,
            skill_registry,
            explicit_skill,
            method="explicit",
            session_id=session_id,
            found_label="explicit",
            missing_label="explicit",
        )

    # Try force_skill match (from semantic router high confidence)
    if not result.matched and force_skill and skill_registry:
        _match_registry_skill(
            result,
            skill_registry,
            force_skill,
            method="semantic_force",
            session_id=session_id,
            found_label="force-matched",
            missing_label="force",
        )

    # Try IntentRouter if nothing matched so far
    if not result.matched and skill_registry and intent_router:
        await _match_via_intent_router(
            result, clean_content, skill_registry, intent_router, session_id
        )

    # Determine execution parameters
    if result.matched and result.skill_config:
        skill_prompt = build_skill_system_prompt(result.skill_config)
        result.system_prompt = skill_prompt or default_system_prompt
        # Merge skill tools with agent tools, deduplicating by name. Skill
        # tools come first so they win over an agent tool of the same name.
        agent_tools = agent_tool_registry.list_tools() if agent_tool_registry else default_tools
        seen_names: set = set()
        merged_tools = []
        for tool in [*result.skill_tools, *(agent_tools or [])]:
            if tool.name not in seen_names:
                seen_names.add(tool.name)
                merged_tools.append(tool)
        result.tools = merged_tools
        llm_config = result.skill_config.llm
        result.model = llm_config.get("model", default_model) if llm_config else default_model
        result.agent_name = result.skill_name
        # Map skill.config.execution_mode to ExecutionMode enum
        result.execution_mode = _resolve_execution_mode(result.skill_config)
    else:
        result.system_prompt = default_system_prompt
        result.tools = default_tools
        result.model = default_model
        result.agent_name = default_agent_name
        # No skill matched — if we have tools, use ReAct; otherwise direct chat
        result.execution_mode = ExecutionMode.REACT if default_tools else ExecutionMode.DIRECT_CHAT

    # Append available tools to system prompt so LLM knows what it can call
    if result.tools:
        tools_desc = _build_tools_description(result.tools)
        tool_instruction = (
            "\n\n## Tool Usage\n"
            "You have access to the following tools. When you need to use a tool, "
            "respond with a tool call in the format specified by the system.\n"
            "Never make up information or guess answers when you can use a tool to find the answer.\n"
            "Always prefer using tools over guessing.\n"
        )
        tool_section = f"{tool_instruction}\n## Available Tools\n{tools_desc}"
        result.system_prompt = (result.system_prompt or "") + tool_section
    return result


# Substrings suggesting that a request needs tools (shell, search, file
# operations, deployment). Used to avoid routing such requests to a
# direct-mode skill that has no tools of its own.
_DIRECT_MODE_TOOL_HINTS: tuple[str, ...] = (
    "执行",
    "运行",
    "命令",
    "终端",
    "shell",
    "bash",
    "搜索",
    "查找",
    "联网",
    "search",
    "安装",
    "部署",
    "启动",
    "停止",
    "重启",
    "文件",
    "目录",
    "创建",
    "删除",
    "修改",
    "run",
    "execute",
    "install",
    "deploy",
    "start",
    "stop",
    "restart",
    "file",
)


def _record_skill_match(
    result: SkillRoutingResult,
    skill_name: str,
    skill_config: Any,
    skill_tools: list,
    method: str | None,
    confidence: float,
) -> None:
    """Write a successful skill match into ``result`` in one step."""
    result.skill_name = skill_name
    result.skill_config = skill_config
    result.skill_tools = skill_tools
    result.matched = True
    result.match_method = method
    result.match_confidence = confidence


def _match_registry_skill(
    result: SkillRoutingResult,
    skill_registry: Any,
    skill_name: str,
    *,
    method: str,
    session_id: str,
    found_label: str,
    missing_label: str,
) -> None:
    """Look up ``skill_name`` and mark ``result`` as matched with confidence 1.0.

    Everything is read from the registry before ``result`` is touched, so a
    failed lookup leaves no stale skill name or config behind.
    """
    try:
        matched_skill = skill_registry.get(skill_name)
        skill_config = matched_skill.config
        skill_tools = matched_skill.tools or []
    except Exception as e:
        logger.warning(f"Session {session_id}: {missing_label} skill '{skill_name}' not found: {e}")
        return
    _record_skill_match(result, skill_name, skill_config, skill_tools, method, 1.0)
    logger.info(f"Session {session_id}: using {found_label} skill '{skill_name}'")


def _skill_has_intent_keywords(skill: Any) -> bool:
    """Return True when the skill declares intent keywords (intent may be None)."""
    intent = skill.config.intent
    return bool(intent and intent.keywords)


async def _match_via_intent_router(
    result: SkillRoutingResult,
    clean_content: str,
    skill_registry: Any,
    intent_router: Any,
    session_id: str,
) -> None:
    """Ask the IntentRouter for a skill and record the match in ``result``.

    A routing result is accepted only at confidence >= 0.5. A direct-mode
    skill without tools is skipped when the message looks like it needs
    tools, so the request falls through to the default agent.
    """
    routable_skills = [s for s in skill_registry.list_skills() if _skill_has_intent_keywords(s)]
    if not routable_skills:
        return

    try:
        routing_result = await intent_router.route(
            input_data={"content": clean_content},
            skills=routable_skills,
        )
        if not (routing_result and routing_result.confidence >= 0.5):
            return
        skill_name = routing_result.matched_skill
    except Exception as e:
        logger.warning(f"Skill routing failed for session {session_id}: {e}")
        return

    try:
        matched_skill = skill_registry.get(skill_name)
        skill_config = matched_skill.config
        skill_tools = matched_skill.tools or []
        method = routing_result.method
        confidence = routing_result.confidence
        direct_without_tools = (
            getattr(skill_config, "execution_mode", "react") == "direct" and not skill_tools
        )
    except Exception as e:
        logger.warning(
            f"Session {session_id}: skill '{skill_name}' found by router but not in registry: {e}"
        )
        return

    if direct_without_tools:
        content_lower = clean_content.lower()
        if any(hint in content_lower for hint in _DIRECT_MODE_TOOL_HINTS):
            logger.info(
                f"Session {session_id}: skill '{skill_name}' is direct-mode "
                f"but task may need tools, falling through to default agent"
            )
            return

    _record_skill_match(result, skill_name, skill_config, skill_tools, method, confidence)
    logger.info(
        f"Session {session_id}: routed to skill '{skill_name}' "
        f"via {method} (confidence={confidence})"
    )
def _build_tools_description(tools: list) -> str:
"""Build a text description of tools for the system prompt."""
lines = []
for tool in tools:
desc = getattr(tool, "description", "")
lines.append(f"- **{tool.name}**: {desc}")
schema = getattr(tool, "input_schema", None)
if schema and "properties" in schema:
params = list(schema["properties"].keys())
if params:
lines[-1] += f" (parameters: {', '.join(params)})"
return "\n".join(lines)
# ---------------------------------------------------------------------------
# CostAwareRouter - three-layer cost-aware routing
# ---------------------------------------------------------------------------
# Layer 0 patterns. Each one must match the WHOLE (stripped) message, apart
# from optional trailing whitespace and punctuation.
# NOTE(review): the character classes below contain only ASCII "!" and "?"
# next to the CJK full stop; the full-width forms may have been normalised
# away somewhere — confirm against the original file.

# Pure greetings.
_GREETING_RE = re.compile(
    r"^(你好|hi|hello|hey|嗨|哈喽|早上好|下午好|晚上好|good morning|good afternoon|good evening)\s*[!.。?]*$",
    re.IGNORECASE,
)
# Short acknowledgements, thanks and farewells.
_CHAT_MODE_RE = re.compile(
    r"^(谢谢|感谢|thanks|thank you|ok|好的|嗯|对|是|不是|没关系|再见|bye|goodbye)\s*[!.。?]*$",
    re.IGNORECASE,
)
# Simple identity/meta questions — zero-cost direct chat, no skill routing needed
_IDENTITY_RE = re.compile(
    r"^(你是谁|你叫什么|你是什么|你是哪个|who are you|what are you|what's your name"
    r"|介绍一下你自己|自我介绍|你叫啥|你叫什么名字|你的名字)"
    r"\s*[?!.。]*$",
    re.IGNORECASE,
)
# Sentence delimiters used to count sentences in a message.
# NOTE(review): ",", "!", "?" and ";" each appear twice in this class, which
# suggests full-width variants were lost — confirm against the original file.
_SENTENCE_SPLIT_RE = re.compile(r"[,。!?;\n,.!?;]")
def _tokenize_content(content: str) -> list[str]:
"""Tokenize content for capability matching. Supports Chinese and English."""
# 1. Split by punctuation and whitespace
segments = re.split(r"[\s,,。!?、;:\n]+", content)
# 2. For long Chinese segments, add 2-gram supplements
tokens = []
for seg in segments:
if len(seg) <= 4:
tokens.append(seg)
else:
tokens.append(seg)
# Add 2-grams for Chinese compound words
for i in range(len(seg) - 1):
bigram = seg[i : i + 2]
if all("\u4e00" <= c <= "\u9fff" for c in bigram):
tokens.append(bigram)
# 3. Filter stopwords
stopwords = {
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
}
tokens = [t for t in tokens if t not in stopwords and len(t) > 1][:10]
return tokens
class HeuristicClassifier:
    """Zero-cost local heuristic classifier, used instead of an LLM quick_classify.

    Estimates request complexity (0.0-1.0) from message length, keyword hits,
    sentence count and code-like characters. No LLM call is made.
    """

    # High-complexity hints (tool use or multi-step reasoning likely).
    # Chinese keywords use substring matching (no natural word boundaries).
    _HIGH_COMPLEXITY_HINTS_CN = {
        "执行",
        "运行",
        "命令",
        "终端",
        "安装",
        "部署",
        "启动",
        "停止",
        "重启",
        "配置",
        "搜索",
        "查找",
        "联网",
        "文件",
        "目录",
        "创建",
        "删除",
        "修改",
        "编辑",
        "分析",
        "比较",
        "对比",
        "评估",
        "调研",
        "研究",
        "设计",
        "规划",
        "方案",
        "架构",
        "实现",
        "开发",
        "代码",
        "编程",
        "函数",
        "接口",
        "调试",
        "重构",
    }
    # English keywords use word-boundary matching (so "profile" does not hit "file").
    _HIGH_COMPLEXITY_HINTS_EN = {
        "shell",
        "bash",
        "script",
        "search",
        "query",
        "directory",
        "execute",
        "install",
        "deploy",
        "restart",
        "modify",
        "analyze",
        "compare",
        "evaluate",
        "research",
        "design",
        "implement",
        "develop",
        "refactor",
        "debug",
        "python",
        "javascript",
        "typescript",
        "sql",
    }
    # Short English words that must match as whole words.
    _HIGH_COMPLEXITY_EXACT_EN = {
        "run",
        "find",
        "start",
        "stop",
        "file",
        "create",
        "delete",
        "plan",
        "build",
        "code",
        "program",
        "function",
        "class",
        "interface",
        "api",
    }
    # Medium-complexity hints (simple questions that still need some thought).
    # "怎么" is deliberately absent: "怎么样" is small talk, not a tool request.
    _MEDIUM_COMPLEXITY_HINTS_CN = {
        "如何",
        "怎样",
        "为什么",
        "什么原因",
        "区别",
        "推荐",
        "建议",
        "选择",
        "哪个",
    }
    _MEDIUM_COMPLEXITY_HINTS_EN = {
        "difference",
        "explain",
        "recommend",
        "suggest",
        "choose",
    }
    # Short English words that must match as whole words.
    _MEDIUM_COMPLEXITY_EXACT_EN = {
        "how",
        "why",
        "what",
        "which",
    }
    # Low-complexity hints (greetings / small talk / identity questions).
    # The second entry used to be an empty string, which is a substring of
    # every message and therefore flagged every message as small talk. It is
    # replaced by "嗨", which _GREETING_RE already treats as a greeting.
    _LOW_COMPLEXITY_HINTS_CN = {
        "你好",
        "嗨",
        "早上好",
        "下午好",
        "晚上好",
        "再见",
        "谢谢",
        "辛苦",
        "你是谁",
        "你叫什么",
        "你是什么",
        "自我介绍",
        "天气",
        "今天",
        "怎么样",
        "闲聊",
        "聊天",
    }
    _LOW_COMPLEXITY_HINTS_EN = {
        "hello",
        "hi",
        "hey",
        "good morning",
        "good afternoon",
        "good evening",
        "goodbye",
        "thanks",
        "who are you",
        "what are you",
        "your name",
        "introduce yourself",
        "how are you",
        "chat",
    }
    # Negation context: the X in "don't X" / "不要X" is excluded from
    # high-complexity matching. Captures 1-4 CJK characters or one English
    # word so that an overly long span is not swallowed.
    _NEGATION_PATTERNS = re.compile(
        r"(?:不要|无需|不用|不需要|别|don'?t|no need|without|not)\s*"
        r"([\u4e00-\u9fff]{1,4}|[a-zA-Z]+)",
        re.IGNORECASE,
    )
    # Message ends with a question mark (optionally followed by whitespace).
    _SHORT_QUESTION_RE = re.compile(r"[?]\s*$")
    # Pre-compiled English word-boundary patterns. Longer alternatives are
    # listed first so a phrase is preferred over a word it starts with.
    _HIGH_EN_RE = re.compile(
        r"\b("
        + "|".join(re.escape(w) for w in sorted(_HIGH_COMPLEXITY_HINTS_EN, key=len, reverse=True))
        + r")\b",
        re.IGNORECASE,
    )
    _HIGH_EXACT_RE = re.compile(
        r"\b("
        + "|".join(re.escape(w) for w in sorted(_HIGH_COMPLEXITY_EXACT_EN, key=len, reverse=True))
        + r")\b",
        re.IGNORECASE,
    )
    _MEDIUM_EN_RE = re.compile(
        r"\b("
        + "|".join(re.escape(w) for w in sorted(_MEDIUM_COMPLEXITY_HINTS_EN, key=len, reverse=True))
        + r")\b",
        re.IGNORECASE,
    )
    _MEDIUM_EXACT_RE = re.compile(
        r"\b("
        + "|".join(re.escape(w) for w in sorted(_MEDIUM_COMPLEXITY_EXACT_EN, key=len, reverse=True))
        + r")\b",
        re.IGNORECASE,
    )
    # English low-complexity hints also match on word boundaries, so "hi"
    # does not fire inside "this" and "hey" does not fire inside "they".
    _LOW_EN_RE = re.compile(
        r"\b("
        + "|".join(re.escape(w) for w in sorted(_LOW_COMPLEXITY_HINTS_EN, key=len, reverse=True))
        + r")\b",
        re.IGNORECASE,
    )

    def classify(self, content: str) -> float:
        """Estimate message complexity in the range 0.0-1.0.

        Scoring rules:
        - empty / whitespace-only message -> 0.0
        - low-complexity signal (greeting / small talk / identity question)
          with no high or medium keyword -> 0.05 (plus a small length bonus)
        - no keyword at all -> 0.10
        - medium-complexity keyword -> 0.35
        - one high-complexity keyword -> 0.65, two or more -> 0.80
        - high-complexity words in a negation context are not counted
        - bonuses for length, sentence count and code-like characters
        - a short message (< 30 characters) ending in "?" is reduced by 0.10

        Args:
            content: The user message.

        Returns:
            The score, clamped to [0.0, 1.0].
        """
        if not content or not content.strip():
            return 0.0
        content_lower = content.lower()
        score = 0.0

        # 0. Low-complexity signals (only effective without high/medium hits).
        # Empty hints are skipped because "" is a substring of every string.
        low_hits = sum(1 for h in self._LOW_COMPLEXITY_HINTS_CN if h and h in content_lower)
        low_hits += len(self._LOW_EN_RE.findall(content))
        has_low_signal = low_hits > 0

        # 1. Negation context — collect the negated words.
        negated_words: set[str] = {
            match.group(1).lower() for match in self._NEGATION_PATTERNS.finditer(content_lower)
        }

        # 2. Keyword matching (negated words excluded from high-complexity hits).
        # Chinese: substring matching.
        high_hits = sum(
            1
            for h in self._HIGH_COMPLEXITY_HINTS_CN
            if h and h in content_lower and h not in negated_words
        )
        medium_hits = sum(1 for m in self._MEDIUM_COMPLEXITY_HINTS_CN if m and m in content_lower)
        # English: word-boundary matching.
        high_en_matches = self._HIGH_EN_RE.findall(content) + self._HIGH_EXACT_RE.findall(content)
        high_hits += sum(1 for w in high_en_matches if w.lower() not in negated_words)
        medium_hits += len(self._MEDIUM_EN_RE.findall(content)) + len(
            self._MEDIUM_EXACT_RE.findall(content)
        )
        has_high_signal = high_hits > 0 or medium_hits > 0

        length = len(content)

        # A low-complexity signal only counts when nothing stronger was found.
        if has_low_signal and not has_high_signal:
            score = 0.05  # greeting / small talk: minimal score
            if length > 200:
                score += 0.05
            elif length > 100:
                score += 0.03
            return max(0.0, min(1.0, score))

        if high_hits >= 2:
            score = 0.80
        elif high_hits == 1:
            score = 0.65
        elif medium_hits >= 1:
            score = 0.35
        else:
            score = 0.10

        # 3. Length bonus.
        if length > 200:
            score += 0.15
        elif length > 100:
            score += 0.10
        elif length > 50:
            score += 0.05

        # 4. Multi-sentence bonus (split on commas / full stops / newlines).
        sentence_count = len(_SENTENCE_SPLIT_RE.split(content))
        if sentence_count >= 4:
            score += 0.10
        elif sentence_count >= 2:
            score += 0.05

        # 5. Code-pattern bonus (a backtick also covers fenced code blocks).
        if "`" in content:
            score += 0.15
        if re.search(r"[\{\}\[\]\(\)]", content):
            score += 0.05

        # 6. Short-question deduction (ends with "?" and shorter than 30 chars).
        if self._SHORT_QUESTION_RE.search(content) and length < 30:
            score -= 0.10

        return max(0.0, min(1.0, score))
class CostAwareRouter:
"""三层成本感知路由器。
Layer 0: 规则匹配(零成本)— @skill: 前缀 / 问候 / 简单对话
Layer 1: 复杂度分类 — heuristic零成本或 LLM~100 tokens
Layer 2: 能力匹配 / 拍卖(可选)— 高复杂度任务委派给最佳 Agent
"""
def __init__(
self,
llm_gateway: Any = None,
model: str = "default",
org_context: Any = None,
auction_enabled: bool = False,
classifier: str = "heuristic",
merged_llm_classify: bool = True,
semantic_router: Any = None, # SemanticRouter | None
expert_team_router: Any = None, # ExpertTeamRouter | None
):
self._llm_gateway = llm_gateway
self._model = model
self._org_context = org_context
self._auction_enabled = auction_enabled
self._classifier = classifier
self._merged_llm_classify = merged_llm_classify
self._semantic_router = semantic_router
self._expert_team_router = expert_team_router
self._auction_house = AuctionHouse() if auction_enabled else None
if classifier not in ("heuristic", "llm"):
raise ValueError(f"Invalid classifier: {classifier!r}, must be 'heuristic' or 'llm'")
self._heuristic = HeuristicClassifier()
# -- Layer 0: Rule-based (zero cost) ------------------------------------
def _match_layer0(self, content: str) -> tuple[str | None, str]:
    """Layer 0 rule matching (zero cost).

    Checks, in order: an explicit ``@skill:`` prefix, a greeting, a simple
    chat reply, and an identity/meta question such as "who are you".

    Returns:
        ``(match_type, clean_content)``. ``match_type`` is None when no rule
        fired. For an explicit skill the content has the prefix removed;
        otherwise it is the stripped message.
    """
    explicit_skill, clean = parse_skill_prefix(content)
    if explicit_skill:
        return "explicit_skill", clean

    stripped = content.strip()
    rule_table = (
        (_GREETING_RE, "greeting"),
        (_CHAT_MODE_RE, "chat_mode"),
        (_IDENTITY_RE, "identity"),
    )
    for pattern, label in rule_table:
        if pattern.match(stripped):
            return label, stripped
    return None, stripped
# -- Layer 1: LLM quick classify (~100 tokens) -------------------------
async def quick_classify(self, content: str) -> float:
"""使用 LLM 快速评估用户请求的复杂度 (0.0-1.0)。
当 LLM Gateway 不可用或解析失败时,返回默认中等复杂度 0.5。
"""
if self._llm_gateway is None:
return 0.5
prompt = (
"You are a complexity classifier. Rate the complexity of the user request on a scale of 0.0 to 1.0.\n"
"0.0 = trivial greeting, 0.3 = simple question, 0.5 = moderate task, "
"0.7 = complex multi-step task, 1.0 = very complex research task.\n\n"
"---BEGIN USER REQUEST---\n"
f"{content}\n"
"---END USER REQUEST---\n\n"
'Respond ONLY with a JSON object: {"complexity": <float>}'
)
try:
response = await self._llm_gateway.chat(
messages=[{"role": "user", "content": prompt}],
model=self._model,
)
data = json.loads(response.content.strip())
complexity = float(data.get("complexity", 0.5))
return max(0.0, min(1.0, complexity))
except Exception as e:
logger.warning(f"CostAwareRouter quick_classify failed: {e}")
return 0.5
# -- Layer 1.5: Merged LLM classify (complexity + intent in one call) ---
async def _classify_merged(
    self,
    content: str,
    skill_registry: Any,
    intent_router: Any,
    default_tools: list,
    default_system_prompt: str | None,
    default_model: str,
    default_agent_name: str,
    agent_tool_registry: Any = None,
    session_id: str = "",
    complexity: float = 0.5,
) -> SkillRoutingResult:
    """Merged LLM call: one request yields complexity + intent + skill_hint.

    Intended for messages whose heuristic score is inconclusive; it replaces
    separate quick_classify() and intent-classification calls, saving one LLM
    round trip. When no gateway is configured or merged classification is
    disabled, routing is delegated to resolve_skill_routing().

    If the LLM names a skill that exists in the registry, the result is built
    for that skill (execution mode taken from the skill config, as in
    resolve_skill_routing). Otherwise the default agent is used: DIRECT_CHAT
    below complexity 0.3, REACT otherwise. Any failure of the call or of the
    parsing yields a REACT fallback with complexity 0.5.

    Args:
        content: User message to classify.
        skill_registry: Registry exposing ``get(name)`` and ``list_skills()``.
        intent_router: Passed through to resolve_skill_routing() on fallback.
        default_tools: Tools of the default agent.
        default_system_prompt: Prompt used when the skill provides none.
        default_model: Model used when the skill config names none.
        default_agent_name: Agent name for non-skill results.
        agent_tool_registry: Optional registry exposing ``list_tools()``.
        session_id: Session identifier, used only in log messages.
        complexity: Caller's prior complexity estimate. Currently unused; kept
            for interface compatibility.

    Returns:
        A fully populated SkillRoutingResult.
    """

    def default_result(
        method: str, confidence: float, complexity_value: float, mode: ExecutionMode
    ) -> SkillRoutingResult:
        # All non-skill outcomes share the default agent's parameters.
        return SkillRoutingResult(
            clean_content=content,
            system_prompt=default_system_prompt,
            tools=default_tools,
            model=default_model,
            agent_name=default_agent_name,
            matched=False,
            match_method=method,
            match_confidence=confidence,
            complexity=complexity_value,
            execution_mode=mode,
        )

    if self._llm_gateway is None or not self._merged_llm_classify:
        # Fallback: route with the standalone IntentRouter path.
        return await resolve_skill_routing(
            content=content,
            skill_registry=skill_registry,
            intent_router=intent_router,
            default_tools=default_tools,
            default_system_prompt=default_system_prompt,
            default_model=default_model,
            default_agent_name=default_agent_name,
            agent_tool_registry=agent_tool_registry,
            session_id=session_id,
        )

    # Build skill list for the prompt
    skill_hints = []
    if skill_registry:
        try:
            for s in skill_registry.list_skills():
                if s.config.intent and s.config.intent.keywords:
                    skill_hints.append(s.name)
        except Exception as e:
            # Best effort: classify with whatever skills were collected so far.
            logger.warning(
                f"Session {session_id}: could not list skills for merged classify: {e}"
            )
    skill_list_str = ", ".join(skill_hints) if skill_hints else "none"
    prompt = (
        "You are a routing classifier. Analyze the user request and output:\n"
        "1. complexity (0.0-1.0): how complex is this request\n"
        "2. intent: the primary intent category\n"
        "3. skill_hint: the best matching skill name, or null if none match\n\n"
        f"Available skills: [{skill_list_str}]\n\n"
        "---BEGIN USER REQUEST---\n"
        f"{content}\n"
        "---END USER REQUEST---\n\n"
        'Respond ONLY with a JSON object: {"complexity": <float>, "intent": <string>, "skill_hint": <string|null>}'
    )
    try:
        response = await self._llm_gateway.chat(
            messages=[{"role": "user", "content": prompt}],
            model=self._model,
        )
        data = json.loads(response.content.strip())
        merged_complexity = float(data.get("complexity", 0.5))
        merged_complexity = max(0.0, min(1.0, merged_complexity))
        skill_hint = data.get("skill_hint")

        # If skill_hint provided and valid, route directly to that skill
        if skill_hint and skill_registry:
            try:
                matched_skill = skill_registry.get(skill_hint)
                skill_config = matched_skill.config
                result = SkillRoutingResult(
                    clean_content=content,
                    skill_name=skill_hint,
                    skill_config=skill_config,
                    skill_tools=matched_skill.tools or [],
                    matched=True,
                    match_method="merged_llm",
                    match_confidence=0.7,
                    complexity=merged_complexity,
                    # Honour the skill's configured mode, exactly as
                    # resolve_skill_routing does for every other match path.
                    execution_mode=_resolve_execution_mode(skill_config),
                )
                # Merge skill tools with agent tools, deduplicating by name.
                agent_tools = (
                    agent_tool_registry.list_tools() if agent_tool_registry else default_tools
                )
                seen_names: set = set()
                merged_tools = []
                for tool in [*result.skill_tools, *(agent_tools or [])]:
                    if tool.name not in seen_names:
                        seen_names.add(tool.name)
                        merged_tools.append(tool)
                result.tools = merged_tools
                result.model = (
                    result.skill_config.llm.get("model", default_model)
                    if result.skill_config.llm
                    else default_model
                )
                result.agent_name = skill_hint
                result.system_prompt = (
                    build_skill_system_prompt(result.skill_config) or default_system_prompt
                )
                # Append available tools to system prompt so LLM knows what it can call.
                # The section is emitted even when there is no base prompt.
                if result.tools:
                    tools_desc = _build_tools_description(result.tools)
                    tool_instruction = (
                        "\n\n## Tool Usage\n"
                        "You have access to the following tools. When you need to use a tool, "
                        "respond with a tool call in the format specified by the system.\n"
                        "Never make up information or guess answers when you can use a tool to find the answer.\n"
                        "Always prefer using tools over guessing.\n"
                    )
                    tool_section = f"{tool_instruction}\n## Available Tools\n{tools_desc}"
                    result.system_prompt = (result.system_prompt or "") + tool_section
                logger.info(
                    f"Session {session_id}: merged LLM classify routed to skill '{skill_hint}' "
                    f"(complexity={merged_complexity:.2f})"
                )
                return result
            except Exception as e:
                logger.warning(
                    f"Session {session_id}: merged LLM skill_hint '{skill_hint}' not found: {e}"
                )

        # No valid skill_hint — use complexity to decide routing
        if merged_complexity < 0.3:
            return default_result(
                "merged_llm_low",
                1.0 - merged_complexity,
                merged_complexity,
                ExecutionMode.DIRECT_CHAT,
            )
        if merged_complexity > 0.7:
            # High complexity — delegate to Layer 2
            return default_result(
                "merged_llm_high", merged_complexity, merged_complexity, ExecutionMode.REACT
            )
        # Medium complexity, no skill match — default agent
        return default_result("merged_llm_medium", 0.5, merged_complexity, ExecutionMode.REACT)
    except (json.JSONDecodeError, TypeError, ValueError) as e:
        logger.warning(
            f"CostAwareRouter _classify_merged parse failed: {e}, falling back to default"
        )
        return default_result("merged_llm_fallback", 0.5, 0.5, ExecutionMode.REACT)
    except Exception as e:
        logger.warning(f"CostAwareRouter _classify_merged failed: {e}, falling back to default")
        return default_result("merged_llm_fallback", 0.5, 0.5, ExecutionMode.REACT)
# -- Layer 2: Capability matching / Auction (optional) -----------------
def _try_team_upgrade(
    self,
    result: SkillRoutingResult,
    content: str,
    complexity: float,
    trace: list[dict] | None,
) -> SkillRoutingResult:
    """Upgrade a REACT result to TEAM_COLLAB when an expert team can take it.

    The upgrade is considered only when the result's mode is REACT, the
    complexity is at least 0.7 and an expert team router is configured. It is
    applied when that router can handle the content and resolves it in team
    mode. Errors from the team router are logged and leave the result as is.

    Returns:
        The same ``result`` object, possibly with its execution mode changed.
    """
    eligible = (
        result.execution_mode == ExecutionMode.REACT
        and complexity >= 0.7
        and self._expert_team_router is not None
    )
    if not eligible:
        return result

    try:
        if not self._expert_team_router.can_handle(content):
            return result
        team_result = self._expert_team_router.resolve(content, complexity)
        if not team_result.team_mode:
            return result
        result.execution_mode = ExecutionMode.TEAM_COLLAB
        if trace is not None:
            upgrade_entry = {
                "layer": 2,
                "method": "team_upgrade",
                "from_mode": "REACT",
                "to_mode": "TEAM_COLLAB",
                "team_match_method": team_result.match_method,
                "complexity": complexity,
            }
            trace.append(upgrade_entry)
    except Exception as e:
        logger.warning(f"CostAwareRouter team upgrade check failed: {e}")
    return result
async def _route_layer2(
    self,
    content: str,
    skill_registry: Any,
    intent_router: Any,
    default_tools: list,
    default_system_prompt: str | None,
    default_model: str,
    default_agent_name: str,
    agent_tool_registry: Any = None,
    session_id: str = "",
    complexity: float = 0.0,
    trace: list[dict] | None = None,
) -> SkillRoutingResult:
    """Layer 2: route a high-complexity task via auction or org_context.find_best_agent.

    Three strategies are tried in order, each falling through to the next on
    failure or when it yields no agent:

    1. Vickrey auction among candidate agents (only when auctions are enabled
       and both an auction house and an org context exist).
    2. Capability matching through ``org_context.find_best_agent``.
    3. ``resolve_skill_routing`` (IntentRouter) as the final fallback.

    Every returned result is passed through ``_try_team_upgrade``. When
    ``trace`` is not None, one entry describing the chosen strategy is appended.
    """
    # Extract capability-like keywords from content for matching
    content_words = _tokenize_content(content)
    # --- Vickrey auction path (when enabled) ---
    if (
        self._auction_enabled
        and self._auction_house is not None
        and self._org_context is not None
    ):
        try:
            # Gather candidate agents from org_context
            all_agents = (
                self._org_context.list_agents()
                if hasattr(self._org_context, "list_agents")
                else []
            )
            # Filter agents that have at least one relevant capability
            candidate_agents = []
            for agent_profile in all_agents:
                # Agents flagged as unavailable never bid.
                if not agent_profile.availability:
                    continue
                # Check if agent has any of the content_words as capabilities
                agent_caps_lower = {c.lower() for c in agent_profile.capabilities}
                if any(w.lower() in agent_caps_lower for w in content_words):
                    candidate_agents.append(agent_profile)
            # Also include agents that match via find_best_agent (they have ALL required caps)
            best = self._org_context.find_best_agent(required_capabilities=content_words)
            if best is not None:
                # find_best_agent may return either a name or a profile-like object.
                best_name = best if isinstance(best, str) else getattr(best, "name", str(best))
                existing_names = {a.name for a in candidate_agents}
                if best_name not in existing_names:
                    profile = (
                        self._org_context.get_agent_profile(best_name)
                        if hasattr(self._org_context, "get_agent_profile")
                        else best
                    )
                    # Only objects that expose a name can take part in the auction.
                    if hasattr(profile, "name"):
                        candidate_agents.append(profile)
            if len(candidate_agents) >= 1:
                # Build Bid objects for each candidate
                bids = []
                for agent_profile in candidate_agents:
                    name = (
                        agent_profile.name
                        if hasattr(agent_profile, "name")
                        else str(agent_profile)
                    )
                    caps = (
                        agent_profile.capabilities
                        if hasattr(agent_profile, "capabilities")
                        else []
                    )
                    arch = (
                        agent_profile.agent_type
                        if hasattr(agent_profile, "agent_type")
                        else "react"
                    )
                    # Use current_load as a proxy for estimated_cost (higher load → higher cost)
                    estimated_cost = (
                        float(agent_profile.current_load + 1)
                        if hasattr(agent_profile, "current_load")
                        else 1.0
                    )
                    # Steps and confidence are fixed placeholders; only the
                    # load-derived cost differs between bids.
                    bids.append(
                        Bid(
                            agent_name=name,
                            architecture=arch,
                            estimated_steps=1,
                            estimated_cost=estimated_cost,
                            confidence=0.8,
                            payment_offer=estimated_cost,
                            capabilities=caps,
                        )
                    )
                auction_result = await self._auction_house.run_vickrey_auction(
                    task_description=content,
                    bidders=bids,
                    required_capabilities=content_words,
                )
                if auction_result.winner is not None:
                    winner_name = auction_result.winner.agent_name
                    # The winner runs with the default prompt, tools and model.
                    result = SkillRoutingResult(
                        clean_content=content,
                        matched=True,
                        match_method="vickrey_auction",
                        match_confidence=0.8,
                        agent_name=winner_name,
                        model=default_model,
                        system_prompt=default_system_prompt,
                        tools=default_tools,
                        complexity=complexity,
                        execution_mode=ExecutionMode.REACT,
                    )
                    if trace is not None:
                        trace.append(
                            {
                                "layer": 2,
                                "method": "vickrey_auction",
                                "agent_name": winner_name,
                                "complexity": complexity,
                                "selection_reason": auction_result.selection_reason,
                            }
                        )
                    return self._try_team_upgrade(result, content, complexity, trace)
            # No winner from auction → fall through to capability matching
        except Exception as e:
            logger.warning(f"CostAwareRouter Layer 2 Vickrey auction failed: {e}")
    # --- Capability matching path (default) ---
    if self._org_context is not None and hasattr(self._org_context, "find_best_agent"):
        try:
            best_agent = self._org_context.find_best_agent(required_capabilities=content_words)
            if best_agent is not None:
                # Accept either a plain agent name or an object with a name.
                agent_name = (
                    best_agent
                    if isinstance(best_agent, str)
                    else getattr(best_agent, "name", str(best_agent))
                )
                result = SkillRoutingResult(
                    clean_content=content,
                    matched=True,
                    match_method="capability",
                    match_confidence=0.8,
                    agent_name=agent_name,
                    model=default_model,
                    system_prompt=default_system_prompt,
                    tools=default_tools,
                    complexity=complexity,
                    execution_mode=ExecutionMode.REACT,
                )
                if trace is not None:
                    trace.append(
                        {
                            "layer": 2,
                            "method": "capability",
                            "agent_name": agent_name,
                            "complexity": complexity,
                        }
                    )
                return self._try_team_upgrade(result, content, complexity, trace)
        except Exception as e:
            logger.warning(f"CostAwareRouter Layer 2 org_context.find_best_agent failed: {e}")
    # Fallback: use the IntentRouter path
    result = await resolve_skill_routing(
        content=content,
        skill_registry=skill_registry,
        intent_router=intent_router,
        default_tools=default_tools,
        default_system_prompt=default_system_prompt,
        default_model=default_model,
        default_agent_name=default_agent_name,
        agent_tool_registry=agent_tool_registry,
        session_id=session_id,
    )
    # resolve_skill_routing does not know the complexity; record it here.
    result.complexity = complexity
    if trace is not None:
        trace.append(
            {
                "layer": 2,
                "method": "intent_router_fallback",
                "complexity": complexity,
            }
        )
    return self._try_team_upgrade(result, content, complexity, trace)
# -- Main entry point ---------------------------------------------------
async def route(
    self,
    content: str,
    skill_registry: Any,
    intent_router: Any,
    default_tools: list,
    default_system_prompt: str | None,
    default_model: str = "default",
    default_agent_name: str = "default",
    agent_tool_registry: Any = None,
    session_id: str = "",
    transparency: str = "SILENT",
) -> SkillRoutingResult:
    """Main entry point of the layered, cost-aware router.

    The layers are tried in order and the first one that produces a result
    returns it:

    * Layer 0 - rule-based match (``self._match_layer0``): an explicit skill
      request is resolved via ``resolve_skill_routing``; greeting /
      chat-mode / identity messages go straight to direct chat.
    * Layer 1 - complexity classification (heuristic or ``quick_classify``);
      a score below 0.3 goes to direct chat.
    * Layer 1.5 - optional semantic router; a high-confidence skill match
      returns immediately, a medium-confidence match is only recorded.
    * Medium complexity (score <= 0.7) - merged LLM classification when
      enabled and an LLM gateway is configured, otherwise
      ``resolve_skill_routing``. A merged result flagged as high complexity
      is delegated to Layer 2.
    * Layer 2 - ``self._route_layer2`` for everything else.

    Args:
        content: Raw user input.
        skill_registry: Skill registry, forwarded to the routing helpers.
        intent_router: IntentRouter instance, forwarded to the routing helpers.
        default_tools: Tools used when no skill overrides them.
        default_system_prompt: System prompt used when no skill overrides it.
        default_model: Default model name.
        default_agent_name: Default agent name.
        agent_tool_registry: Agent tool registry, forwarded to the routing
            helpers.
        session_id: Session identifier.
        transparency: Transparency level (SILENT / VERBOSE / TRACE). With
            ``"SILENT"`` the returned ``execution_trace`` is an empty list;
            any other value attaches the collected trace.

    Returns:
        The ``SkillRoutingResult`` for the selected route, with
        ``execution_trace`` and ``transparency_level`` populated.
    """
    trace: list[dict] = []
    # Keyword arguments shared by every downstream routing helper call.
    routing_kwargs: dict[str, Any] = {
        "content": content,
        "skill_registry": skill_registry,
        "intent_router": intent_router,
        "default_tools": default_tools,
        "default_system_prompt": default_system_prompt,
        "default_model": default_model,
        "default_agent_name": default_agent_name,
        "agent_tool_registry": agent_tool_registry,
        "session_id": session_id,
    }
    tracer = get_tracer()
    with tracer.start_span("router.route") as span:

        def _finalize(result: SkillRoutingResult, layer: str, target: str) -> SkillRoutingResult:
            # Single exit path: attach trace/transparency and tag the span so
            # that no return branch can forget the routing attributes.
            result.execution_trace = trace if transparency != "SILENT" else []
            result.transparency_level = transparency
            span.set_attribute("route.layer", layer)
            span.set_attribute("route.target", target)
            return result

        span.set_attribute("input.length", len(content))

        # ---- Layer 0: Rule-based (zero cost) ----
        match_type, clean_content = self._match_layer0(content)
        if match_type == "explicit_skill":
            result = await resolve_skill_routing(**routing_kwargs)
            result.match_method = result.match_method or "explicit_skill"
            result.complexity = 0.0
            trace.append(
                {
                    "layer": 0,
                    "method": "explicit_skill",
                    "matched": result.matched,
                    "cost": "zero",
                }
            )
            return _finalize(
                result,
                result.match_method or "explicit_skill",
                result.skill_name or "default",
            )

        if match_type in ("greeting", "chat_mode", "identity"):
            result = SkillRoutingResult(
                clean_content=clean_content,
                system_prompt=default_system_prompt,
                tools=default_tools,
                model=default_model,
                agent_name=default_agent_name,
                matched=False,
                match_method=match_type,
                match_confidence=1.0,
                complexity=0.0,
                execution_mode=ExecutionMode.DIRECT_CHAT,
            )
            trace.append(
                {
                    "layer": 0,
                    "method": match_type,
                    "matched": False,
                    "cost": "zero",
                }
            )
            return _finalize(result, match_type, "default")

        # ---- Layer 1: Complexity classification ----
        if self._classifier == "heuristic":
            complexity = self._heuristic.classify(clean_content)
            classify_method = "heuristic_classify"
        else:
            complexity = await self.quick_classify(clean_content)
            classify_method = "quick_classify"
        trace.append(
            {
                "layer": 1,
                "method": classify_method,
                "complexity": complexity,
            }
        )

        # Low complexity -> direct chat
        if complexity < 0.3:
            result = SkillRoutingResult(
                clean_content=clean_content,
                system_prompt=default_system_prompt,
                tools=default_tools,
                model=default_model,
                agent_name=default_agent_name,
                matched=False,
                match_method="low_complexity",
                match_confidence=1.0 - complexity,
                complexity=complexity,
                execution_mode=ExecutionMode.DIRECT_CHAT,
            )
            trace.append(
                {
                    "layer": 1,
                    "method": "low_complexity",
                    "complexity": complexity,
                    "routed_to": "default",
                }
            )
            return _finalize(result, "low_complexity", "default")

        # ---- Layer 1.5: Semantic Router (zero LLM cost) ----
        # NOTE(review): skill_hint is recorded in the trace but is not
        # forwarded to the merged classify / Layer 2 calls below - confirm
        # whether those helpers are meant to receive it.
        skill_hint = None
        if self._semantic_router is not None and complexity >= 0.3:
            try:
                semantic_result = await self._semantic_router.route(clean_content)
                if semantic_result.confidence == "high" and semantic_result.skill_name:
                    # Direct skill match - skip Layer 2
                    trace.append(
                        {
                            "layer": 1.5,
                            "method": "semantic_high",
                            "skill": semantic_result.skill_name,
                            "similarity": round(semantic_result.similarity, 3),
                            "cost": "zero",
                        }
                    )
                    result = await resolve_skill_routing(
                        **routing_kwargs,
                        force_skill=semantic_result.skill_name,
                    )
                    result.match_method = "semantic_high"
                    result.match_confidence = semantic_result.similarity
                    result.complexity = complexity
                    if result.matched:
                        result.execution_mode = ExecutionMode.SKILL_REACT
                    return _finalize(
                        result,
                        "semantic_high",
                        result.skill_name or "default",
                    )
                elif semantic_result.confidence == "medium" and semantic_result.skill_name:
                    skill_hint = semantic_result.skill_name
                    trace.append(
                        {
                            "layer": 1.5,
                            "method": "semantic_medium",
                            "skill_hint": skill_hint,
                            "similarity": round(semantic_result.similarity, 3),
                        }
                    )
            except Exception as e:
                # Best effort: a semantic-router failure must not break
                # routing, so record it and fall through to later layers.
                logger.warning(f"Semantic routing failed, falling through: {e}")
                trace.append(
                    {
                        "layer": 1.5,
                        "method": "semantic_error",
                        "error": str(e),
                    }
                )

        # Medium complexity -> merged LLM classify or IntentRouter
        if complexity <= 0.7:
            if self._merged_llm_classify and self._llm_gateway is not None:
                # Use merged LLM call: complexity + intent in one call
                result = await self._classify_merged(
                    **routing_kwargs,
                    complexity=complexity,
                )
                # If merged classify returned high complexity, delegate to Layer 2
                if (
                    result.complexity > 0.7
                    and result.match_method
                    and result.match_method.startswith("merged_llm_high")
                ):
                    trace.append(
                        {
                            "layer": 1,
                            "method": "merged_llm_high",
                            "complexity": result.complexity,
                            "delegated_to_layer2": True,
                        }
                    )
                    layer2_result = await self._route_layer2(
                        **routing_kwargs,
                        complexity=result.complexity,
                        trace=trace,
                    )
                    # Tag the span exactly like the direct Layer 2 path; the
                    # delegation branch previously returned without doing so.
                    return _finalize(
                        layer2_result,
                        layer2_result.match_method or "capability",
                        layer2_result.skill_name or layer2_result.agent_name or "default",
                    )
            else:
                # Fallback: use separate IntentRouter
                result = await resolve_skill_routing(**routing_kwargs)
            # Keep a complexity reported by the helper; otherwise use Layer 1's.
            result.complexity = result.complexity if result.complexity > 0 else complexity
            trace.append(
                {
                    "layer": 1,
                    "method": result.match_method or "merged_llm",
                    "complexity": result.complexity,
                    "matched": result.matched,
                }
            )
            return _finalize(
                result,
                result.match_method or "merged_llm",
                result.skill_name or "default",
            )

        # ---- Layer 2: Capability matching / Auction (high complexity) ----
        trace.append(
            {
                "layer": 2,
                "method": "capability_or_auction",
                "complexity": complexity,
                "auction_enabled": self._auction_enabled,
            }
        )
        result = await self._route_layer2(
            **routing_kwargs,
            complexity=complexity,
            trace=trace,
        )
        return _finalize(
            result,
            result.match_method or "capability",
            result.skill_name or result.agent_name or "default",
        )