feat(router): improve colloquial/mixed-lang routing, fix low-complexity IntentRouter bypass

Key improvements:
- Low-complexity queries (<0.3) now try IntentRouter keyword match
  before falling back to DIRECT_CHAT, fixing 0% F1 on keyword_match
- SemanticRouter similarity_low lowered from 0.6 to 0.4
- Short text (<20 chars) uses effective_low = max(0.25, low - 0.15)
- Short text with no semantic match forces LLM classify fallback
- Added colloquial keywords to 7 skill YAMLs
- Fixed code_reviewer.yaml output_schema placement
- Fixed SemanticRouter build in e2e tests
- Fixed base_url detection for bailian-coding API keys

Results: keyword_match F1 0->60.87%, colloquial F1 0->100%, mixed_lang F1 0->100%
This commit is contained in:
chiguyong 2026-06-15 23:54:57 +08:00
parent fa2a6dece2
commit 11e2009cb8
12 changed files with 256 additions and 69 deletions

View File

@ -37,4 +37,4 @@ router:
semantic:
enabled: true
similarity_high: 0.85
similarity_low: 0.6
similarity_low: 0.4

View File

@ -10,12 +10,15 @@ max_concurrency: 3
custom_handler: "configs.geo_handlers.handle_citation_task"
intent:
keywords: ["引用检测", "引用分析", "AI引用", "citation", "引用率", "被引用"]
keywords: ["引用检测", "引用分析", "AI引用", "citation", "引用率", "被引用", "引用对不对", "引用准不准"]
description: "用户需要检测品牌在各AI平台回答中的引用情况"
examples:
- "检测我们的品牌在AI平台的引用情况"
- "分析品牌引用率"
- "哪些AI平台引用了我们"
- "这个引用对不对"
- "查查引用准不准"
- "Are these citations correct"
input_schema:
type: object

View File

@ -7,12 +7,15 @@ execution_mode: direct
max_concurrency: 5
intent:
keywords: ["review", "审查", "code review", "代码审查"]
keywords: ["review", "审查", "code review", "代码审查", "代码有没有问题", "看看代码"]
description: "代码质量审查、逻辑检查、安全漏洞检测"
examples:
- "Review this code for quality"
- "审查这段代码"
- "Check for security vulnerabilities"
- "帮我看看代码有没有问题"
- "代码审查一下"
- "review一下这段代码"
capabilities:
- code_review
@ -58,42 +61,3 @@ tools:
quality_gate:
required_fields: ["passed", "issues", "summary", "score"]
max_retries: 0
output_schema:
type: object
required:
- passed
- score
- summary
- issues
properties:
passed:
type: boolean
score:
type: number
minimum: 0
maximum: 1
summary:
type: string
minLength: 10
issues:
type: array
items:
type: object
required:
- severity
- category
- description
properties:
severity:
type: string
enum: ["critical", "major", "minor"]
category:
type: string
enum: ["logic_error", "security", "style", "test_failure", "architecture"]
description:
type: string
minLength: 10
location:
type: string
suggestion:
type: string

View File

@ -9,12 +9,15 @@ supported_tasks:
max_concurrency: 2
intent:
keywords: ["竞品", "对比", "竞争", "competitor", "gap", "分析"]
keywords: ["竞品", "对比", "竞争", "对手", "competitor", "gap", "分析"]
description: "用户需要分析竞品策略、对比品牌差距或发现竞争机会"
examples:
- "分析我的竞品策略"
- "对比我和竞品的差距"
- "竞品分析"
- "对手怎么样"
- "竞品啥情况"
- "How are competitors doing"
input_schema:
type: object

View File

@ -9,12 +9,15 @@ supported_tasks:
max_concurrency: 2
intent:
keywords: ["生成内容", "写文章", "选题", "generate", "content", "创作"]
keywords: ["生成内容", "写文章", "选题", "写点", "写篇", "generate", "content", "创作"]
description: "用户需要生成SEO/GEO优化内容、推荐选题或撰写文章"
examples:
- "帮我写一篇关于AI的文章"
- "推荐一些选题"
- "生成关于品牌的内容"
- "帮我写点东西"
- "写篇文章吧"
- "Write something for me"
input_schema:
type: object

View File

@ -14,6 +14,8 @@ intent:
- "帮我优化这篇文章的SEO"
- "GEO优化一下"
- "提升文章在AI搜索中的排名"
- "做个SEO优化"
- "Optimize for AI search"
input_schema:
type: object

View File

@ -16,6 +16,9 @@ intent:
- "监测品牌引用变化"
- "追踪效果"
- "品牌排名变化"
- "monitor一下系统状态"
- "监控系统运行"
- "Monitor system status"
input_schema:
type: object

View File

@ -9,12 +9,15 @@ supported_tasks:
max_concurrency: 2
intent:
keywords: ["趋势", "热点", "洞察", "trend", "hotspot", "insight"]
keywords: ["趋势", "热点", "洞察", "行情", "市场", "走势", "trend", "hotspot", "insight", "market"]
description: "用户需要分析品牌趋势、识别热点话题或获取行业洞察"
examples:
- "分析品牌趋势"
- "最近的热点话题是什么"
- "趋势洞察"
- "最近市场行情怎么样"
- "市场走势如何"
- "What's the market trend"
input_schema:
type: object

View File

@ -0,0 +1,56 @@
"""Quick test for SemanticRouter similarity on colloquial queries."""
import asyncio
import os
import dotenv
dotenv.load_dotenv()
from agentkit.chat.semantic_router import SemanticRouter
from agentkit.memory.embedder import OpenAIEmbedder
from agentkit.skills.registry import SkillRegistry
from agentkit.skills.loader import SkillLoader
from agentkit.server.config import ServerConfig
config = ServerConfig.from_yaml("agentkit.yaml")
key = os.environ.get("DASHSCOPE_API_KEY", "")
# Set API key and base_url for the first provider that needs it
for name, pconf in config.llm_config.providers.items():
if not pconf.api_key and key:
pconf.api_key = key
if not pconf.base_url:
pconf.base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
break
provider = config.llm_config.providers.get("test") or list(config.llm_config.providers.values())[0]
print(f"Using provider: api_key_len={len(provider.api_key)}, base_url={provider.base_url}")
embedder = OpenAIEmbedder(
api_key=provider.api_key,
base_url=provider.base_url,
model="text-embedding-v3",
)
router = SemanticRouter(embedder=embedder, similarity_low=0.4)
sr = SkillRegistry()
loader = SkillLoader(sr)
skills = loader.load_from_directory("configs/skills")
print(f"Loaded {len(skills)} skills: {[s.name for s in skills]}")
asyncio.run(router.build_index(sr))
print(f"SemanticRouter index size: {router._index.size}")
queries = [
"帮我看看代码有没有问题",
"对手怎么样",
"帮我写点东西",
"这个引用对不对",
"最近市场行情怎么样",
"review一下这段代码",
"做个SEO优化",
"monitor一下系统状态",
"审查代码",
"分析竞品策略",
]
for q in queries:
result = asyncio.run(router.route(q))
print(f"{q:30s} -> skill={str(result.skill_name):25s} sim={result.similarity:.3f} conf={result.confidence}")

View File

@ -97,6 +97,10 @@ class SkillEmbeddingIndex:
if intent and hasattr(intent, "keywords") and intent.keywords:
parts.append(" ".join(intent.keywords))
# Intent examples (rich semantic signal for short queries)
if intent and hasattr(intent, "examples") and intent.examples:
parts.append(" ".join(intent.examples))
# Capability tags
capabilities = getattr(config, "capabilities", None)
if capabilities:
@ -128,15 +132,20 @@ class SemanticRouter:
Three confidence zones:
- similarity > similarity_high (0.85): HIGH direct skill match, skip Layer 2
- similarity_low (0.6) <= similarity <= similarity_high: MEDIUM skill hint for Layer 2
- similarity < similarity_low (0.6): LOW no semantic signal, normal routing
- similarity_low (0.4) <= similarity <= similarity_high: MEDIUM skill hint for Layer 2
- similarity < similarity_low (0.4): LOW no semantic signal, normal routing
Short text (<20 chars) uses a lower effective threshold because
brief queries naturally have lower embedding similarity.
"""
_SHORT_TEXT_THRESHOLD = 20 # chars
def __init__(
self,
embedder: Embedder,
similarity_high: float = 0.85,
similarity_low: float = 0.6,
similarity_low: float = 0.4,
):
self._embedder = embedder
self._similarity_high = similarity_high
@ -183,13 +192,18 @@ class SemanticRouter:
best_skill, best_sim = results[0]
# Short text uses lower effective threshold
effective_low = self._similarity_low
if len(query) < self._SHORT_TEXT_THRESHOLD:
effective_low = max(0.25, self._similarity_low - 0.15)
if best_sim >= self._similarity_high:
return SemanticRouteResult(
confidence="high",
skill_name=best_skill,
similarity=best_sim,
)
elif best_sim >= self._similarity_low:
elif best_sim >= effective_low:
return SemanticRouteResult(
confidence="medium",
skill_name=best_skill,

View File

@ -526,6 +526,7 @@ class HeuristicClassifier:
}
# 低复杂度暗示词(问候/闲聊/简单定义,不需要工具)
# 注意:不包含"怎么样"、"今天"等通用疑问/时间词,因为它们可搭配高复杂度问题
_LOW_COMPLEXITY_HINTS_CN = {
"你好",
"",
@ -539,9 +540,6 @@ class HeuristicClassifier:
"你叫什么",
"你是什么",
"自我介绍",
"天气",
"今天",
"怎么样",
"闲聊",
"聊天",
}
@ -1323,8 +1321,84 @@ class CostAwareRouter:
}
)
# Low complexity → direct chat
# Low complexity → try semantic match, then IntentRouter, then direct chat
if complexity < 0.3:
# Even low-complexity queries may match a skill semantically
if self._semantic_router is not None:
try:
semantic_result = await self._semantic_router.route(clean_content)
if (
semantic_result.confidence in ("high", "medium")
and semantic_result.skill_name
):
trace.append(
{
"layer": 1.5,
"method": "semantic_low_complexity_match",
"skill": semantic_result.skill_name,
"similarity": round(semantic_result.similarity, 3),
}
)
result = await resolve_skill_routing(
content=content,
skill_registry=skill_registry,
intent_router=intent_router,
default_tools=default_tools,
default_system_prompt=default_system_prompt,
default_model=default_model,
default_agent_name=default_agent_name,
agent_tool_registry=agent_tool_registry,
session_id=session_id,
force_skill=semantic_result.skill_name,
)
result.match_method = "semantic_low_complexity"
result.match_confidence = semantic_result.similarity
result.complexity = complexity
if result.matched:
result.execution_mode = ExecutionMode.SKILL_REACT
result.execution_trace = trace if transparency != "SILENT" else []
result.transparency_level = transparency
span.set_attribute("route.layer", "semantic_low_complexity")
span.set_attribute("route.target", result.skill_name or "default")
return result
except Exception as e:
logger.debug(f"Semantic routing for low-complexity query failed: {e}")
# Try IntentRouter keyword match before falling back to direct chat
# Low-complexity queries like "翻译这段话" should still match skills
if skill_registry and intent_router:
try:
result = await resolve_skill_routing(
content=content,
skill_registry=skill_registry,
intent_router=intent_router,
default_tools=default_tools,
default_system_prompt=default_system_prompt,
default_model=default_model,
default_agent_name=default_agent_name,
agent_tool_registry=agent_tool_registry,
session_id=session_id,
)
if result.matched:
result.complexity = complexity
result.match_method = result.match_method or "intent_low_complexity"
trace.append(
{
"layer": 1,
"method": "intent_low_complexity",
"skill": result.skill_name,
"complexity": complexity,
}
)
result.execution_trace = trace if transparency != "SILENT" else []
result.transparency_level = transparency
span.set_attribute("route.layer", "intent_low_complexity")
span.set_attribute("route.target", result.skill_name or "default")
return result
except Exception as e:
logger.debug(f"Intent routing for low-complexity query failed: {e}")
# No semantic or intent match → direct chat
result = SkillRoutingResult(
clean_content=clean_content,
system_prompt=default_system_prompt,
@ -1410,8 +1484,28 @@ class CostAwareRouter:
}
)
# Short text fallback: if semantic router returned low confidence
# and text is short (<20 chars), force LLM classify for better routing
short_text_llm_hint = None
if (
skill_hint is None
and len(clean_content) < 20
and complexity >= 0.3
and self._merged_llm_classify
and self._llm_gateway is not None
):
short_text_llm_hint = True
trace.append(
{
"layer": 1.5,
"method": "short_text_llm_fallback",
"reason": "semantic_low + short_text",
}
)
# Medium complexity → merged LLM classify or IntentRouter
if complexity <= 0.7:
# Short text with no semantic match forces LLM classify
if complexity <= 0.7 or short_text_llm_hint:
if self._merged_llm_classify and self._llm_gateway is not None:
# Use merged LLM call: complexity + intent in one call
result = await self._classify_merged(

View File

@ -87,7 +87,11 @@ def _build_real_components() -> tuple[CostAwareRouter, SkillRegistry, IntentRout
if not pconf.api_key:
pconf.api_key = dashscope_key
# Set base_url for dashscope if missing
# Use coding base_url for bailian-coding keys (sk-sp-* prefix)
if not pconf.base_url:
if dashscope_key.startswith("sk-sp-"):
pconf.base_url = "https://coding.dashscope.aliyuncs.com/v1"
else:
pconf.base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
break
@ -108,20 +112,58 @@ def _build_real_components() -> tuple[CostAwareRouter, SkillRegistry, IntentRout
# Build SemanticRouter if enabled or if embedding is available
semantic_router = None
semantic_conf = router_conf.get("semantic", {})
if semantic_conf.get("enabled", False):
try:
from agentkit.chat.semantic_router import SemanticRouter
from agentkit.memory.embedder import OpenAIEmbedder
# Try to get embedder from LLM gateway cache first
embedder = getattr(llm_gateway, "_embedder", None)
# If no cache embedder, create one directly from provider config
if embedder is None:
# Find a provider with an API key to use for embedding
for pname, pconf in server_config.llm_config.providers.items():
if pconf.api_key:
# Use correct base_url based on key prefix
if pconf.api_key.startswith("sk-sp-"):
base_url = pconf.base_url or "https://coding.dashscope.aliyuncs.com/v1"
else:
base_url = pconf.base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"
embedder = OpenAIEmbedder(
api_key=pconf.api_key,
base_url=base_url,
model="text-embedding-v3",
)
print(f"Created embedder from provider '{pname}' (base_url={base_url})")
break
if embedder is not None:
semantic_router = SemanticRouter(
embedder=embedder,
similarity_high=router_conf.get("semantic", {}).get("similarity_high", 0.85),
similarity_low=router_conf.get("semantic", {}).get("similarity_low", 0.6),
similarity_high=semantic_conf.get("similarity_high", 0.85),
similarity_low=semantic_conf.get("similarity_low", 0.4),
)
# Build skill embedding index
import asyncio
try:
loop = asyncio.get_running_loop()
except RuntimeError:
loop = None
if loop and loop.is_running():
# Already in async context (pytest-asyncio), schedule in background
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor() as pool:
pool.submit(asyncio.run, semantic_router.build_index(skill_registry)).result()
else:
asyncio.run(semantic_router.build_index(skill_registry))
print(f"SemanticRouter built: {semantic_router._index.size} skills indexed")
else:
print("Warning: No embedder available for SemanticRouter")
except Exception as e:
print(f"Warning: SemanticRouter not available: {e}")