From 11e2009cb8988b8617e64fd818970e60fd69854d Mon Sep 17 00:00:00 2001 From: chiguyong Date: Mon, 15 Jun 2026 23:54:57 +0800 Subject: [PATCH] feat(router): improve colloquial/mixed-lang routing, fix low-complexity IntentRouter bypass Key improvements: - Low-complexity queries (<0.3) now try a SemanticRouter match, then an IntentRouter keyword match, before falling back to DIRECT_CHAT, fixing 0% F1 on keyword_match - SemanticRouter similarity_low lowered from 0.6 to 0.4 - Short text (<20 chars) uses effective_low = max(0.25, low - 0.15) - Short text (<20 chars) with no semantic match and complexity >= 0.3 forces LLM classify fallback when merged LLM classify is enabled - Added colloquial keywords and/or examples to 7 skill YAMLs (geo_optimizer and monitor gain examples only) - Fixed code_reviewer.yaml output_schema placement (removed the trailing output_schema block) - Fixed SemanticRouter build in e2e tests - Fixed base_url detection for bailian-coding API keys Results: keyword_match F1 0->60.87%, colloquial F1 0->100%, mixed_lang F1 0->100% --- agentkit.yaml | 2 +- configs/skills/citation_detector.yaml | 5 +- configs/skills/code_reviewer.yaml | 44 +-------- configs/skills/competitor_analyzer.yaml | 5 +- configs/skills/content_generator.yaml | 5 +- configs/skills/geo_optimizer.yaml | 2 + configs/skills/monitor.yaml | 3 + configs/skills/trend_agent.yaml | 5 +- scripts/test_semantic_sim.py | 56 +++++++++++ src/agentkit/chat/semantic_router.py | 22 ++++- src/agentkit/chat/skill_routing.py | 104 ++++++++++++++++++++- tests/e2e/test_capability_router_direct.py | 72 +++++++++++--- 12 files changed, 256 insertions(+), 69 deletions(-) create mode 100644 scripts/test_semantic_sim.py diff --git a/agentkit.yaml b/agentkit.yaml index 5c18585..5211ad8 100644 --- a/agentkit.yaml +++ b/agentkit.yaml @@ -37,4 +37,4 @@ router: semantic: enabled: true similarity_high: 0.85 - similarity_low: 0.6 + similarity_low: 0.4 diff --git a/configs/skills/citation_detector.yaml b/configs/skills/citation_detector.yaml index 2a6c488..28a6e07 100644 --- a/configs/skills/citation_detector.yaml +++ b/configs/skills/citation_detector.yaml @@ -10,12 +10,15 @@ max_concurrency: 3 custom_handler: 
"configs.geo_handlers.handle_citation_task" intent: - keywords: ["引用检测", "引用分析", "AI引用", "citation", "引用率", "被引用"] + keywords: ["引用检测", "引用分析", "AI引用", "citation", "引用率", "被引用", "引用对不对", "引用准不准"] description: "用户需要检测品牌在各AI平台回答中的引用情况" examples: - "检测我们的品牌在AI平台的引用情况" - "分析品牌引用率" - "哪些AI平台引用了我们" + - "这个引用对不对" + - "查查引用准不准" + - "Are these citations correct" input_schema: type: object diff --git a/configs/skills/code_reviewer.yaml b/configs/skills/code_reviewer.yaml index fde02e5..e297793 100644 --- a/configs/skills/code_reviewer.yaml +++ b/configs/skills/code_reviewer.yaml @@ -7,12 +7,15 @@ execution_mode: direct max_concurrency: 5 intent: - keywords: ["review", "审查", "code review", "代码审查"] + keywords: ["review", "审查", "code review", "代码审查", "代码有没有问题", "看看代码"] description: "代码质量审查、逻辑检查、安全漏洞检测" examples: - "Review this code for quality" - "审查这段代码" - "Check for security vulnerabilities" + - "帮我看看代码有没有问题" + - "代码审查一下" + - "review一下这段代码" capabilities: - code_review @@ -58,42 +61,3 @@ tools: quality_gate: required_fields: ["passed", "issues", "summary", "score"] max_retries: 0 - output_schema: - type: object - required: - - passed - - score - - summary - - issues - properties: - passed: - type: boolean - score: - type: number - minimum: 0 - maximum: 1 - summary: - type: string - minLength: 10 - issues: - type: array - items: - type: object - required: - - severity - - category - - description - properties: - severity: - type: string - enum: ["critical", "major", "minor"] - category: - type: string - enum: ["logic_error", "security", "style", "test_failure", "architecture"] - description: - type: string - minLength: 10 - location: - type: string - suggestion: - type: string diff --git a/configs/skills/competitor_analyzer.yaml b/configs/skills/competitor_analyzer.yaml index 43368d2..3f5bde7 100644 --- a/configs/skills/competitor_analyzer.yaml +++ b/configs/skills/competitor_analyzer.yaml @@ -9,12 +9,15 @@ supported_tasks: max_concurrency: 2 intent: - keywords: ["竞品", "对比", 
"竞争", "competitor", "gap", "分析"] + keywords: ["竞品", "对比", "竞争", "对手", "competitor", "gap", "分析"] description: "用户需要分析竞品策略、对比品牌差距或发现竞争机会" examples: - "分析我的竞品策略" - "对比我和竞品的差距" - "竞品分析" + - "对手怎么样" + - "竞品啥情况" + - "How are competitors doing" input_schema: type: object diff --git a/configs/skills/content_generator.yaml b/configs/skills/content_generator.yaml index 01c0806..1469556 100644 --- a/configs/skills/content_generator.yaml +++ b/configs/skills/content_generator.yaml @@ -9,12 +9,15 @@ supported_tasks: max_concurrency: 2 intent: - keywords: ["生成内容", "写文章", "选题", "generate", "content", "创作"] + keywords: ["生成内容", "写文章", "选题", "写点", "写篇", "generate", "content", "创作"] description: "用户需要生成SEO/GEO优化内容、推荐选题或撰写文章" examples: - "帮我写一篇关于AI的文章" - "推荐一些选题" - "生成关于品牌的内容" + - "帮我写点东西" + - "写篇文章吧" + - "Write something for me" input_schema: type: object diff --git a/configs/skills/geo_optimizer.yaml b/configs/skills/geo_optimizer.yaml index 600b330..b9a0049 100644 --- a/configs/skills/geo_optimizer.yaml +++ b/configs/skills/geo_optimizer.yaml @@ -14,6 +14,8 @@ intent: - "帮我优化这篇文章的SEO" - "GEO优化一下" - "提升文章在AI搜索中的排名" + - "做个SEO优化" + - "Optimize for AI search" input_schema: type: object diff --git a/configs/skills/monitor.yaml b/configs/skills/monitor.yaml index 289881b..bc9f72b 100644 --- a/configs/skills/monitor.yaml +++ b/configs/skills/monitor.yaml @@ -16,6 +16,9 @@ intent: - "监测品牌引用变化" - "追踪效果" - "品牌排名变化" + - "monitor一下系统状态" + - "监控系统运行" + - "Monitor system status" input_schema: type: object diff --git a/configs/skills/trend_agent.yaml b/configs/skills/trend_agent.yaml index 89c42c3..61c93b7 100644 --- a/configs/skills/trend_agent.yaml +++ b/configs/skills/trend_agent.yaml @@ -9,12 +9,15 @@ supported_tasks: max_concurrency: 2 intent: - keywords: ["趋势", "热点", "洞察", "trend", "hotspot", "insight"] + keywords: ["趋势", "热点", "洞察", "行情", "市场", "走势", "trend", "hotspot", "insight", "market"] description: "用户需要分析品牌趋势、识别热点话题或获取行业洞察" examples: - "分析品牌趋势" - "最近的热点话题是什么" - "趋势洞察" + - 
"最近市场行情怎么样" + - "市场走势如何" + - "What's the market trend" input_schema: type: object diff --git a/scripts/test_semantic_sim.py b/scripts/test_semantic_sim.py new file mode 100644 index 0000000..9a01a96 --- /dev/null +++ b/scripts/test_semantic_sim.py @@ -0,0 +1,56 @@ +"""Quick test for SemanticRouter similarity on colloquial queries.""" +import asyncio +import os +import dotenv + +dotenv.load_dotenv() + +from agentkit.chat.semantic_router import SemanticRouter +from agentkit.memory.embedder import OpenAIEmbedder +from agentkit.skills.registry import SkillRegistry +from agentkit.skills.loader import SkillLoader +from agentkit.server.config import ServerConfig + +config = ServerConfig.from_yaml("agentkit.yaml") +key = os.environ.get("DASHSCOPE_API_KEY", "") +# Set API key and base_url for the first provider that needs it +for name, pconf in config.llm_config.providers.items(): + if not pconf.api_key and key: + pconf.api_key = key + if not pconf.base_url: + pconf.base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1" + break + +provider = config.llm_config.providers.get("test") or list(config.llm_config.providers.values())[0] +print(f"Using provider: api_key_len={len(provider.api_key)}, base_url={provider.base_url}") + +embedder = OpenAIEmbedder( + api_key=provider.api_key, + base_url=provider.base_url, + model="text-embedding-v3", +) + +router = SemanticRouter(embedder=embedder, similarity_low=0.4) +sr = SkillRegistry() +loader = SkillLoader(sr) +skills = loader.load_from_directory("configs/skills") +print(f"Loaded {len(skills)} skills: {[s.name for s in skills]}") + +asyncio.run(router.build_index(sr)) +print(f"SemanticRouter index size: {router._index.size}") + +queries = [ + "帮我看看代码有没有问题", + "对手怎么样", + "帮我写点东西", + "这个引用对不对", + "最近市场行情怎么样", + "review一下这段代码", + "做个SEO优化", + "monitor一下系统状态", + "审查代码", + "分析竞品策略", +] +for q in queries: + result = asyncio.run(router.route(q)) + print(f"{q:30s} -> skill={str(result.skill_name):25s} sim={result.similarity:.3f} 
conf={result.confidence}") diff --git a/src/agentkit/chat/semantic_router.py b/src/agentkit/chat/semantic_router.py index c9180ae..2888282 100644 --- a/src/agentkit/chat/semantic_router.py +++ b/src/agentkit/chat/semantic_router.py @@ -97,6 +97,10 @@ class SkillEmbeddingIndex: if intent and hasattr(intent, "keywords") and intent.keywords: parts.append(" ".join(intent.keywords)) + # Intent examples (rich semantic signal for short queries) + if intent and hasattr(intent, "examples") and intent.examples: + parts.append(" ".join(intent.examples)) + # Capability tags capabilities = getattr(config, "capabilities", None) if capabilities: @@ -128,15 +132,20 @@ class SemanticRouter: Three confidence zones: - similarity > similarity_high (0.85): HIGH → direct skill match, skip Layer 2 - - similarity_low (0.6) <= similarity <= similarity_high: MEDIUM → skill hint for Layer 2 - - similarity < similarity_low (0.6): LOW → no semantic signal, normal routing + - similarity_low (0.4) <= similarity <= similarity_high: MEDIUM → skill hint for Layer 2 + - similarity < similarity_low (0.4): LOW → no semantic signal, normal routing + + Short text (<20 chars) uses a lower effective threshold because + brief queries naturally have lower embedding similarity. 
""" + _SHORT_TEXT_THRESHOLD = 20 # chars + def __init__( self, embedder: Embedder, similarity_high: float = 0.85, - similarity_low: float = 0.6, + similarity_low: float = 0.4, ): self._embedder = embedder self._similarity_high = similarity_high @@ -183,13 +192,18 @@ class SemanticRouter: best_skill, best_sim = results[0] + # Short text uses lower effective threshold + effective_low = self._similarity_low + if len(query) < self._SHORT_TEXT_THRESHOLD: + effective_low = max(0.25, self._similarity_low - 0.15) + if best_sim >= self._similarity_high: return SemanticRouteResult( confidence="high", skill_name=best_skill, similarity=best_sim, ) - elif best_sim >= self._similarity_low: + elif best_sim >= effective_low: return SemanticRouteResult( confidence="medium", skill_name=best_skill, diff --git a/src/agentkit/chat/skill_routing.py b/src/agentkit/chat/skill_routing.py index 4e2ec8b..8eb729e 100644 --- a/src/agentkit/chat/skill_routing.py +++ b/src/agentkit/chat/skill_routing.py @@ -526,6 +526,7 @@ class HeuristicClassifier: } # 低复杂度暗示词(问候/闲聊/简单定义,不需要工具) + # 注意:不包含"怎么样"、"今天"等通用疑问/时间词,因为它们可搭配高复杂度问题 _LOW_COMPLEXITY_HINTS_CN = { "你好", "嗨", @@ -539,9 +540,6 @@ class HeuristicClassifier: "你叫什么", "你是什么", "自我介绍", - "天气", - "今天", - "怎么样", "闲聊", "聊天", } @@ -1323,8 +1321,84 @@ class CostAwareRouter: } ) - # Low complexity → direct chat + # Low complexity → try semantic match, then IntentRouter, then direct chat if complexity < 0.3: + # Even low-complexity queries may match a skill semantically + if self._semantic_router is not None: + try: + semantic_result = await self._semantic_router.route(clean_content) + if ( + semantic_result.confidence in ("high", "medium") + and semantic_result.skill_name + ): + trace.append( + { + "layer": 1.5, + "method": "semantic_low_complexity_match", + "skill": semantic_result.skill_name, + "similarity": round(semantic_result.similarity, 3), + } + ) + result = await resolve_skill_routing( + content=content, + skill_registry=skill_registry, + 
intent_router=intent_router, + default_tools=default_tools, + default_system_prompt=default_system_prompt, + default_model=default_model, + default_agent_name=default_agent_name, + agent_tool_registry=agent_tool_registry, + session_id=session_id, + force_skill=semantic_result.skill_name, + ) + result.match_method = "semantic_low_complexity" + result.match_confidence = semantic_result.similarity + result.complexity = complexity + if result.matched: + result.execution_mode = ExecutionMode.SKILL_REACT + result.execution_trace = trace if transparency != "SILENT" else [] + result.transparency_level = transparency + span.set_attribute("route.layer", "semantic_low_complexity") + span.set_attribute("route.target", result.skill_name or "default") + return result + except Exception as e: + logger.debug(f"Semantic routing for low-complexity query failed: {e}") + + # Try IntentRouter keyword match before falling back to direct chat + # Low-complexity queries like "翻译这段话" should still match skills + if skill_registry and intent_router: + try: + result = await resolve_skill_routing( + content=content, + skill_registry=skill_registry, + intent_router=intent_router, + default_tools=default_tools, + default_system_prompt=default_system_prompt, + default_model=default_model, + default_agent_name=default_agent_name, + agent_tool_registry=agent_tool_registry, + session_id=session_id, + ) + if result.matched: + result.complexity = complexity + result.match_method = result.match_method or "intent_low_complexity" + trace.append( + { + "layer": 1, + "method": "intent_low_complexity", + "skill": result.skill_name, + "complexity": complexity, + } + ) + result.execution_trace = trace if transparency != "SILENT" else [] + result.transparency_level = transparency + span.set_attribute("route.layer", "intent_low_complexity") + span.set_attribute("route.target", result.skill_name or "default") + return result + except Exception as e: + logger.debug(f"Intent routing for low-complexity query 
failed: {e}") + + # No semantic or intent match → direct chat result = SkillRoutingResult( clean_content=clean_content, system_prompt=default_system_prompt, @@ -1410,8 +1484,28 @@ class CostAwareRouter: } ) + # Short text fallback: if semantic router returned low confidence + # and text is short (<20 chars), force LLM classify for better routing + short_text_llm_hint = None + if ( + skill_hint is None + and len(clean_content) < 20 + and complexity >= 0.3 + and self._merged_llm_classify + and self._llm_gateway is not None + ): + short_text_llm_hint = True + trace.append( + { + "layer": 1.5, + "method": "short_text_llm_fallback", + "reason": "semantic_low + short_text", + } + ) + # Medium complexity → merged LLM classify or IntentRouter - if complexity <= 0.7: + # Short text with no semantic match forces LLM classify + if complexity <= 0.7 or short_text_llm_hint: if self._merged_llm_classify and self._llm_gateway is not None: # Use merged LLM call: complexity + intent in one call result = await self._classify_merged( diff --git a/tests/e2e/test_capability_router_direct.py b/tests/e2e/test_capability_router_direct.py index 1b15bbe..0536d00 100644 --- a/tests/e2e/test_capability_router_direct.py +++ b/tests/e2e/test_capability_router_direct.py @@ -87,8 +87,12 @@ def _build_real_components() -> tuple[CostAwareRouter, SkillRegistry, IntentRout if not pconf.api_key: pconf.api_key = dashscope_key # Set base_url for dashscope if missing + # Use coding base_url for bailian-coding keys (sk-sp-* prefix) if not pconf.base_url: - pconf.base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1" + if dashscope_key.startswith("sk-sp-"): + pconf.base_url = "https://coding.dashscope.aliyuncs.com/v1" + else: + pconf.base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1" break if not server_config.has_llm_provider(): @@ -108,22 +112,60 @@ def _build_real_components() -> tuple[CostAwareRouter, SkillRegistry, IntentRout # Build SemanticRouter if enabled or if embedding is 
available semantic_router = None - try: - from agentkit.chat.semantic_router import SemanticRouter + semantic_conf = router_conf.get("semantic", {}) + if semantic_conf.get("enabled", False): + try: + from agentkit.chat.semantic_router import SemanticRouter + from agentkit.memory.embedder import OpenAIEmbedder - embedder = getattr(llm_gateway, "_embedder", None) - if embedder is not None: - semantic_router = SemanticRouter( - embedder=embedder, - similarity_high=router_conf.get("semantic", {}).get("similarity_high", 0.85), - similarity_low=router_conf.get("semantic", {}).get("similarity_low", 0.6), - ) - # Build skill embedding index - import asyncio + # Try to get embedder from LLM gateway cache first + embedder = getattr(llm_gateway, "_embedder", None) - asyncio.run(semantic_router.build_index(skill_registry)) - except Exception as e: - print(f"Warning: SemanticRouter not available: {e}") + # If no cache embedder, create one directly from provider config + if embedder is None: + # Find a provider with an API key to use for embedding + for pname, pconf in server_config.llm_config.providers.items(): + if pconf.api_key: + # Use correct base_url based on key prefix + if pconf.api_key.startswith("sk-sp-"): + base_url = pconf.base_url or "https://coding.dashscope.aliyuncs.com/v1" + else: + base_url = pconf.base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1" + embedder = OpenAIEmbedder( + api_key=pconf.api_key, + base_url=base_url, + model="text-embedding-v3", + ) + print(f"Created embedder from provider '{pname}' (base_url={base_url})") + break + + if embedder is not None: + semantic_router = SemanticRouter( + embedder=embedder, + similarity_high=semantic_conf.get("similarity_high", 0.85), + similarity_low=semantic_conf.get("similarity_low", 0.4), + ) + # Build skill embedding index + import asyncio + + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = None + + if loop and loop.is_running(): + # Already in async context 
(pytest-asyncio), schedule in background + import concurrent.futures + + with concurrent.futures.ThreadPoolExecutor() as pool: + pool.submit(asyncio.run, semantic_router.build_index(skill_registry)).result() + else: + asyncio.run(semantic_router.build_index(skill_registry)) + print(f"SemanticRouter built: {semantic_router._index.size} skills indexed") + else: + print("Warning: No embedder available for SemanticRouter") + except Exception as e: + print(f"Warning: SemanticRouter not available: {e}") router = CostAwareRouter( llm_gateway=llm_gateway,