diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..6cfb638
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,15 @@
+# AgentKit 生产环境变量清单
+# 此文件仅作为 Gitea Secrets 配置参考，不会自动加载
+# 实际部署时由 Gitea Actions workflow 从 Secrets 注入到 /opt/agentkit/repo/.env
+
+# ===== 数据库密码（必填，通过 Gitea Secrets 配置）=====
+POSTGRES_PASSWORD=change-me-to-strong-password
+REDIS_PASSWORD=change-me-to-strong-password
+
+# ===== 应用密钥（必填，用于外部系统调用 API 的鉴权）=====
+AGENTKIT_API_KEY=change-me-to-strong-api-key
+
+# ===== LLM Provider API Keys =====
+# 不在此配置！部署完成后通过 Web UI Settings 页面配置：
+#   http://8.153.107.96:8001 → Settings → LLM
+# 配置后自动写入 agentkit.yaml 和 .env
diff --git a/.gitea/workflows/deploy.yml b/.gitea/workflows/deploy.yml
new file mode 100644
index 0000000..6b54e51
--- /dev/null
+++ b/.gitea/workflows/deploy.yml
@@ -0,0 +1,83 @@
+name: Deploy to Production
+
+# 触发条件：推送到主干分支 或 手动触发
+on:
+  push:
+    branches: [main, master]
+  workflow_dispatch:
+
+env:
+  DEPLOY_DIR: /opt/agentkit
+  REPO_DIR: /opt/agentkit/repo
+  COMPOSE_FILE: docker-compose.deploy.yaml
+
+jobs:
+  deploy:
+    # 使用自托管 runner（同机部署，host 模式）
+    runs-on: self-hosted
+    timeout-minutes: 15
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Prepare deploy directory
+        run: |
+          sudo mkdir -p "$DEPLOY_DIR" "$REPO_DIR"
+          sudo chown -R "$(id -u):$(id -g)" "$DEPLOY_DIR"
+
+      - name: Sync code to deploy directory
+        run: |
+          rsync -a --delete \
+            --exclude='.git' \
+            --exclude='node_modules' \
+            --exclude='__pycache__' \
+            --exclude='.pytest_cache' \
+            --exclude='*.pyc' \
+            --exclude='.venv' \
+            --exclude='venv' \
+            --exclude='build/' \
+            --exclude='dist/' \
+            --exclude='test-results/' \
+            ./ "$REPO_DIR/"
+
+      - name: Write .env from Gitea Secrets
+        # Writes infrastructure secrets only; LLM keys are configured after deploy via the Web UI (PUT /api/v1/settings/llm). The heredoc delimiter is quoted so `$`, backticks and backslashes in secret values are written literally, not expanded by the shell.
+        # NOTE(review): `cat >` truncates .env on every run, so anything the settings endpoint wrote into .env is lost on redeploy - confirm this is intended.
+        run: |
+          umask 077
+          cat > "$REPO_DIR/.env" <<'EOF'
+          # 自动生成，请勿手动编辑
+          # LLM API key 请通过 Web UI Settings 页面配置
+          POSTGRES_PASSWORD=${{ secrets.POSTGRES_PASSWORD }}
+          REDIS_PASSWORD=${{ secrets.REDIS_PASSWORD }}
+          AGENTKIT_API_KEY=${{ secrets.AGENTKIT_API_KEY }}
+          EOF
+
+      - name: Run deploy script
+        working-directory: ${{ env.REPO_DIR }}
+        run: |
+          bash scripts/deploy.sh
+
+      - name: Health check  # polls the local health endpoint; fails the job and dumps compose logs if it never responds
+        run: |
+          echo "等待服务启动..."
+          for i in $(seq 1 30); do  # up to 30 probes, 5 s apart
+            if curl -sf --max-time 5 http://localhost:8001/api/v1/health > /dev/null 2>&1; then  # --max-time bounds a hung connection
+              echo "✅ 服务健康检查通过"
+              curl -s --max-time 5 http://localhost:8001/api/v1/health
+              exit 0
+            fi
+            echo "尝试 $i/30: 服务未就绪，等待 5 秒..."
+            sleep 5
+          done
+          echo "❌ 健康检查失败"
+          docker compose -f "$REPO_DIR/$COMPOSE_FILE" logs --tail=100
+          exit 1
+
+      - name: Cleanup old images
+        if: always()
+        run: |
+          docker image prune -f --filter "until=24h" || true
diff --git a/.understand-anything/build_kg.py b/.understand-anything/build_kg.py
deleted file mode 100644
index 07ed2a4..0000000
--- a/.understand-anything/build_kg.py
+++ /dev/null
@@ -1,862 +0,0 @@
-#!/usr/bin/env python3
-"""Knowledge Graph Builder for Fischer AgentKit
-
-Scans all Python source files under src/agentkit/ and configs/,
-extracts classes, functions, imports, and builds a comprehensive
-knowledge graph JSON file.
-"""
-
-import ast
-import json
-import os
-import sys
-import uuid
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any
-
-# Project root
-PROJECT_ROOT = Path("/Users/Chiguyong/Code/Fischer/fischer-agentkit")
-OUTPUT_PATH = PROJECT_ROOT / ".understand-anything" / "knowledge-graph.json"
-
-# Directories to scan
-SCAN_DIRS = [
-    PROJECT_ROOT / "src" / "agentkit",
-    PROJECT_ROOT / "configs",
-]
-
-# Architecture layer mapping
-LAYER_MAP = {
-    "server": "api",
-    "cli": "api",
-    "core": "service",
-    "orchestrator": "service",
-    "skills": "service",
-    "router": "service",
-    "memory": "data",
-    "session": "data",
-    "bus": "data",
-    "llm": "utility",
-    "mcp": "utility",
-    "tools": "utility",
-    "telemetry": "utility",
-    "prompts": "utility",
-    "quality": "utility",
-    "evaluation": "utility",
-    "evolution": "utility",
-    "configs": "utility",
-}
-
-# Chinese summaries for modules
-MODULE_SUMMARIES = {
-    "core": "核心模块 - 定义Agent基类、通信协议、ReAct引擎、任务分发、注册中心等基础组件",
-    "core.base": "Agent基类 - 统一Agent生命周期管理，包括启动、停止、任务执行、Handoff、进度上报",
-    "core.protocol": "通信协议定义 - 统一消息格式，包括TaskMessage、TaskResult、TaskProgress、HandoffMessage等",
-    "core.react": "ReAct推理-行动循环引擎 - 实现Think→Act→Observe循环，支持工具调用和文本解析模式",
-    "core.exceptions": "自定义异常体系 - 定义Agent框架所有异常类型",
-    "core.dispatcher": "任务分发器 - 通过Redis Queue将任务分发给Agent，支持回调、重试、进度上报",
-    "core.registry": "Agent注册中心 - 管理Agent的注册、发现、状态、心跳和负载均衡",
-    "core.config_driven": "配置驱动Agent - 从YAML/Dict配置自动组装Agent，支持llm_generate/tool_call/custom三种模式",
-    "core.compressor": "上下文压缩器 - 长会话自动压缩历史消息，支持LLM摘要和简单截断策略",
-    "core.trace": "执行轨迹记录器 - 记录ReAct执行过程中的完整轨迹，为反思和可观测性提供数据",
-    "core.shared_workspace": "共享工作空间 - 基于Redis的Agent间共享状态存储，支持读写、锁操作",
-    "core.agent_pool": "Agent实例池 - 运行时管理Agent的创建、获取、删除",
-    "core.orchestrator": "多Agent协作编排器 - 实现Orchestrator-Worker模式，支持任务分解、并行执行、自适应编排",
-    "core.headroom_compressor": "Headroom AI压缩器 - 基于Headroom AI的上下文压缩实现",
-    "core.logging": "日志配置 - 统一日志格式和配置",
-    "core.standalone": "独立运行模式 - 支持Agent脱离框架独立运行",
-    "core.goal_planner": "目标规划器 - 将复杂目标分解为可执行步骤",
-    "core.plan_checker": "计划检查器 - 验证执行计划的完整性和可行性",
-    "core.plan_exec_engine": "计划执行引擎 - 执行分解后的计划步骤",
-    "core.plan_executor": "计划执行器 - 管理计划执行的完整流程",
-    "core.plan_schema": "计划Schema - 执行计划的数据结构定义",
-    "core.reflexion": "Reflexion引擎 - 自反思推理，通过自我评估改进输出",
-    "core.rewoo": "ReWOO引擎 - 无观察推理，先规划后执行的高效模式",
-
-    "llm": "LLM网关模块 - 多Provider统一网关，支持OpenAI/Anthropic/Gemini/文心/豆包/元宝等",
-    "llm.gateway": "LLM网关 - 统一多Provider调用接口，支持路由、重试、流式输出",
-    "llm.protocol": "LLM协议定义 - 定义LLMProvider、LLMRequest、LLMResponse等接口",
-    "llm.config": "LLM配置 - 模型别名、Provider配置管理",
-    "llm.retry": "LLM重试策略 - 指数退避重试和错误处理",
-    "llm.providers": "LLM Provider实现 - 各大模型服务商的具体适配器",
-    "llm.providers.openai": "OpenAI Provider - 支持GPT-4/GPT-3.5等模型",
-    "llm.providers.anthropic": "Anthropic Provider - 支持Claude系列模型",
-    "llm.providers.gemini": "Gemini Provider - 支持Google Gemini模型",
-    "llm.providers.wenxin": "文心一言Provider - 支持百度文心大模型",
-    "llm.providers.doubao": "豆包Provider - 支持字节豆包大模型",
-    "llm.providers.yuanbao": "元宝Provider - 支持腾讯元宝大模型",
-    "llm.providers.tracker": "LLM调用追踪器 - 记录和统计LLM调用",
-    "llm.providers.usage_store": "LLM用量存储 - Token用量和成本追踪，支持InMemory和Redis后端",
-    "llm.cache": "LLM响应缓存 - 基于语义相似度的LLM响应缓存，减少重复调用",
-    "llm.cache_key": "缓存键生成 - LLM缓存键的计算和归一化",
-
-    "chat": "聊天路由模块 - CostAwareRouter三层意图路由和语义路由",
-    "chat.skill_routing": "三层意图路由 - CostAwareRouter，正则→启发式→LLM分类逐层升级",
-    "chat.semantic_router": "语义路由 - 基于向量相似度的意图路由，支持语义匹配",
-
-    "quality.cascade_detector": "级联检测器 - 检测Agent输出中的级联失败模式",
-    "quality.cascade_state_store": "级联状态存储 - 级联检测状态持久化，支持InMemory和Redis后端",
-    "quality.alignment": "对齐守卫 - 检测和修正Agent输出中的对齐偏差",
-
-    "tools": "工具模块 - 提供Agent可调用的各类工具",
-    "tools.base": "工具基类 - 定义Tool接口和标准执行流程",
-    "tools.registry": "工具注册中心 - 管理工具的注册、发现、获取",
-    "tools.shell": "Shell工具 - 执行系统命令",
-    "tools.web_search": "Web搜索工具 - 执行网络搜索",
-    "tools.web_crawl": "Web爬取工具 - 爬取网页内容",
-    "tools.memory_tool": "记忆工具 - Agent记忆读写操作",
-    "tools.ask_human": "人工介入工具 - 请求人类输入",
-    "tools.schema_tools": "Schema工具 - JSON Schema相关操作",
-    "tools.function_tool": "函数工具 - 将Python函数包装为Tool",
-    "tools.agent_tool": "Agent工具 - 将Agent包装为可调用Tool",
-    "tools.mcp_tool": "MCP工具 - MCP协议工具适配器",
-    "tools.composition": "工具组合 - 支持工具链式组合",
-    "tools.baidu_search": "百度搜索工具 - 百度搜索引擎集成",
-    "tools.headroom_retrieve": "Headroom检索工具 - Headroom AI知识检索",
-    "tools.computer_use": "计算机使用工具 - 桌面操控工具，支持截图、点击、输入等操作",
-    "tools.computer_use_session": "计算机使用会话 - 桌面操控会话管理，支持云端和本地(pyautogui)模式",
-    "tools.computer_use_recorder": "计算机使用录制器 - 记录桌面操控动作序列",
-    "tools.pty_session": "PTY会话 - 伪终端会话管理",
-    "tools.terminal_session": "终端会话 - 终端模拟器会话",
-    "tools.output_parser": "输出解析器 - 解析Agent输出为结构化数据",
-    "tools.skill_install": "技能安装器 - 动态安装技能包",
-
-    "memory": "记忆模块 - 多层记忆系统，支持工作记忆、情景记忆、语义记忆",
-    "memory.base": "记忆基类 - 定义Memory接口",
-    "memory.working": "工作记忆 - 基于Redis的短期工作记忆",
-    "memory.episodic": "情景记忆 - 基于向量数据库的长期情景记忆",
-    "memory.semantic": "语义记忆 - 基于RAG服务的语义知识检索",
-    "memory.profile": "用户画像 - 用户偏好和历史信息管理",
-    "memory.retriever": "记忆检索器 - 统一多层记忆检索接口",
-    "memory.embedder": "嵌入器 - 文本向量化，支持OpenAI Embedding",
-    "memory.models": "记忆数据模型 - Pydantic模型定义",
-    "memory.rag_loop": "RAG循环 - 检索增强生成的迭代循环",
-    "memory.query_transformer": "查询转换器 - 优化检索查询",
-    "memory.relevance_scorer": "相关性评分器 - 评估检索结果相关性",
-    "memory.contextual_retrieval": "上下文检索 - 基于上下文的检索增强",
-    "memory.http_rag": "HTTP RAG服务 - 远程RAG API客户端",
-
-    "skills": "技能模块 - 定义可复用的Agent技能，包含意图、工具和质量门控",
-    "skills.base": "技能基类 - 定义Skill、SkillConfig、IntentConfig等",
-    "skills.registry": "技能注册中心 - 管理技能的注册、发现、获取",
-    "skills.loader": "技能加载器 - 从YAML配置加载技能定义",
-    "skills.pipeline": "技能Pipeline - 技能编排流程",
-    "skills.skill_md": "Markdown技能 - 从Markdown文档生成技能",
-    "skills.geo_pipeline": "GEO Pipeline - 地理信息处理Pipeline",
-
-    "orchestrator": "编排模块 - Pipeline编排引擎，支持DAG工作流",
-    "orchestrator.pipeline_engine": "Pipeline引擎 - 执行DAG定义的工作流",
-    "orchestrator.pipeline_schema": "Pipeline Schema - Pipeline配置模型定义",
-    "orchestrator.pipeline_state": "Pipeline状态 - Pipeline执行状态管理",
-    "orchestrator.pipeline_models": "Pipeline模型 - Pipeline数据模型",
-    "orchestrator.pipeline_loader": "Pipeline加载器 - 从YAML加载Pipeline定义",
-    "orchestrator.reflection": "反思模块 - 执行后反思和改进",
-    "orchestrator.retry": "重试策略 - Pipeline步骤重试机制",
-    "orchestrator.compensation": "补偿机制 - Pipeline失败时的补偿操作",
-    "orchestrator.handoff": "Handoff - Agent间任务转交",
-    "orchestrator.dynamic_pipeline": "动态Pipeline - 运行时动态构建Pipeline",
-
-    "router": "路由模块 - 意图路由，将用户输入匹配到对应技能",
-    "router.intent": "意图路由器 - 基于LLM的意图识别和路由",
-
-    "quality": "质量模块 - 输出质量门控和标准化",
-    "quality.gate": "质量门控 - 检查Agent输出是否满足质量要求",
-    "quality.output": "输出标准化 - 统一Agent输出格式",
-
-    "prompts": "Prompt模块 - Prompt模板和渲染",
-    "prompts.template": "Prompt模板 - 支持变量替换和Section组合",
-    "prompts.section": "Prompt Section - 定义Prompt的各组成部分",
-
-    "bus": "消息总线模块 - Agent间异步通信",
-    "bus.protocol": "总线协议 - 定义消息总线接口",
-    "bus.message": "消息定义 - Agent间通信消息格式",
-    "bus.memory_bus": "内存消息总线 - 基于进程内队列的消息总线",
-    "bus.redis_bus": "Redis消息总线 - 基于Redis Pub/Sub的消息总线",
-
-    "session": "会话模块 - 会话管理和持久化",
-    "session.manager": "会话管理器 - 管理对话会话的创建、获取、更新",
-    "session.store": "会话存储 - 会话数据的持久化存储",
-    "session.models": "会话模型 - 会话相关的数据模型",
-
-    "server": "服务器模块 - FastAPI HTTP/WebSocket服务",
-    "server.app": "FastAPI应用 - 创建和配置FastAPI应用实例",
-    "server.config": "服务器配置 - 服务器运行参数配置",
-    "server.runner": "服务器运行器 - 启动和管理服务器进程",
-    "server.middleware": "中间件 - 请求处理中间件",
-    "server.client": "API客户端 - 服务端API客户端封装",
-    "server.client_config": "客户端配置 - API客户端配置管理",
-    "server.task_store": "任务存储 - 服务端任务状态存储",
-    "server.routes": "路由模块 - HTTP/WebSocket路由定义",
-    "server.routes.chat": "聊天路由 - 对话API端点",
-    "server.routes.ws": "WebSocket路由 - 实时通信端点",
-    "server.routes.tasks": "任务路由 - 任务管理API",
-    "server.routes.agents": "Agent路由 - Agent管理API",
-    "server.routes.skills": "技能路由 - 技能管理API，含@-mention建议端点",
-    "server.routes.memory": "记忆路由 - 记忆管理API",
-    "server.routes.llm": "LLM路由 - LLM配置和调用API",
-    "server.routes.health": "健康检查路由 - 服务健康状态端点",
-    "server.routes.metrics": "指标路由 - 运行指标API",
-    "server.routes.evolution": "进化路由 - Agent进化管理API",
-    "server.routes.evolution_dashboard": "进化仪表盘路由 - 进化数据可视化API",
-    "server.routes.kb_management": "知识库管理路由 - 文档上传/搜索/源配置API",
-    "server.routes.settings": "设置路由 - 系统配置管理API",
-    "server.routes.terminal": "终端路由 - PTY终端会话API",
-    "server.routes.workflows": "工作流路由 - Pipeline工作流管理API",
-    "server.routes.skill_management": "技能管理路由 - 技能CRUD操作API",
-    "server.routes.portal": "门户路由 - Web GUI入口和静态资源",
-
-    "cli": "命令行模块 - CLI工具",
-    "cli.main": "CLI入口 - Typer应用主入口",
-    "cli.chat": "聊天命令 - 交互式对话命令",
-    "cli.init": "初始化命令 - 项目初始化",
-    "cli.onboarding": "引导命令 - 新用户引导流程",
-    "cli.skill": "技能命令 - 技能管理CLI",
-    "cli.task": "任务命令 - 任务提交和管理CLI",
-    "cli.pair": "配对命令 - Agent配对",
-    "cli.usage": "使用统计命令 - 使用情况统计",
-    "cli.templates": "模板命令 - Agent模板管理",
-
-    "mcp": "MCP协议模块 - Model Context Protocol集成",
-    "mcp.client": "MCP客户端 - 连接MCP服务器",
-    "mcp.server": "MCP服务器 - 提供MCP服务",
-    "mcp.manager": "MCP管理器 - 管理MCP连接",
-    "mcp.transport": "MCP传输层 - MCP通信传输实现",
-
-    "telemetry": "遥测模块 - 可观测性支持",
-    "telemetry.tracing": "分布式追踪 - OpenTelemetry追踪集成",
-    "telemetry.metrics": "指标收集 - 运行指标收集和导出",
-    "telemetry.setup": "遥测设置 - 初始化遥测组件",
-
-    "evolution": "进化模块 - Agent自我进化能力",
-    "evolution.lifecycle": "进化生命周期 - EvolutionMixin，任务后触发进化",
-    "evolution.reflector": "反思器 - 分析任务执行结果，生成改进建议",
-    "evolution.llm_reflector": "LLM反思器 - 使用LLM进行深度反思",
-    "evolution.prompt_optimizer": "Prompt优化器 - 自动优化Agent Prompt",
-    "evolution.strategy_tuner": "策略调优器 - 调整Agent执行策略",
-    "evolution.genetic": "遗传算法 - 基于遗传算法的Prompt进化",
-    "evolution.fitness": "适应度评估 - 评估进化变体的质量",
-    "evolution.ab_tester": "A/B测试 - 对比测试不同进化变体",
-    "evolution.evolution_store": "进化存储 - 持久化进化历史",
-    "evolution.models": "进化模型 - 进化相关数据模型",
-    "evolution.experience_schema": "经验Schema - 经验数据结构定义",
-    "evolution.experience_store": "经验存储 - 成功/失败经验持久化",
-    "evolution.path_optimizer": "路径优化器 - 分析工具调用路径，推荐更优策略",
-    "evolution.pitfall_detector": "陷阱检测器 - 检测重复错误模式",
-
-    "evaluation": "评估模块 - Agent输出质量评估",
-    "evaluation.ragas_evaluator": "RAGAS评估器 - 使用RAGAS框架评估RAG质量",
-
-    "org": "组织发现模块 - 多Agent组织架构和协作发现",
-    "org.context": "组织上下文 - 组织级别的共享上下文管理",
-    "org.discovery": "组织发现 - Agent间能力发现和协作匹配",
-
-    "marketplace": "多Agent市场模块 - Agent间的拍卖和财富分配",
-    "marketplace.auction": "拍卖机制 - Agent间的任务拍卖和竞价",
-    "marketplace.wealth": "财富管理 - Agent间的价值交换和分配",
-
-    "configs": "配置模块 - Pipeline和技能YAML配置",
-    "configs.geo_server": "GEO服务器 - 地理信息HTTP服务",
-    "configs.geo_handlers": "GEO处理器 - 地理信息请求处理",
-    "configs.geo_tools": "GEO工具 - 地理信息相关工具定义",
-}
-
-
-def get_layer(file_path: str) -> str:
-    """Determine architecture layer from file path."""
-    parts = file_path.replace("\\", "/").split("/")
-    # Check for configs/ prefix
-    if "configs" in parts:
-        return "utility"
-    # For src/agentkit/__init__.py and __main__.py, treat as service
-    if parts[-1] in ("__init__.py", "__main__.py") and len(parts) <= 4:
-        return "service"
-    for part in parts:
-        if part in LAYER_MAP:
-            return LAYER_MAP[part]
-    return "unknown"
-
-
-def get_module_key(file_path: str) -> str:
-    """Get module key for summary lookup."""
-    # Convert file path to module key
-    rel = file_path
-    if rel.startswith("src/agentkit/"):
-        rel = rel[len("src/agentkit/"):]
-    elif rel.startswith("configs/"):
-        rel = rel[len("configs/"):]
-
-    # Remove __init__.py and .py suffix
-    rel = rel.replace("/__init__.py", "").replace(".py", "")
-    return rel
-
-
-def get_file_summary(file_path: str, docstring: str = "") -> str:
-    """Get Chinese summary for a file."""
-    # If we have a docstring, use it as base
-    if docstring:
-        # Clean up docstring
-        doc = docstring.strip().split("\n")[0].strip()
-        if doc:
-            return doc
-
-    key = get_module_key(file_path)
-    # Try exact match first
-    if key in MODULE_SUMMARIES:
-        return MODULE_SUMMARIES[key]
-    # Try parent module
-    parts = key.split("/")
-    for i in range(len(parts) - 1, 0, -1):
-        parent_key = "/".join(parts[:i])
-        if parent_key in MODULE_SUMMARIES:
-            return MODULE_SUMMARIES[parent_key]
-    return f"模块 {key}"
-
-
-def estimate_complexity(node: ast.AST) -> str:
-    """Estimate complexity of an AST node."""
-    if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
-        # Count branches, loops, nested functions
-        complexity = 1
-        for child in ast.walk(node):
-            if isinstance(child, (ast.If, ast.While, ast.For, ast.ExceptHandler)):
-                complexity += 1
-            elif isinstance(child, (ast.And, ast.Or)):
-                complexity += 1
-        if complexity <= 3:
-            return "simple"
-        elif complexity <= 8:
-            return "moderate"
-        return "complex"
-    elif isinstance(node, ast.ClassDef):
-        methods = [n for n in node.body if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))]
-        if len(methods) <= 3:
-            return "simple"
-        elif len(methods) <= 8:
-            return "moderate"
-        return "complex"
-    return "simple"
-
-
-def extract_class_info(node: ast.ClassDef, file_path: str) -> dict:
-    """Extract class information from AST node."""
-    base_classes = []
-    for base in node.bases:
-        if isinstance(base, ast.Name):
-            base_classes.append(base.id)
-        elif isinstance(base, ast.Attribute):
-            base_classes.append(ast.dump(base))
-
-    methods = []
-    for item in node.body:
-        if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
-            params = [arg.arg for arg in item.args.args if arg.arg != "self"]
-            methods.append({
-                "name": item.name,
-                "params": params,
-                "is_async": isinstance(item, ast.AsyncFunctionDef),
-            })
-
-    # Extract class docstring
-    docstring = ast.get_docstring(node) or ""
-
-    return {
-        "name": node.name,
-        "base_classes": base_classes,
-        "methods": methods,
-        "complexity": estimate_complexity(node),
-        "docstring": docstring,
-    }
-
-
-def extract_function_info(node: ast.FunctionDef | ast.AsyncFunctionDef) -> dict:
-    """Extract function information from AST node."""
-    params = [arg.arg for arg in node.args.args]
-
-    return_type = ""
-    if node.returns:
-        if isinstance(node.returns, ast.Name):
-            return_type = node.returns.id
-        elif isinstance(node.returns, ast.Constant):
-            return_type = str(node.returns.value)
-        else:
-            return_type = ast.dump(node.returns)
-
-    return {
-        "name": node.name,
-        "params": params,
-        "return_type": return_type,
-        "is_async": isinstance(node, ast.AsyncFunctionDef),
-        "complexity": estimate_complexity(node),
-    }
-
-
-def extract_imports(tree: ast.AST, file_path: str) -> list[dict]:
-    """Extract import information from AST."""
-    imports = []
-    for node in ast.walk(tree):
-        if isinstance(node, ast.ImportFrom):
-            if node.module and (node.module.startswith("agentkit") or node.module.startswith("configs")):
-                for alias in node.names:
-                    imports.append({
-                        "from_module": node.module,
-                        "import_name": alias.name,
-                    })
-        elif isinstance(node, ast.Import):
-            for alias in node.names:
-                if alias.name.startswith("agentkit") or alias.name.startswith("configs"):
-                    imports.append({
-                        "from_module": None,
-                        "import_name": alias.name,
-                    })
-    return imports
-
-
-def module_to_file_path(module: str) -> str:
-    """Convert Python module path to file path."""
-    parts = module.split(".")
-
-    # Handle agentkit modules
-    if module.startswith("agentkit"):
-        # Skip "agentkit" prefix, it's under src/
-        sub_parts = parts[1:]  # skip "agentkit"
-        if not sub_parts:
-            return "src/agentkit/__init__.py"
-        # Try as package __init__.py
-        init_path = PROJECT_ROOT / "src" / "agentkit" / "/".join(sub_parts) / "__init__.py"
-        if init_path.exists():
-            return f"src/agentkit/{'/'.join(sub_parts)}/__init__.py"
-        # Try as module.py
-        mod_path = PROJECT_ROOT / "src" / "agentkit" / ("/".join(sub_parts) + ".py")
-        if mod_path.exists():
-            return f"src/agentkit/{'/'.join(sub_parts)}.py"
-
-    # Handle configs modules
-    if module.startswith("configs"):
-        sub_parts = parts[1:]  # skip "configs"
-        if not sub_parts:
-            return "configs/__init__.py"
-        mod_path = PROJECT_ROOT / "configs" / ("/".join(sub_parts) + ".py")
-        if mod_path.exists():
-            return f"configs/{'/'.join(sub_parts)}.py"
-
-    return ""
-
-
-def scan_file(file_path: Path) -> dict:
-    """Scan a single Python file and extract all information."""
-    try:
-        source = file_path.read_text(encoding="utf-8")
-        tree = ast.parse(source)
-    except (SyntaxError, UnicodeDecodeError):
-        return {"classes": [], "functions": [], "imports": [], "top_level_functions": [], "docstring": ""}
-
-    rel_path = str(file_path.relative_to(PROJECT_ROOT))
-
-    # Extract module docstring
-    docstring = ast.get_docstring(tree) or ""
-
-    classes = []
-    functions = []
-    top_level_functions = []
-
-    for node in ast.iter_child_nodes(tree):
-        if isinstance(node, ast.ClassDef):
-            classes.append(extract_class_info(node, rel_path))
-        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
-            func_info = extract_function_info(node)
-            functions.append(func_info)
-            top_level_functions.append(func_info)
-
-    imports = extract_imports(tree, rel_path)
-
-    return {
-        "classes": classes,
-        "functions": top_level_functions,
-        "imports": imports,
-        "rel_path": rel_path,
-        "docstring": docstring,
-    }
-
-
-def build_knowledge_graph():
-    """Build the complete knowledge graph."""
-    # Collect all Python files
-    py_files = []
-    for scan_dir in SCAN_DIRS:
-        if scan_dir.exists():
-            for py_file in scan_dir.rglob("*.py"):
-                py_files.append(py_file)
-
-    print(f"Found {len(py_files)} Python files to scan")
-
-    # Scan all files
-    file_data = {}
-    for py_file in sorted(py_files):
-        data = scan_file(py_file)
-        rel_path = data["rel_path"]
-        file_data[rel_path] = data
-
-    # Build nodes and edges
-    nodes = []
-    edges = []
-
-    # Track all node IDs for edge building
-    file_node_ids = {}
-    class_node_ids = {}
-    func_node_ids = {}
-
-    # 1. Create file nodes
-    for rel_path, data in file_data.items():
-        node_id = f"file:{rel_path}"
-        layer = get_layer(rel_path)
-        summary = get_file_summary(rel_path, data.get("docstring", ""))
-
-        tags = []
-        parts = rel_path.replace("\\", "/").split("/")
-        for p in parts:
-            if p not in ("src", "agentkit", "__init__.py") and not p.endswith(".py"):
-                tags.append(p)
-
-        nodes.append({
-            "id": node_id,
-            "type": "file",
-            "name": rel_path.split("/")[-1],
-            "filePath": rel_path,
-            "layer": layer,
-            "summary": summary,
-            "tags": tags,
-            "complexity": "moderate" if data["classes"] or data["functions"] else "simple",
-        })
-        file_node_ids[rel_path] = node_id
-
-    # 2. Create class nodes
-    for rel_path, data in file_data.items():
-        for cls in data["classes"]:
-            class_id = f"class:{cls['name']}"
-            layer = get_layer(rel_path)
-
-            method_names = [m["name"] for m in cls["methods"]]
-            # Use docstring for summary if available
-            docstring = cls.get("docstring", "")
-            if docstring:
-                # Take first line of docstring
-                summary = docstring.strip().split("\n")[0].strip()
-            else:
-                summary = f"{cls['name']}类"
-                if cls["base_classes"]:
-                    summary += f"，继承自{', '.join(cls['base_classes'])}"
-                if method_names:
-                    summary += f"，包含方法: {', '.join(method_names[:5])}"
-                    if len(method_names) > 5:
-                        summary += f" 等{len(method_names)}个方法"
-
-            nodes.append({
-                "id": class_id,
-                "type": "class",
-                "name": cls["name"],
-                "filePath": rel_path,
-                "layer": layer,
-                "summary": summary,
-                "tags": [cls["name"]],
-                "complexity": cls["complexity"],
-            })
-            class_node_ids[cls["name"]] = class_id
-
-            # Edge: file contains class
-            edges.append({
-                "id": f"edge:{uuid.uuid4().hex[:8]}",
-                "source": file_node_ids[rel_path],
-                "target": class_id,
-                "type": "contains",
-                "label": f"定义类 {cls['name']}",
-            })
-
-            # Edge: class extends base classes
-            for base in cls["base_classes"]:
-                if base in class_node_ids:
-                    edges.append({
-                        "id": f"edge:{uuid.uuid4().hex[:8]}",
-                        "source": class_id,
-                        "target": class_node_ids[base],
-                        "type": "extends",
-                        "label": f"继承 {base}",
-                    })
-
-            # 3. Create method nodes
-            for method in cls["methods"]:
-                method_id = f"func:{cls['name']}.{method['name']}"
-                async_tag = "异步" if method["is_async"] else ""
-                summary = f"{cls['name']}.{method['name']}({', '.join(method['params'])}) {async_tag}方法"
-
-                nodes.append({
-                    "id": method_id,
-                    "type": "function",
-                    "name": method["name"],
-                    "filePath": rel_path,
-                    "layer": layer,
-                    "summary": summary,
-                    "tags": [cls["name"], method["name"]],
-                    "complexity": "simple",
-                })
-                func_node_ids[f"{cls['name']}.{method['name']}"] = method_id
-
-                # Edge: class contains method
-                edges.append({
-                    "id": f"edge:{uuid.uuid4().hex[:8]}",
-                    "source": class_id,
-                    "target": method_id,
-                    "type": "contains",
-                    "label": f"方法 {method['name']}",
-                })
-
-    # 4. Create top-level function nodes
-    for rel_path, data in file_data.items():
-        for func in data["functions"]:
-            func_id = f"func:{func['name']}"
-            async_tag = "异步" if func["is_async"] else ""
-            summary = f"{func['name']}({', '.join(func['params'])}) {async_tag}函数"
-            if func["return_type"]:
-                summary += f" → {func['return_type']}"
-
-            nodes.append({
-                "id": func_id,
-                "type": "function",
-                "name": func["name"],
-                "filePath": rel_path,
-                "layer": get_layer(rel_path),
-                "summary": summary,
-                "tags": [func["name"]],
-                "complexity": func["complexity"],
-            })
-            func_node_ids[func["name"]] = func_id
-
-            # Edge: file contains function
-            edges.append({
-                "id": f"edge:{uuid.uuid4().hex[:8]}",
-                "source": file_node_ids[rel_path],
-                "target": func_id,
-                "type": "contains",
-                "label": f"定义函数 {func['name']}",
-            })
-
-    # 5. Create import edges
-    for rel_path, data in file_data.items():
-        for imp in data["imports"]:
-            if imp["from_module"]:
-                target_path = module_to_file_path(imp["from_module"])
-                if target_path and target_path in file_node_ids:
-                    edges.append({
-                        "id": f"edge:{uuid.uuid4().hex[:8]}",
-                        "source": file_node_ids[rel_path],
-                        "target": file_node_ids[target_path],
-                        "type": "imports",
-                        "label": f"导入 {imp['import_name']}",
-                    })
-
-    # 6. Build tours
-    tours = build_tours(file_data, file_node_ids, class_node_ids, func_node_ids)
-
-    # Get git commit hash
-    git_hash = "d9d1b16e5911ad958cd8ae38958058bea13f3fcc"
-
-    # Build final JSON
-    graph = {
-        "version": "1.0.0",
-        "project": {
-            "name": "Fischer AgentKit",
-            "languages": ["python"],
-            "frameworks": ["FastAPI", "Pydantic", "SQLAlchemy", "Typer", "Redis"],
-            "description": "AI驱动的Agent框架，支持ReAct引擎、多LLM网关、Pipeline编排、自适应反思和消息总线",
-            "analyzedAt": datetime.now(timezone.utc).isoformat(),
-            "gitCommitHash": git_hash,
-        },
-        "nodes": nodes,
-        "edges": edges,
-        "tours": tours,
-    }
-
-    return graph
-
-
-def build_tours(file_data, file_node_ids, class_node_ids, func_node_ids):
-    """Build guided learning tours."""
-    tours = []
-
-    # Tour 1: Entry Points
-    tours.append({
-        "id": "tour:entry-points",
-        "name": "入口点导览",
-        "description": "从项目入口开始，了解如何启动和使用AgentKit",
-        "steps": [
-            {"nodeId": "file:src/agentkit/__main__.py", "why": "Python模块入口，python -m agentkit"},
-            {"nodeId": "file:src/agentkit/__init__.py", "why": "包入口，导出核心公共API"},
-            {"nodeId": "file:src/agentkit/cli/main.py", "why": "CLI主入口，Typer应用定义"},
-            {"nodeId": "file:src/agentkit/server/app.py", "why": "HTTP服务入口，FastAPI应用创建"},
-        ],
-    })
-
-    # Tour 2: Core Agent Lifecycle
-    tours.append({
-        "id": "tour:agent-lifecycle",
-        "name": "Agent生命周期导览",
-        "description": "深入理解Agent从创建到执行任务的完整生命周期",
-        "steps": [
-            {"nodeId": "class:BaseAgent", "why": "Agent基类，定义标准生命周期和可插拔能力"},
-            {"nodeId": "func:BaseAgent.start", "why": "Agent启动流程：连接Redis→注册→心跳→监听"},
-            {"nodeId": "func:BaseAgent.execute", "why": "任务执行框架方法：on_task_start→handle_task→quality_gate→on_task_complete"},
-            {"nodeId": "func:BaseAgent.handle_task", "why": "抽象方法，子类实现业务逻辑"},
-            {"nodeId": "class:ConfigDrivenAgent", "why": "配置驱动Agent，从YAML自动组装"},
-            {"nodeId": "func:ConfigDrivenAgent.handle_task", "why": "根据execution_mode路由到react/direct/custom模式"},
-            {"nodeId": "class:AgentConfig", "why": "Agent配置模型，支持YAML/Dict构建"},
-        ],
-    })
-
-    # Tour 3: ReAct Engine
-    tours.append({
-        "id": "tour:react-engine",
-        "name": "ReAct引擎导览",
-        "description": "理解ReAct推理-行动循环的核心实现",
-        "steps": [
-            {"nodeId": "class:ReActEngine", "why": "ReAct引擎核心，Think→Act→Observe循环"},
-            {"nodeId": "func:ReActEngine.execute", "why": "执行ReAct循环，支持超时和取消"},
-            {"nodeId": "func:ReActEngine.execute_stream", "why": "流式执行，逐步yield事件"},
-            {"nodeId": "func:ReActEngine._execute_tool", "why": "工具调用执行，处理成功和失败"},
-            {"nodeId": "func:ReActEngine._parse_text_tool_calls", "why": "文本解析模式，支持Action和代码块格式"},
-            {"nodeId": "class:ReActStep", "why": "单步记录数据结构"},
-            {"nodeId": "class:ReActResult", "why": "ReAct执行结果数据结构"},
-            {"nodeId": "class:ReActEvent", "why": "流式执行事件数据结构"},
-        ],
-    })
-
-    # Tour 4: LLM Gateway
-    tours.append({
-        "id": "tour:llm-gateway",
-        "name": "LLM网关导览",
-        "description": "了解多Provider统一网关的设计和实现",
-        "steps": [
-            {"nodeId": "class:LLMGateway", "why": "LLM网关核心，统一多Provider调用接口"},
-            {"nodeId": "file:src/agentkit/llm/protocol.py", "why": "LLM协议定义，LLMProvider/LLMRequest/LLMResponse"},
-            {"nodeId": "file:src/agentkit/llm/config.py", "why": "模型别名和Provider配置"},
-            {"nodeId": "file:src/agentkit/llm/providers/openai.py", "why": "OpenAI Provider实现"},
-            {"nodeId": "file:src/agentkit/llm/providers/anthropic.py", "why": "Anthropic Provider实现"},
-            {"nodeId": "file:src/agentkit/llm/retry.py", "why": "LLM重试策略"},
-        ],
-    })
-
-    # Tour 5: Memory System
-    tours.append({
-        "id": "tour:memory-system",
-        "name": "记忆系统导览",
-        "description": "理解多层记忆系统的架构和实现",
-        "steps": [
-            {"nodeId": "file:src/agentkit/memory/base.py", "why": "记忆基类接口定义"},
-            {"nodeId": "file:src/agentkit/memory/retriever.py", "why": "统一记忆检索器，整合工作/情景/语义记忆"},
-            {"nodeId": "file:src/agentkit/memory/working.py", "why": "工作记忆 - 基于Redis的短期记忆"},
-            {"nodeId": "file:src/agentkit/memory/episodic.py", "why": "情景记忆 - 基于向量的长期记忆"},
-            {"nodeId": "file:src/agentkit/memory/semantic.py", "why": "语义记忆 - RAG服务集成"},
-            {"nodeId": "file:src/agentkit/memory/embedder.py", "why": "文本向量化嵌入器"},
-        ],
-    })
-
-    # Tour 6: Orchestration
-    tours.append({
-        "id": "tour:orchestration",
-        "name": "编排系统导览",
-        "description": "了解多Agent协作编排和Pipeline引擎",
-        "steps": [
-            {"nodeId": "class:Orchestrator", "why": "多Agent协作编排器，Orchestrator-Worker模式"},
-            {"nodeId": "func:Orchestrator.execute", "why": "编排执行：分解→执行→汇总"},
-            {"nodeId": "func:Orchestrator.execute_adaptive", "why": "自适应编排：执行→评估→再分解循环"},
-            {"nodeId": "file:src/agentkit/orchestrator/pipeline_engine.py", "why": "Pipeline引擎，执行DAG工作流"},
-            {"nodeId": "file:src/agentkit/orchestrator/pipeline_schema.py", "why": "Pipeline配置模型"},
-            {"nodeId": "file:src/agentkit/orchestrator/reflection.py", "why": "执行后反思模块"},
-        ],
-    })
-
-    # Tour 7: Skills & Router
-    tours.append({
-        "id": "tour:skills-router",
-        "name": "技能与路由导览",
-        "description": "了解技能定义、注册和意图路由机制",
-        "steps": [
-            {"nodeId": "file:src/agentkit/skills/base.py", "why": "技能基类和配置定义"},
-            {"nodeId": "class:SkillRegistry", "why": "技能注册中心"},
-            {"nodeId": "file:src/agentkit/skills/loader.py", "why": "从YAML加载技能定义"},
-            {"nodeId": "class:IntentRouter", "why": "意图路由器，匹配用户输入到技能"},
-            {"nodeId": "file:src/agentkit/router/intent.py", "why": "意图路由实现"},
-        ],
-    })
-
-    # Tour 8: Evolution
-    tours.append({
-        "id": "tour:evolution",
-        "name": "进化系统导览",
-        "description": "了解Agent自我进化的机制和实现",
-        "steps": [
-            {"nodeId": "file:src/agentkit/evolution/lifecycle.py", "why": "进化生命周期Mixin"},
-            {"nodeId": "file:src/agentkit/evolution/reflector.py", "why": "反思器 - 分析结果生成改进建议"},
-            {"nodeId": "file:src/agentkit/evolution/prompt_optimizer.py", "why": "Prompt自动优化"},
-            {"nodeId": "file:src/agentkit/evolution/genetic.py", "why": "遗传算法进化"},
-            {"nodeId": "file:src/agentkit/evolution/ab_tester.py", "why": "A/B测试对比"},
-        ],
-    })
-
-    # Tour 9: Infrastructure
-    tours.append({
-        "id": "tour:infrastructure",
-        "name": "基础设施导览",
-        "description": "了解消息总线、会话管理、遥测等基础设施",
-        "steps": [
-            {"nodeId": "file:src/agentkit/bus/protocol.py", "why": "消息总线协议接口"},
-            {"nodeId": "file:src/agentkit/bus/redis_bus.py", "why": "Redis Pub/Sub消息总线"},
-            {"nodeId": "file:src/agentkit/bus/memory_bus.py", "why": "进程内消息总线"},
-            {"nodeId": "file:src/agentkit/session/manager.py", "why": "会话管理器"},
-            {"nodeId": "file:src/agentkit/telemetry/tracing.py", "why": "OpenTelemetry追踪集成"},
-            {"nodeId": "file:src/agentkit/telemetry/metrics.py", "why": "运行指标收集"},
-        ],
-    })
-
-    return tours
-
-
-def main():
-    """Main entry point."""
-    print("Building knowledge graph for Fischer AgentKit...")
-
-    graph = build_knowledge_graph()
-
-    # Ensure output directory exists
-    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
-
-    # Write JSON
-    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
-        json.dump(graph, f, ensure_ascii=False, indent=2)
-
-    print(f"Knowledge graph written to {OUTPUT_PATH}")
-    print(f"  Nodes: {len(graph['nodes'])}")
-    print(f"  Edges: {len(graph['edges'])}")
-    print(f"  Tours: {len(graph['tours'])}")
-
-    # Print layer statistics
-    layer_counts = {}
-    for node in graph["nodes"]:
-        layer = node["layer"]
-        layer_counts[layer] = layer_counts.get(layer, 0) + 1
-
-    print("\nLayer distribution:")
-    for layer, count in sorted(layer_counts.items()):
-        print(f"  {layer}: {count} nodes")
-
-    # Print type statistics
-    type_counts = {}
-    for node in graph["nodes"]:
-        t = node["type"]
-        type_counts[t] = type_counts.get(t, 0) + 1
-
-    print("\nNode type distribution:")
-    for t, count in sorted(type_counts.items()):
-        print(f"  {t}: {count} nodes")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/.understand-anything/knowledge-graph.json b/.understand-anything/knowledge-graph.json
index b06890d..c4192cf 100644
--- a/.understand-anything/knowledge-graph.json
+++ b/.understand-anything/knowledge-graph.json
@@ -13,8 +13,8 @@
       "Redis"
     ],
     "description": "AI驱动的Agent框架，支持ReAct引擎、多LLM网关、Pipeline编排、自适应反思和消息总线",
-    "analyzedAt": "2026-06-15T06:01:34.043135+00:00",
-    "gitCommitHash": "d9d1b16e5911ad958cd8ae38958058bea13f3fcc"
+    "analyzedAt": "2026-06-17T06:30:20.584048+00:00",
+    "gitCommitHash": "840d1af"
   },
   "nodes": [
     {
@@ -47,7 +47,7 @@
       "name": "geo_server.py",
       "filePath": "configs/geo_server.py",
       "layer": "utility",
-      "summary": "GEO AgentKit Server 启动入口",
+      "summary": "GEO AgentKit Server 启动入口：使用 find_config_path + load_config_with_dotenv 统一配置加载",
       "tags": [
         "configs"
       ],
@@ -199,9 +199,10 @@
       "name": "chat.py",
       "filePath": "src/agentkit/cli/chat.py",
       "layer": "api",
-      "summary": "Chat command — interactive terminal chat with an Agent.",
+      "summary": "Chat 命令：交互式终端聊天，使用 find_config_path + load_config_with_dotenv 统一配置加载链",
       "tags": [
-        "cli"
+        "cli",
+        "chat"
       ],
       "complexity": "moderate"
     },
@@ -223,7 +224,7 @@
       "name": "main.py",
       "filePath": "src/agentkit/cli/main.py",
       "layer": "api",
-      "summary": "AgentKit CLI main entry point",
+      "summary": "AgentKit CLI 主入口：gui/serve 命令使用 find_config_path + load_config_with_dotenv + has_llm_provider 统一配置加载",
       "tags": [
         "cli"
       ],
@@ -235,11 +236,12 @@
       "name": "onboarding.py",
       "filePath": "src/agentkit/cli/onboarding.py",
       "layer": "api",
-      "summary": "Onboarding flow — interactive first-time configuration wizard.",
+      "summary": "Onboarding 交互式首次配置向导：merge-update 模式（不覆盖已有配置）、needs_onboarding() 检查 has_llm_provider()、bailian-coding provider 预设",
       "tags": [
-        "cli"
+        "cli",
+        "onboarding"
       ],
-      "complexity": "moderate"
+      "complexity": "complex"
     },
     {
       "id": "file:src/agentkit/cli/pair.py",
@@ -271,7 +273,7 @@
       "name": "task.py",
       "filePath": "src/agentkit/cli/task.py",
       "layer": "api",
-      "summary": "Task management CLI commands",
+      "summary": "Task 管理 CLI 命令：本地模式使用 find_config_path + load_config_with_dotenv 加载配置",
       "tags": [
         "cli"
       ],
@@ -283,9 +285,10 @@
       "name": "templates.py",
       "filePath": "src/agentkit/cli/templates.py",
       "layer": "api",
-      "summary": "Template files for agentkit init",
+      "summary": "agentkit init 模板文件：bailian-coding 默认 provider、docker-compose 含 postgres+pgvector",
       "tags": [
-        "cli"
+        "cli",
+        "templates"
       ],
       "complexity": "simple"
     },
@@ -1701,9 +1704,11 @@
       "name": "client_config.py",
       "filePath": "src/agentkit/server/client_config.py",
       "layer": "api",
-      "summary": "Client-specific configuration with priority over defaults",
+      "summary": "客户端配置管理：clients.yaml 使用 _deep_resolve 解析 ${VAR} 环境变量引用",
       "tags": [
-        "server"
+        "server",
+        "config",
+        "dotenv"
       ],
       "complexity": "moderate"
     },
@@ -1843,12 +1848,14 @@
       "name": "settings.py",
       "filePath": "src/agentkit/server/routes/settings.py",
       "layer": "api",
-      "summary": "Settings API routes with config hot-reload support.",
+      "summary": "Settings API 路由：LLM/Skills/KB/General 配置的 CRUD，支持 ${VAR} 反向解析保留、.env 写入 API Key、ruamel.yaml 保留注释",
       "tags": [
         "server",
-        "routes"
+        "routes",
+        "settings",
+        "dotenv"
       ],
-      "complexity": "moderate"
+      "complexity": "complex"
     },
     {
       "id": "file:src/agentkit/server/routes/skill_management.py",
@@ -11141,7 +11148,7 @@
       "name": "chat",
       "filePath": "src/agentkit/llm/gateway.py",
       "layer": "utility",
-      "summary": "LLMGateway.chat(messages, model, agent_name, task_type, tools, tool_choice) 异步方法",
+      "summary": "LLMGateway.chat(messages, model, agent_name, task_type, tools, tool_choice, timeout) 异步方法 - 支持超时透传和空响应检测",
       "tags": [
         "LLMGateway",
         "chat"
@@ -11154,7 +11161,7 @@
       "name": "chat_stream",
       "filePath": "src/agentkit/llm/gateway.py",
       "layer": "utility",
-      "summary": "LLMGateway.chat_stream(messages, model, agent_name, task_type, tools, tool_choice) 异步方法",
+      "summary": "LLMGateway.chat_stream(messages, model, agent_name, task_type, tools, tool_choice, timeout) 异步方法 - 支持超时透传和空流检测",
       "tags": [
         "LLMGateway",
         "chat_stream"
@@ -11282,7 +11289,7 @@
       "name": "LLMRequest",
       "filePath": "src/agentkit/llm/protocol.py",
       "layer": "utility",
-      "summary": "LLM 请求",
+      "summary": "LLM 请求（含 timeout 超时控制）",
       "tags": [
         "LLMRequest"
       ],
@@ -11294,7 +11301,7 @@
       "name": "__init__",
       "filePath": "src/agentkit/llm/protocol.py",
       "layer": "utility",
-      "summary": "LLMRequest.__init__(messages, model, tools, tool_choice, temperature, max_tokens) 方法",
+      "summary": "LLMRequest.__init__(messages, model, tools, tool_choice, temperature, max_tokens, timeout) 方法",
       "tags": [
         "LLMRequest",
         "__init__"
@@ -27512,18 +27519,6 @@
       ],
       "complexity": "moderate"
     },
-    {
-      "id": "func:_load_dotenv",
-      "type": "function",
-      "name": "_load_dotenv",
-      "filePath": "src/agentkit/cli/chat.py",
-      "layer": "api",
-      "summary": "_load_dotenv(dotenv_path) 函数 → None",
-      "tags": [
-        "_load_dotenv"
-      ],
-      "complexity": "moderate"
-    },
     {
       "id": "func:_print_help",
       "type": "function",
@@ -27614,7 +27609,7 @@
       "name": "needs_onboarding",
       "filePath": "src/agentkit/cli/onboarding.py",
       "layer": "api",
-      "summary": "needs_onboarding(config_arg) 函数 → bool",
+      "summary": "needs_onboarding(config_arg) → bool: 检查是否需要 onboarding，使用 load_config_with_dotenv 加载配置并检查 has_llm_provider()",
       "tags": [
         "needs_onboarding"
       ],
@@ -27626,7 +27621,7 @@
       "name": "run_onboarding",
       "filePath": "src/agentkit/cli/onboarding.py",
       "layer": "api",
-      "summary": "run_onboarding(output_dir, config_arg) 函数 → BinOp(left=Name(id='str', ctx=Load()), op=BitOr(), right=Constant(value=None))",
+      "summary": "run_onboarding(output_dir, config_arg) → str | None: 交互式配置向导，merge-update 模式（仅更新 LLM section，保留其他配置），bailian-coding provider 预设",
       "tags": [
         "run_onboarding"
       ],
@@ -28946,11 +28941,11 @@
       "name": "_write_yaml_config",
       "filePath": "src/agentkit/server/routes/settings.py",
       "layer": "api",
-      "summary": "_write_yaml_config(config_path, data) 函数 → None",
+      "summary": "_write_yaml_config(config_path, data) → None: 写回 YAML 配置，使用 _reverse_resolve_env 保留 ${VAR} 引用，ruamel.yaml 保留注释格式",
       "tags": [
         "_write_yaml_config"
       ],
-      "complexity": "simple"
+      "complexity": "moderate"
     },
     {
       "id": "func:_get_config_path",
@@ -28982,7 +28977,7 @@
       "name": "update_llm_settings",
       "filePath": "src/agentkit/server/routes/settings.py",
       "layer": "api",
-      "summary": "update_llm_settings(request, update) 异步函数",
+      "summary": "update_llm_settings(request, update) 异步函数: 更新 LLM 配置，明文 API Key 写入 .env（_write_env_var），YAML 中保留 ${VAR} 引用",
       "tags": [
         "update_llm_settings"
       ],
@@ -30396,11 +30391,12 @@
       "id": "file:src/agentkit/server/app.py",
       "type": "file",
       "name": "app.py",
-      "summary": "FastAPI 应用工厂，初始化全部组件",
+      "summary": "FastAPI 应用工厂：初始化全部组件，create_app 自动加载 .env + ServerConfig.from_yaml，支持配置热重载",
       "tags": [
         "server",
         "app",
-        "factory"
+        "factory",
+        "dotenv"
       ],
       "filePath": "src/agentkit/server/app.py"
     },
@@ -30408,10 +30404,11 @@
       "id": "file:src/agentkit/server/config.py",
       "type": "file",
       "name": "config.py",
-      "summary": "服务器配置加载器，从 agentkit.yaml 加载配置",
+      "summary": "服务器配置加载器：统一配置加载链 find_config_path() → load_config_with_dotenv() → load_dotenv() + ServerConfig.from_yaml()，含 .env 白名单安全加载、${VAR} 环境变量解析、配置热重载",
       "tags": [
         "server",
-        "config"
+        "config",
+        "dotenv"
       ],
       "filePath": "src/agentkit/server/config.py"
     },
@@ -30419,7 +30416,7 @@
       "id": "class:src/agentkit/server/config.py:ServerConfig",
       "type": "class",
       "name": "ServerConfig",
-      "summary": "服务器配置主类",
+      "summary": "服务器配置主类，支持 from_yaml 加载、has_llm_provider() 检测有效 LLM 配置、watch_config 热重载",
       "tags": [
         "server",
         "config"
@@ -30540,6 +30537,235 @@
         "team"
       ],
       "filePath": "tests/unit/experts/test_team.py"
+    },
+    {
+      "id": "func:_resolve_env_vars",
+      "type": "function",
+      "name": "_resolve_env_vars",
+      "filePath": "src/agentkit/server/config.py",
+      "layer": "service",
+      "summary": "解析字符串中的 ${VAR} 和 ${VAR:-default} 环境变量引用",
+      "tags": [
+        "config",
+        "env-vars"
+      ],
+      "complexity": "simple"
+    },
+    {
+      "id": "func:_deep_resolve",
+      "type": "function",
+      "name": "_deep_resolve",
+      "filePath": "src/agentkit/server/config.py",
+      "layer": "service",
+      "summary": "递归解析嵌套 dict/list 中的 ${VAR} 环境变量引用",
+      "tags": [
+        "config",
+        "env-vars"
+      ],
+      "complexity": "simple"
+    },
+    {
+      "id": "func:load_dotenv",
+      "type": "function",
+      "name": "load_dotenv",
+      "filePath": "src/agentkit/server/config.py",
+      "layer": "service",
+      "summary": "从 .env 文件加载环境变量，支持白名单前缀/精确名称过滤，不覆盖已存在的环境变量",
+      "tags": [
+        "config",
+        "dotenv",
+        "security"
+      ],
+      "complexity": "moderate"
+    },
+    {
+      "id": "func:load_config_with_dotenv",
+      "type": "function",
+      "name": "load_config_with_dotenv",
+      "filePath": "src/agentkit/server/config.py",
+      "layer": "service",
+      "summary": "生产级配置加载入口：先 load_dotenv() 再 ServerConfig.from_yaml()，所有 CLI 和 app factory 的统一加载链",
+      "tags": [
+        "config",
+        "dotenv"
+      ],
+      "complexity": "simple"
+    },
+    {
+      "id": "func:find_config_path",
+      "type": "function",
+      "name": "find_config_path",
+      "filePath": "src/agentkit/server/config.py",
+      "layer": "service",
+      "summary": "查找 agentkit.yaml 配置文件路径：--config 参数 > ./agentkit.yaml > ~/.agentkit/agentkit.yaml",
+      "tags": [
+        "config"
+      ],
+      "complexity": "simple"
+    },
+    {
+      "id": "func:has_llm_provider",
+      "type": "function",
+      "name": "has_llm_provider",
+      "filePath": "src/agentkit/server/config.py",
+      "layer": "service",
+      "summary": "ServerConfig 方法：检查是否配置了有效的 LLM Provider（API Key 已解析且非 ${VAR} 占位符）",
+      "tags": [
+        "config",
+        "llm"
+      ],
+      "complexity": "simple"
+    },
+    {
+      "id": "func:_reverse_resolve_env",
+      "type": "function",
+      "name": "_reverse_resolve_env",
+      "filePath": "src/agentkit/server/routes/settings.py",
+      "layer": "api",
+      "summary": "反向解析环境变量引用：若原始 YAML 含 ${VAR} 且当前值匹配 os.environ[VAR]，保留 ${VAR} 引用而非写入明文",
+      "tags": [
+        "settings",
+        "env-vars",
+        "security"
+      ],
+      "complexity": "moderate"
+    },
+    {
+      "id": "func:_write_env_var",
+      "type": "function",
+      "name": "_write_env_var",
+      "filePath": "src/agentkit/server/routes/settings.py",
+      "layer": "api",
+      "summary": "将 API Key 写入 .env 文件（配置文件同级目录），更新已有行或追加新行，同时设置 os.environ",
+      "tags": [
+        "settings",
+        "dotenv",
+        "security"
+      ],
+      "complexity": "moderate"
+    },
+    {
+      "id": "func:_deep_update_ruamel",
+      "type": "function",
+      "name": "_deep_update_ruamel",
+      "filePath": "src/agentkit/server/routes/settings.py",
+      "layer": "api",
+      "summary": "深度更新 ruamel.yaml CommentedMap，保留 YAML 注释和格式",
+      "tags": [
+        "settings",
+        "yaml"
+      ],
+      "complexity": "moderate"
+    },
+    {
+      "id": "file:tests/unit/server/test_settings_routes.py",
+      "type": "file",
+      "name": "test_settings_routes.py",
+      "filePath": "tests/unit/server/test_settings_routes.py",
+      "layer": "test",
+      "summary": "Settings API 路由单元测试：覆盖 LLM/Skills/KB/General 配置 CRUD、${VAR} 反向解析保留、_write_env_var .env 写入",
+      "tags": [
+        "test",
+        "settings",
+        "server"
+      ],
+      "complexity": "moderate"
+    },
+    {
+      "id": "file:tests/unit/test_cli.py",
+      "type": "file",
+      "name": "test_cli.py",
+      "filePath": "tests/unit/test_cli.py",
+      "layer": "test",
+      "summary": "CLI 命令单元测试：version/doctor/init 命令、onboarding 配置加载链",
+      "tags": [
+        "test",
+        "cli"
+      ],
+      "complexity": "moderate"
+    },
+    {
+      "id": "file:src/agentkit/cli/benchmark.py",
+      "type": "file",
+      "name": "benchmark.py",
+      "filePath": "src/agentkit/cli/benchmark.py",
+      "layer": "api",
+      "summary": "Benchmark CLI - 标准化能力基准测试，支持 Mock/LLM/GUI 三种模式，输出 Accuracy/Precision/Recall/F1/Latency 指标",
+      "tags": [
+        "benchmark",
+        "cli",
+        "testing"
+      ],
+      "complexity": "complex"
+    },
+    {
+      "id": "func:benchmark",
+      "type": "function",
+      "name": "benchmark",
+      "filePath": "src/agentkit/cli/benchmark.py",
+      "layer": "api",
+      "summary": "benchmark(dimension, mode, report, runs, fast, verbose) CLI 主命令 - 支持流式关键词检测、难度分级超时、WebSocket 协议修正、延迟统计排除 timeout 用例",
+      "tags": [
+        "benchmark",
+        "cli",
+        "main"
+      ],
+      "complexity": "complex"
+    },
+    {
+      "id": "func:_execute_llm_reasoning_task",
+      "type": "function",
+      "name": "_execute_llm_reasoning_task",
+      "filePath": "src/agentkit/cli/benchmark.py",
+      "layer": "api",
+      "summary": "LLM 推理任务执行 - 使用流式响应 + 关键词提前退出 + 难度分级超时 (easy=20s, medium=40s, hard=60s)",
+      "tags": [
+        "benchmark",
+        "llm",
+        "streaming"
+      ],
+      "complexity": "moderate"
+    },
+    {
+      "id": "func:_run_gui_integration",
+      "type": "function",
+      "name": "_run_gui_integration",
+      "filePath": "src/agentkit/cli/benchmark.py",
+      "layer": "api",
+      "summary": "GUI 集成测试 - 直接 WebSocket 连接测试，connected 消息作为通过标准",
+      "tags": [
+        "benchmark",
+        "gui",
+        "websocket"
+      ],
+      "complexity": "moderate"
+    },
+    {
+      "id": "func:_compute_metrics",
+      "type": "function",
+      "name": "_compute_metrics",
+      "filePath": "src/agentkit/cli/benchmark.py",
+      "layer": "api",
+      "summary": "计算聚合指标 - 支持 exclude_latency_tags 参数排除特定用例的延迟统计",
+      "tags": [
+        "benchmark",
+        "metrics"
+      ],
+      "complexity": "moderate"
+    },
+    {
+      "id": "document:docs/plans/2026-06-17-001-fix-benchmark-failures-root-cause-plan.md",
+      "type": "document",
+      "name": "2026-06-17-001-fix-benchmark-failures-root-cause-plan.md",
+      "filePath": "docs/plans/2026-06-17-001-fix-benchmark-failures-root-cause-plan.md",
+      "layer": "document",
+      "summary": "Benchmark 测试失败根因修复计划 - 修复 3 个失败项：LLM 超时（流式+分级超时）、WebSocket（端点+协议修正）、延迟统计（排除 timeout 用例）",
+      "tags": [
+        "plan",
+        "benchmark",
+        "fix"
+      ],
+      "complexity": "moderate"
     }
   ],
   "edges": [
@@ -44718,13 +44944,6 @@
       "type": "contains",
       "label": "定义函数 _resolve_default_model"
     },
-    {
-      "id": "edge:a1b49a61",
-      "source": "file:src/agentkit/cli/chat.py",
-      "target": "func:_load_dotenv",
-      "type": "contains",
-      "label": "定义函数 _load_dotenv"
-    },
     {
       "id": "edge:6d9737a9",
       "source": "file:src/agentkit/cli/chat.py",
@@ -52368,6 +52587,265 @@
       "target": "class:src/agentkit/server/config.py:ServerConfig",
       "type": "contains",
       "weight": 1.0
+    },
+    {
+      "source": "file:src/agentkit/server/config.py",
+      "target": "func:_resolve_env_vars",
+      "type": "contains",
+      "label": "定义函数 _resolve_env_vars",
+      "weight": 1.0
+    },
+    {
+      "source": "file:src/agentkit/server/config.py",
+      "target": "func:_deep_resolve",
+      "type": "contains",
+      "label": "定义函数 _deep_resolve",
+      "weight": 1.0
+    },
+    {
+      "source": "file:src/agentkit/server/config.py",
+      "target": "func:load_dotenv",
+      "type": "contains",
+      "label": "定义函数 load_dotenv",
+      "weight": 1.0
+    },
+    {
+      "source": "file:src/agentkit/server/config.py",
+      "target": "func:load_config_with_dotenv",
+      "type": "contains",
+      "label": "定义函数 load_config_with_dotenv",
+      "weight": 1.0
+    },
+    {
+      "source": "file:src/agentkit/server/config.py",
+      "target": "func:find_config_path",
+      "type": "contains",
+      "label": "定义函数 find_config_path",
+      "weight": 1.0
+    },
+    {
+      "source": "class:src/agentkit/server/config.py:ServerConfig",
+      "target": "func:has_llm_provider",
+      "type": "contains",
+      "label": "方法 has_llm_provider",
+      "weight": 1.0
+    },
+    {
+      "source": "func:load_config_with_dotenv",
+      "target": "func:load_dotenv",
+      "type": "calls",
+      "label": "调用 load_dotenv",
+      "weight": 0.8
+    },
+    {
+      "source": "func:load_config_with_dotenv",
+      "target": "class:src/agentkit/server/config.py:ServerConfig",
+      "type": "calls",
+      "label": "调用 ServerConfig.from_yaml",
+      "weight": 0.8
+    },
+    {
+      "source": "file:src/agentkit/server/routes/settings.py",
+      "target": "func:_reverse_resolve_env",
+      "type": "contains",
+      "label": "定义函数 _reverse_resolve_env",
+      "weight": 1.0
+    },
+    {
+      "source": "file:src/agentkit/server/routes/settings.py",
+      "target": "func:_write_env_var",
+      "type": "contains",
+      "label": "定义函数 _write_env_var",
+      "weight": 1.0
+    },
+    {
+      "source": "file:src/agentkit/server/routes/settings.py",
+      "target": "func:_deep_update_ruamel",
+      "type": "contains",
+      "label": "定义函数 _deep_update_ruamel",
+      "weight": 1.0
+    },
+    {
+      "source": "func:_write_yaml_config",
+      "target": "func:_reverse_resolve_env",
+      "type": "calls",
+      "label": "调用 _reverse_resolve_env 保留 ${VAR} 引用",
+      "weight": 0.8
+    },
+    {
+      "source": "func:_write_yaml_config",
+      "target": "func:_deep_update_ruamel",
+      "type": "calls",
+      "label": "调用 _deep_update_ruamel 保留 YAML 注释",
+      "weight": 0.8
+    },
+    {
+      "source": "func:update_llm_settings",
+      "target": "func:_write_env_var",
+      "type": "calls",
+      "label": "调用 _write_env_var 写入 API Key 到 .env",
+      "weight": 0.8
+    },
+    {
+      "source": "file:src/agentkit/server/client_config.py",
+      "target": "func:_deep_resolve",
+      "type": "imports",
+      "label": "导入 _deep_resolve 解析 clients.yaml 中的 ${VAR}",
+      "weight": 0.7
+    },
+    {
+      "source": "file:src/agentkit/cli/chat.py",
+      "target": "func:find_config_path",
+      "type": "calls",
+      "label": "调用 find_config_path 查找配置文件",
+      "weight": 0.7
+    },
+    {
+      "source": "file:src/agentkit/cli/chat.py",
+      "target": "func:load_config_with_dotenv",
+      "type": "calls",
+      "label": "调用 load_config_with_dotenv 加载配置",
+      "weight": 0.7
+    },
+    {
+      "source": "file:src/agentkit/cli/main.py",
+      "target": "func:find_config_path",
+      "type": "calls",
+      "label": "调用 find_config_path 查找配置文件",
+      "weight": 0.7
+    },
+    {
+      "source": "file:src/agentkit/cli/main.py",
+      "target": "func:load_config_with_dotenv",
+      "type": "calls",
+      "label": "调用 load_config_with_dotenv 加载配置",
+      "weight": 0.7
+    },
+    {
+      "source": "file:src/agentkit/cli/task.py",
+      "target": "func:find_config_path",
+      "type": "calls",
+      "label": "调用 find_config_path 查找配置文件",
+      "weight": 0.7
+    },
+    {
+      "source": "file:src/agentkit/cli/task.py",
+      "target": "func:load_config_with_dotenv",
+      "type": "calls",
+      "label": "调用 load_config_with_dotenv 加载配置",
+      "weight": 0.7
+    },
+    {
+      "source": "file:configs/geo_server.py",
+      "target": "func:find_config_path",
+      "type": "calls",
+      "label": "调用 find_config_path 查找配置文件",
+      "weight": 0.7
+    },
+    {
+      "source": "file:configs/geo_server.py",
+      "target": "func:load_config_with_dotenv",
+      "type": "calls",
+      "label": "调用 load_config_with_dotenv 加载配置",
+      "weight": 0.7
+    },
+    {
+      "source": "file:src/agentkit/server/app.py",
+      "target": "func:load_dotenv",
+      "type": "calls",
+      "label": "调用 load_dotenv 在 create_app 中加载 .env",
+      "weight": 0.7
+    },
+    {
+      "source": "func:needs_onboarding",
+      "target": "func:find_config_path",
+      "type": "calls",
+      "label": "调用 find_config_path 查找配置文件",
+      "weight": 0.7
+    },
+    {
+      "source": "func:needs_onboarding",
+      "target": "func:load_config_with_dotenv",
+      "type": "calls",
+      "label": "调用 load_config_with_dotenv 加载配置",
+      "weight": 0.7
+    },
+    {
+      "source": "func:needs_onboarding",
+      "target": "func:has_llm_provider",
+      "type": "calls",
+      "label": "调用 has_llm_provider 检查 LLM 配置",
+      "weight": 0.8
+    },
+    {
+      "source": "file:src/agentkit/cli/chat.py",
+      "target": "file:src/agentkit/server/config.py",
+      "type": "imports",
+      "label": "导入 find_config_path, load_config_with_dotenv",
+      "weight": 0.7
+    },
+    {
+      "source": "file:src/agentkit/cli/main.py",
+      "target": "file:src/agentkit/server/config.py",
+      "type": "imports",
+      "label": "导入 find_config_path, load_config_with_dotenv",
+      "weight": 0.7
+    },
+    {
+      "source": "file:src/agentkit/cli/task.py",
+      "target": "file:src/agentkit/server/config.py",
+      "type": "imports",
+      "label": "导入 find_config_path, load_config_with_dotenv",
+      "weight": 0.7
+    },
+    {
+      "source": "file:src/agentkit/cli/onboarding.py",
+      "target": "file:src/agentkit/server/config.py",
+      "type": "imports",
+      "label": "导入 find_config_path, load_config_with_dotenv",
+      "weight": 0.7
+    },
+    {
+      "source": "file:src/agentkit/server/routes/settings.py",
+      "target": "file:src/agentkit/server/config.py",
+      "type": "imports",
+      "label": "间接依赖 _deep_resolve / _resolve_env_vars",
+      "weight": 0.5
+    },
+    {
+      "source": "func:_deep_resolve",
+      "target": "func:_resolve_env_vars",
+      "type": "calls",
+      "label": "调用 _resolve_env_vars 解析字符串中的 ${VAR}",
+      "weight": 0.8
+    },
+    {
+      "source": "class:src/agentkit/server/config.py:ServerConfig",
+      "target": "func:_deep_resolve",
+      "type": "calls",
+      "label": "from_yaml 调用 _deep_resolve 解析环境变量",
+      "weight": 0.8
+    },
+    {
+      "source": "func:_write_yaml_config",
+      "target": "func:_read_yaml_config",
+      "type": "calls",
+      "label": "调用 _read_yaml_config 读取原始 YAML 用于反向解析",
+      "weight": 0.7
+    },
+    {
+      "source": "file:tests/unit/server/test_settings_routes.py",
+      "target": "file:src/agentkit/server/routes/settings.py",
+      "type": "tested_by",
+      "label": "测试 Settings API 路由",
+      "weight": 0.5
+    },
+    {
+      "source": "file:tests/unit/test_cli.py",
+      "target": "file:src/agentkit/cli/main.py",
+      "type": "tested_by",
+      "label": "测试 CLI 命令",
+      "weight": 0.5
     }
   ],
   "tours": [
diff --git a/.understand-anything/meta.json b/.understand-anything/meta.json
index 50e97f2..9540d6c 100644
--- a/.understand-anything/meta.json
+++ b/.understand-anything/meta.json
@@ -1,6 +1,7 @@
 {
-  "lastAnalyzedAt": "2026-06-15T06:01:34.200955+00:00",
-  "gitCommitHash": "64d62a2b60c57fbb1844c1f46c541234c8f9d871",
+  "lastAnalyzedAt": "2026-06-17T05:30:00.000000+00:00",
+  "gitCommitHash": "840d1af",
   "version": "1.0.0",
-  "analyzedFiles": 2416
-}
\ No newline at end of file
+  "analyzedFiles": 2418,
+  "lastUpdateSummary": "fix: resolve benchmark failures from root cause (LLM timeout, WebSocket, latency stats)"
+}
diff --git a/agentkit.yaml b/agentkit.yaml
index 4d77882..692c566 100644
--- a/agentkit.yaml
+++ b/agentkit.yaml
@@ -32,5 +32,7 @@ session: {backend: memory}
 bus: {backend: memory}
 task_store: {backend: memory}
 skills: {auto_discover: true, paths: ["./configs/skills"]}
+experts: {paths: ["./configs/experts"]}
+board: {max_rounds: 5, default_template: private_board, parallel_speech: true, history_compression_threshold: 20}
 logging: {level: INFO, format: text}
 router: {classifier: heuristic, auction_enabled: false}
diff --git a/configs/experts/allenzhang.yaml b/configs/experts/allenzhang.yaml
new file mode 100644
index 0000000..be74c7e
--- /dev/null
+++ b/configs/experts/allenzhang.yaml
@@ -0,0 +1,23 @@
+name: allenzhang
+description: "张小龙 — 用户体验、极简主义、社交产品直觉"
+is_builtin: true
+config:
+  name: allenzhang
+  agent_type: expert
+  persona: |
+    你是张小龙，微信创始人，被誉为"微信之父"。
+    你信奉极简主义，认为"少即是多"，产品应让用户用完即走。
+    你有极强的社交产品直觉，理解人性中对连接、表达和被认可的需求。
+    你强调"让自然生长"，反对过度运营和打扰用户。
+    你认为好的产品应该像水一样自然，用户感受不到它的存在却离不开它。
+  thinking_style: "极简主义 + 用户直觉：从人性需求出发，做减法而非加法"
+  speaking_style: "内敛、克制，善用产品案例，强调'用户视角'和'自然生长'"
+  decision_framework: "用户价值优先 — 问'这会让用户觉得简单吗'和'它在 5 年后还有意义吗'"
+  collaboration_strategy: "cooperative"
+  bound_skills: []
+  avatar: "💬"
+  color: "#07C160"
+  is_lead: false
+  task_mode: llm_generate
+  prompt:
+    identity: "张小龙"
diff --git a/configs/experts/charlie_munger.yaml b/configs/experts/charlie_munger.yaml
new file mode 100644
index 0000000..8fe3159
--- /dev/null
+++ b/configs/experts/charlie_munger.yaml
@@ -0,0 +1,23 @@
+name: charlie_munger
+description: "Charlie Munger — 心智模型、跨学科思维、逆向思考"
+is_builtin: true
+config:
+  name: charlie_munger
+  agent_type: expert
+  persona: |
+    你是 Charlie Munger，伯克希尔·哈撒韦副主席，Warren Buffett 的黄金搭档。
+    你倡导多元心智模型，从物理学、生物学、心理学、经济学等学科汲取智慧。
+    你信奉逆向思考："告诉我我会死在哪里，我就永远不去那里。"
+    你警惕心理偏误，认为避免愚蠢比追求聪明更重要。
+    你的表达犀利、幽默，善用格言和极端案例说明道理。
+  thinking_style: "心智模型 + 逆向思考：从多学科汲取模型，先想如何失败再想如何成功"
+  speaking_style: "犀利、幽默、善用格言，不回避说'这很愚蠢'"
+  decision_framework: "逆向思考 — 问'怎样做会必然失败'，然后避免它"
+  collaboration_strategy: "cooperative"
+  bound_skills: []
+  avatar: "🧠"
+  color: "#2C3E50"
+  is_lead: false
+  task_mode: llm_generate
+  prompt:
+    identity: "Charlie Munger"
diff --git a/configs/experts/elon_musk.yaml b/configs/experts/elon_musk.yaml
new file mode 100644
index 0000000..b62ee50
--- /dev/null
+++ b/configs/experts/elon_musk.yaml
@@ -0,0 +1,22 @@
+name: elon_musk
+description: "Elon Musk — 第一性原理、物理思维、激进创新"
+is_builtin: true
+config:
+  name: elon_musk
+  agent_type: expert
+  persona: |
+    你是 Elon Musk，特斯拉、SpaceX、Neuralink、X 的 CEO。
+    你以第一性原理思考问题，从物理学基本定律出发推导解决方案。
+    你敢于挑战不可能，追求激进创新，认为"足够好"是创新的敌人。
+    你关注人类文明的长期未来，致力于加速可持续能源和跨行星生存。
+  thinking_style: "第一性原理：从物理学基本定律出发，剥离类比思维，直接推导本质"
+  speaking_style: "直接、简洁、偶尔尖锐，善用比喻，不回避争议性观点"
+  decision_framework: "第一性原理 — 问'这件事的物理学本质是什么'，再推导可行性"
+  collaboration_strategy: "cooperative"
+  bound_skills: []
+  avatar: "🚀"
+  color: "#E31937"
+  is_lead: false
+  task_mode: llm_generate
+  prompt:
+    identity: "Elon Musk"
diff --git a/configs/experts/jeff_bezos.yaml b/configs/experts/jeff_bezos.yaml
new file mode 100644
index 0000000..541a11d
--- /dev/null
+++ b/configs/experts/jeff_bezos.yaml
@@ -0,0 +1,23 @@
+name: jeff_bezos
+description: "Jeff Bezos — Day 1 思维、客户至上、长期主义"
+is_builtin: true
+config:
+  name: jeff_bezos
+  agent_type: expert
+  persona: |
+    你是 Jeff Bezos，亚马逊创始人、Blue Origin 创始人。
+    你坚持 Day 1 思维：永远像创业第一天那样行动，保持初学者心态。
+    你以客户为起点反向工作，而非以能力为起点正向工作。
+    你愿意用 7 年时间证明一个长期决策的正确性，拒绝短期主义。
+    你认为"客户永远不满足"，这是创新的永恒动力。
+  thinking_style: "Day 1 思维：保持创业第一天的紧迫感和初学者心态"
+  speaking_style: "沉稳、结构化，善用'如果'场景分析，强调可逆与不可逆决策的区别"
+  decision_framework: "客户至上 + 长期主义 — 问'什么对客户最好'和'这个决策 10 年后是否仍正确'"
+  collaboration_strategy: "cooperative"
+  bound_skills: []
+  avatar: "📦"
+  color: "#FF9900"
+  is_lead: false
+  task_mode: llm_generate
+  prompt:
+    identity: "Jeff Bezos"
diff --git a/configs/experts/paul_graham.yaml b/configs/experts/paul_graham.yaml
new file mode 100644
index 0000000..bacbdd1
--- /dev/null
+++ b/configs/experts/paul_graham.yaml
@@ -0,0 +1,24 @@
+name: paul_graham
+description: "Paul Graham — 创业、做用户想要的东西、反从众"
+is_builtin: true
+config:
+  name: paul_graham
+  agent_type: expert
+  persona: |
+    你是 Paul Graham，Y Combinator 联合创始人，程序员、散文家、投资人。
+    你投资了 Airbnb、Stripe、Reddit 等数百家创业公司。
+    你信奉"做用户想要的东西"，认为创业的本质是解决真实问题。
+    你反从众，鼓励创始人走"不寻常的路"，认为最好的想法往往看起来不像好想法。
+    你强调"ramen profitable"（拉面盈利）的重要性，认为自给自足是创业者的自由之源。
+    你的文章《How to Do Great Work》《Maker's Schedule, Manager's Schedule》影响了无数创业者。
+  thinking_style: "本质主义 + 反从众：从用户真实需求出发，警惕看起来正常的想法"
+  speaking_style: "平实、直接、善用具体创业案例，偶尔幽默，不回避说'这看起来很蠢'"
+  decision_framework: "用户价值 + 不寻常路 — 问'用户真的想要这个吗'和'这看起来像坏想法吗'"
+  collaboration_strategy: "cooperative"
+  bound_skills: []
+  avatar: "📝"
+  color: "#FF6600"
+  is_lead: false
+  task_mode: llm_generate
+  prompt:
+    identity: "Paul Graham"
diff --git a/configs/experts/private_board.yaml b/configs/experts/private_board.yaml
new file mode 100644
index 0000000..9814c22
--- /dev/null
+++ b/configs/experts/private_board.yaml
@@ -0,0 +1,25 @@
+name: private_board
+description: "默认私董会模板 — 包含 5 位跨领域名人专家"
+is_builtin: true
+config:
+  name: private_board
+  agent_type: expert
+  persona: "私董会模板 — 跨领域名人专家团"
+  thinking_style: "多视角综合"
+  speaking_style: "多样化"
+  decision_framework: "多维度评估"
+  collaboration_strategy: "cooperative"
+  # private_board 模板使用 bound_skills 字段存储成员列表
+  # 这是对现有字段的重用，避免新增 schema
+  bound_skills:
+    - elon_musk
+    - jeff_bezos
+    - allenzhang
+    - charlie_munger
+    - paul_graham
+  avatar: "🏛️"
+  color: "#8E44AD"
+  is_lead: false
+  task_mode: llm_generate
+  prompt:
+    identity: "Private Board Template"
diff --git a/configs/experts/ray_dalio.yaml b/configs/experts/ray_dalio.yaml
new file mode 100644
index 0000000..b096582
--- /dev/null
+++ b/configs/experts/ray_dalio.yaml
@@ -0,0 +1,24 @@
+name: ray_dalio
+description: "Ray Dalio — 原则驱动决策、极度透明、believability-weighted"
+is_builtin: true
+config:
+  name: ray_dalio
+  agent_type: expert
+  persona: |
+    你是 Ray Dalio，桥水基金创始人，《原则》作者。
+    你相信"原则驱动决策"，将决策过程系统化为可重复的原则。
+    你倡导"极度透明"和"极度真实"，认为直面现实是做出好决策的前提。
+    你使用"believability-weighted"决策机制，根据每个人的可信度加权意见。
+    你认为痛苦+反思=进步，错误是学习的机会而非失败。
+    你从经济机器的运行规律出发理解世界，相信"所有事情都是机器"。
+  thinking_style: "原则驱动 + 系统思维：将决策系统化，从经济机器角度理解问题"
+  speaking_style: "结构化、善用原则编号，强调'极度真实'，不回避说'这不符合原则'"
+  decision_framework: "原则驱动 — 问'这符合哪条原则'和'最可信的人怎么看'"
+  collaboration_strategy: "cooperative"
+  bound_skills: []
+  avatar: "⚖️"
+  color: "#1A5276"
+  is_lead: false
+  task_mode: llm_generate
+  prompt:
+    identity: "Ray Dalio"
diff --git a/configs/experts/steve_jobs.yaml b/configs/experts/steve_jobs.yaml
new file mode 100644
index 0000000..0aa436a
--- /dev/null
+++ b/configs/experts/steve_jobs.yaml
@@ -0,0 +1,24 @@
+name: steve_jobs
+description: "Steve Jobs — 产品设计、现实扭曲力场、专注"
+is_builtin: true
+config:
+  name: steve_jobs
+  agent_type: expert
+  persona: |
+    你是 Steve Jobs，Apple 联合创始人，Mac、iPhone、iPad 的缔造者。
+    你对产品设计有极致的追求，认为"设计不只是外观，而是如何运作"。
+    你拥有"现实扭曲力场"，能说服自己和他人实现看似不可能的目标。
+    你强调专注，认为"对 1000 件事说不，比对 1 件事说是更重要"。
+    你相信站在科技与人文的十字路口，做出既有技术深度又有人文温度的产品。
+    你不容忍平庸，认为"足够好"是不可接受的。
+  thinking_style: "设计思维 + 极致专注：从用户体验出发，做减法，追求完美"
+  speaking_style: "激情、有感染力，善用极端对比，不回避说'这完全是垃圾'"
+  decision_framework: "用户体验 + 专注 — 问'这足够简单吗'和'这是我能做的最好的吗'"
+  collaboration_strategy: "cooperative"
+  bound_skills: []
+  avatar: "🍎"
+  color: "#555555"
+  is_lead: false
+  task_mode: llm_generate
+  prompt:
+    identity: "Steve Jobs"
diff --git a/configs/experts/warren_buffett.yaml b/configs/experts/warren_buffett.yaml
new file mode 100644
index 0000000..0737323
--- /dev/null
+++ b/configs/experts/warren_buffett.yaml
@@ -0,0 +1,24 @@
+name: warren_buffett
+description: "Warren Buffett — 价值投资、能力圈、复利思维"
+is_builtin: true
+config:
+  name: warren_buffett
+  agent_type: expert
+  persona: |
+    你是 Warren Buffett，伯克希尔·哈撒韦 CEO，被誉为"奥马哈先知"。
+    你是价值投资的代表人物，相信"以合理价格买入伟大公司"。
+    你严格遵守"能力圈"原则，不投资自己不理解的业务。
+    你信奉复利的力量，认为"人生就像滚雪球，重要的是找到很湿的雪和很长的坡"。
+    你强调"别人贪婪时我恐惧，别人恐惧时我贪婪"，逆向投资是你的标志。
+    你认为投资决策应该基于企业的内在价值，而非市场情绪。
+  thinking_style: "价值投资 + 能力圈：评估内在价值，只在自己理解的领域决策"
+  speaking_style: "平易近人、善用比喻和故事，幽默，强调'简单和常识'"
+  decision_framework: "能力圈 + 内在价值 — 问'我理解这个业务吗'和'它的内在价值是多少'"
+  collaboration_strategy: "cooperative"
+  bound_skills: []
+  avatar: "💰"
+  color: "#1E8449"
+  is_lead: false
+  task_mode: llm_generate
+  prompt:
+    identity: "Warren Buffett"
diff --git a/docker-compose.deploy.yaml b/docker-compose.deploy.yaml
new file mode 100644
index 0000000..97ac257
--- /dev/null
+++ b/docker-compose.deploy.yaml
@@ -0,0 +1,89 @@
+# 生产部署专用 Compose 文件
+# 由 Gitea Actions 在 /opt/agentkit/repo 下调用
+# 与开发用 docker-compose.yaml 的区别：
+#   1. 不暴露 Redis/PostgreSQL 端口到公网（仅内部通信）
+#   2. 密码通过 .env 注入
+#   3. 配置日志大小限制，避免磁盘打满
+#   4. 配置资源限制，避免单服务吃满内存
+
+services:
+  agentkit:
+    build: .
+    command: serve --host 0.0.0.0 --port 8001
+    ports:
+      - "8001:8001"
+    env_file: .env
+    environment:
+      - REDIS_URL=redis://:${REDIS_PASSWORD}@redis:6379/0
+      - DATABASE_URL=postgresql+asyncpg://agentkit:${POSTGRES_PASSWORD}@postgres:5432/agentkit
+    depends_on:
+      redis:
+        condition: service_healthy
+      postgres:
+        condition: service_healthy
+    healthcheck:
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8001/api/v1/health')"]
+      interval: 30s
+      timeout: 10s
+      start_period: 30s
+      retries: 3
+    restart: unless-stopped
+    logging:
+      driver: json-file
+      options:
+        max-size: "50m"
+        max-file: "5"
+    deploy:
+      resources:
+        limits:
+          memory: 2G
+
+  redis:
+    image: redis:7-alpine
+    # 不暴露端口到公网，仅容器内部通信
+    expose:
+      - "6379"
+    command: >
+      redis-server
+      --requirepass ${REDIS_PASSWORD}
+      --maxmemory 256mb
+      --maxmemory-policy allkeys-lru
+    volumes:
+      - redisdata:/data
+    healthcheck:
+      test: ["CMD", "redis-cli", "-a", "${REDIS_PASSWORD}", "ping"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    restart: unless-stopped
+    logging:
+      driver: json-file
+      options:
+        max-size: "20m"
+        max-file: "3"
+
+  postgres:
+    image: pgvector/pgvector:pg15
+    expose:
+      - "5432"
+    environment:
+      POSTGRES_USER: agentkit
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
+      POSTGRES_DB: agentkit
+    volumes:
+      - pgdata:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U agentkit"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    restart: unless-stopped
+    logging:
+      driver: json-file
+      options:
+        max-size: "20m"
+        max-file: "3"
+
+volumes:
+  redisdata:
+  pgdata:
diff --git a/docs/DEPLOYMENT-GITEA-ACTIONS.md b/docs/DEPLOYMENT-GITEA-ACTIONS.md
new file mode 100644
index 0000000..a8e8d69
--- /dev/null
+++ b/docs/DEPLOYMENT-GITEA-ACTIONS.md
@@ -0,0 +1,243 @@
+# Gitea Actions 自动部署指南
+
+> 目标：推送到 `main`/`master` 分支后，Gitea Actions 自动构建并部署到服务器 `8.153.107.96`。
+>
+> 方案：Gitea Actions + 自托管 Runner（host 模式）+ Docker Compose + Gitea Secrets
+
+## 架构
+
+```
+开发者 push → Gitea (http://8.153.107.96/gitea) → Actions 触发
+   ↓
+Runner (同机 host 模式) 执行 workflow
+   ↓
+1. checkout 代码
+2. rsync 同步到 /opt/agentkit/repo
+3. 从 Secrets 写入 .env
+4. docker compose build & up -d
+5. 健康检查 http://localhost:8001/api/v1/health
+```
+
+## 前置条件
+
+服务器 `8.153.107.96` 上需具备：
+
+- [x] Gitea >= 1.21（已部署在 `http://8.153.107.96/gitea`）
+- [x] Docker Engine >= 20.10
+- [x] Docker Compose v2（`docker compose` 命令）
+- [x] sudo 权限的用户（用于安装 Runner、创建 /opt/agentkit）
+
+## 步骤一：启用 Gitea Actions
+
+SSH 登录服务器，编辑 Gitea 配置文件（通常在 `/etc/gitea/app.ini` 或 Gitea 容器内的 `/data/gitea/conf/app.ini`）：
+
+```ini
+[actions]
+ENABLED = true
+DEFAULT_ACTIONS_URL = github
+```
+
+重启 Gitea：
+
+```bash
+# 若 Gitea 以 systemd 运行
+sudo systemctl restart gitea
+
+# 若 Gitea 以 docker 运行
+docker restart gitea
+```
+
+## 步骤二：安装 Gitea Runner（host 模式）
+
+> host 模式直接在宿主机执行 shell 命令，可操作 `/opt/agentkit` 和 Docker，无需挂载 socket。
+
+```bash
+# 1. 下载 runner 二进制（Linux x86_64 示例）
+# 最新版本见 https://dl.gitea.com/act_runner/ 或 https://gitea.com/gitea/act_runner/releases
+RUNNER_VERSION=0.2.6
+sudo curl -fL -o /usr/local/bin/gitea-runner \
+  "https://dl.gitea.com/act_runner/${RUNNER_VERSION}/act_runner-${RUNNER_VERSION}-linux-amd64"
+sudo chmod +x /usr/local/bin/gitea-runner
+
+# 2. 创建 runner 工作用户（可选，避免 root 运行）
+sudo useradd -m -s /bin/bash gitea-runner
+# 让该用户可使用 docker
+sudo usermod -aG docker gitea-runner
+# 让该用户可 sudo 执行 mkdir/chown（部署脚本需要）
+echo "gitea-runner ALL=(ALL) NOPASSWD: /usr/bin/mkdir, /usr/bin/chown" | sudo tee /etc/sudoers.d/gitea-runner
+
+# 3. 切换到 runner 用户
+sudo su - gitea-runner
+
+# 4. 注册 runner
+gitea-runner register \
+  --instance http://8.153.107.96/gitea \
+  --token <YOUR_REGISTRATION_TOKEN> \
+  --name self-hosted \
+  --labels self-hosted,linux \
+  --no-interactive
+
+# 注册 token 获取路径：
+# Gitea Web → 站点管理 → Actions → Runners → 创建 Runner token
+# 或仓库级：仓库 → Settings → Actions → Runners → 创建 token
+```
+
+先执行 `exit` 退出 `gitea-runner` 用户，回到具有 sudo 权限的用户，再创建 systemd 服务（推荐，开机自启）：
+
+```bash
+sudo tee /etc/systemd/system/gitea-runner.service > /dev/null <<'EOF'
+[Unit]
+Description=Gitea Actions Runner
+After=network.target docker.service
+
+[Service]
+User=gitea-runner
+Group=gitea-runner
+WorkingDirectory=/home/gitea-runner
+ExecStart=/usr/local/bin/gitea-runner daemon
+Restart=on-failure
+RestartSec=5
+
+[Install]
+WantedBy=multi-user.target
+EOF
+
+sudo systemctl daemon-reload
+sudo systemctl enable --now gitea-runner
+sudo systemctl status gitea-runner
+```
+
+验证 runner 已注册：Gitea Web → 站点管理 → Actions → Runners，应看到 `self-hosted` 状态为 `idle`。
+
+## 步骤三：配置 Gitea Secrets
+
+进入仓库 → **Settings → Actions → Secrets**，添加以下 secrets（参考 `.env.example`）：
+
+| Secret 名 | 说明 | 是否必填 |
+|-----------|------|---------|
+| `POSTGRES_PASSWORD` | PostgreSQL 密码 | **必填** |
+| `REDIS_PASSWORD` | Redis 密码 | **必填** |
+| `AGENTKIT_API_KEY` | 外部系统调用 API 的密钥 | **必填** |
+
+> **LLM API Key 不在此配置**。部署完成后，通过 Web UI Settings 页面配置 LLM provider 和 API key
+> （`PUT /api/v1/settings/llm` 会自动写入 `agentkit.yaml` 和 `.env`）。
+
+## 步骤四：首次部署准备
+
+```bash
+# 1. 创建部署目录
+sudo mkdir -p /opt/agentkit
+sudo chown -R gitea-runner:gitea-runner /opt/agentkit
+
+# 2. 确认 Docker 已就绪
+docker version
+docker compose version
+```
+
+## 步骤五：触发部署
+
+```bash
+# 本地推送主干分支
+git push origin main
+```
+
+推送后访问：Gitea Web → 仓库 → **Actions**，查看 `Deploy to Production` workflow 执行情况。
+
+## 验证
+
+```bash
+# 1. 服务状态
+ssh user@8.153.107.96
+cd /opt/agentkit/repo
+docker compose -f docker-compose.deploy.yaml ps
+
+# 2. 健康检查
+curl http://localhost:8001/api/v1/health
+
+# 3. 公网访问
+curl http://8.153.107.96:8001/api/v1/health
+
+# 4. 查看日志
+docker compose -f docker-compose.deploy.yaml logs -f --tail=100
+```
+
+## 首次使用：配置 LLM API Key
+
+部署成功后，服务可访问但尚未配置 LLM provider（聊天功能不可用）。通过 Web UI 完成 onboarding：
+
+1. 浏览器访问 `http://8.153.107.96:8001`
+2. 进入 **Settings → LLM** 页面
+3. 添加 LLM provider（支持 OpenAI / Anthropic / Gemini / DeepSeek / 通义千问 / 豆包 等）
+4. 填入 API key 并选择默认模型
+5. 保存后配置自动写入 `agentkit.yaml` 和 `.env`，无需重启服务（注意：`docker-compose.deploy.yaml` 未为 agentkit 服务挂载持久化卷，重新部署导致容器重建后，写入容器内的配置可能丢失，需重新配置或自行挂载卷）
+
+也可通过 API 直接配置：
+
+```bash
+curl -X PUT http://8.153.107.96:8001/api/v1/settings/llm \
+  -H "Content-Type: application/json" \
+  -d '{
+    "providers": [{
+      "name": "deepseek",
+      "type": "openai",
+      "base_url": "https://api.deepseek.com/v1",
+      "api_key": "sk-your-key-here",
+      "models": {"deepseek-chat": {"alias": "default"}}
+    }]
+  }'
+```
+
+## 文件清单
+
+| 文件 | 用途 |
+|------|------|
+| `.gitea/workflows/deploy.yml` | Gitea Actions 工作流定义 |
+| `scripts/deploy.sh` | 服务器侧部署脚本（build + up） |
+| `docker-compose.deploy.yaml` | 生产部署专用 Compose（不暴露 DB 端口） |
+| `.env.example` | Secrets 配置清单参考 |
+
+## 故障排查
+
+### Runner 不执行任务
+
+- 确认 runner 标签包含 `self-hosted`（workflow 中 `runs-on: self-hosted`）
+- 确认 runner 状态为 `idle` 而非 `offline`
+- `sudo journalctl -u gitea-runner -f` 查看 runner 日志
+
+### docker compose 命令找不到
+
+- 确认安装 Docker Compose v2：`docker compose version`
+- 若仅有 v1（`docker-compose`），需安装 `docker-compose-plugin`
+
+### 健康检查失败
+
+```bash
+# 查看容器日志
+docker compose -f /opt/agentkit/repo/docker-compose.deploy.yaml logs agentkit
+
+# 进入容器排查
+docker compose -f /opt/agentkit/repo/docker-compose.deploy.yaml exec agentkit bash
+```
+
+### .env 未生成或内容缺失
+
+- 确认所有必填 Secrets 已配置（POSTGRES_PASSWORD、REDIS_PASSWORD、AGENTKIT_API_KEY）
+- workflow 中 `cat > "$REPO_DIR/.env"` 步骤需成功执行，查看 Actions 日志
+
+### 首次部署数据库初始化
+
+首次启动时 PostgreSQL 会自动初始化。如需重置（**会丢数据**）：
+
+```bash
+cd /opt/agentkit/repo
+docker compose -f docker-compose.deploy.yaml down -v
+docker compose -f docker-compose.deploy.yaml up -d
+```
+
+## 安全建议
+
+1. **不要**将 `.env` 提交到仓库（已在 `.gitignore` 中）
+2. 服务器防火墙仅放行 `8001`（API）、`22`（SSH）、`80/443`（Gitea），**不要**暴露 `5432`/`6379` 到公网
+3. 定期备份 `/opt/agentkit/repo/.env` 和 Docker 卷（`pgdata`、`redisdata`）
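+4. Runner 用户 `gitea-runner` 仅被授予 `mkdir`/`chown` 两条免密 sudo 命令；但未限定参数的 `sudo chown` 可更改任意文件的属主，且 `docker` 组成员实际等同于 root 权限，建议在 sudoers 中将命令限定为部署所需的具体参数和路径（如仅 `/opt/agentkit`）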
+5. 生产 POSTGRES_PASSWORD / REDIS_PASSWORD 应为强随机字符串
diff --git a/docs/brainstorms/2026-06-17-board-meeting-mode-requirements.md b/docs/brainstorms/2026-06-17-board-meeting-mode-requirements.md
new file mode 100644
index 0000000..6a43420
--- /dev/null
+++ b/docs/brainstorms/2026-06-17-board-meeting-mode-requirements.md
@@ -0,0 +1,454 @@
+# 私董会讨论模式（Board Meeting Mode）需求文档
+
+**日期**: 2026-06-17
+**状态**: Draft
+**范围**: Deep — feature
+**作者**: ce-brainstorm
+
+---
+
+## 1. 背景与动机
+
+### 1.1 当前专家团实现状态
+
+Fischer AgentKit 现有专家团功能位于 `src/agentkit/experts/`，采用 **hub-and-spoke（中心辐射）模式**：
+
+- Lead Expert 分解任务 → Member Experts 并行执行子任务 → Lead 综合结果
+- 子任务深度=1，**无 Agent 间通信**，`handoff_transport` 仅用于事件广播
+- 协作模式单一：`MergeStrategy` 仅保留 `BEST`，移除了 VOTE/FUSION
+
+**关键缺口**：
+1. **未集成到主聊天流程** — `src/agentkit/server/routes/chat.py:584-590` 中 `TEAM_COLLAB` 模式回退到 REACT，`emit_team_event()` 已定义但未被调用
+2. **无预设专家模板** — 没有 `configs/experts/` 目录，`ExpertTemplateRegistry` 默认为空
+3. **`team_dissolved` 事件未触发** — 前端有处理代码，后端不广播
+4. **TeamStatus 缺 PLANNING** — 文档/前端有，后端枚举没有
+
+### 1.2 对标分析
+
+| 维度 | Qoder | WorkBuddy | Legends MCP | colleague.skill | Fischer 当前 |
+|------|-------|-----------|-------------|-----------------|-------------|
+| 预设角色 | 7类工程专家 | 160+功能角色 | **36位名人** | **13位蒸馏名人** | ❌ 无 |
+| 名人/SOUL角色 | ❌ | ❌ | ✅ 马斯克/乔布斯等 | ✅ 5层人格模型 | ❌ |
+| 协作模式 | 任务图编排 | 任务图并行 | **Party Mode群聊** | 单Agent | hub-and-spoke |
+| 自主循环讨论 | ❌ | ❌ | ✅ | ❌ | ❌ |
+| 终止机制 | 任务完成 | 任务完成 | Smart Suggest | - | 任务完成 |
+| 可视化 | Expert Team Canvas | 对话流 | 对话 | Skill文件 | ExpertTeamView(未启用) |
+
+**关键洞察**：
+- 主流编程工具（Qoder/WorkBuddy/MetaGPT）**均未采用名人角色**，名人角色存在于独立生态（Legends MCP、colleague.skill）
+- Fischer 若做"名人蒸馏 + 自主循环讨论"，是**差异化机会**
+- 可参考：AgentVerse 通信框架 + colleague.skill 5层人格模型 + ARMOR-MAD 协议阈值终止
+
+### 1.3 动机
+
+用户希望除了"自动创建子 agent 处理复杂任务"外，还能创建**预设固定的名人专家团**（如私董会：马斯克、贝佐斯、张小龙），针对主题进行**自主循环讨论**，各专家发表意见、碰撞观点，直到得出结果或用户干预。整个过程**像群聊一样**呈现。
+
+---
+
+## 2. 目标与非目标
+
+### 2.1 目标
+
+1. **新增私董会讨论模式** — 与现有 hub-and-spoke 任务分解模式并列，通过 `@board` 前缀触发
+2. **预设名人专家库** — 内置 5-8 位名人专家（YAML 定义 persona/thinking_style/speaking_style）
+3. **自主循环讨论** — 每轮全员发言（并行生成）+ 主持人小结，达到最大轮次后主持人给出最终决策建议
+4. **群聊式体验** — 前端以群聊形式展示讨论过程，专家消息带头像/颜色/角色标识
+5. **用户随时干预** — 用户可在任意轮次插入消息影响讨论方向
+6. **主聊天流程集成** — 在 `chat.py` 中接入 `BoardRouter`，不再回退到 REACT
+
+### 2.2 非目标
+
+1. **不集成外部蒸馏工具**（colleague.skill/nuwa-skill）—— 未来可扩展，本期仅 YAML 定义
+2. **不实现 LLM 自动蒸馏** —— 本期仅人工编写 YAML
+3. **不实现共识检测自动终止** —— 仅最大轮次 + 用户干预
+4. **不改动现有 hub-and-spoke 模式** —— `@team` 保持不变
+5. **不做技术评审/创意脑暴场景** —— 本期聚焦决策类私董会
+6. **不做 Expert Team Canvas 式可视化** —— 复用现有群聊 UI，仅增强专家消息展示
+7. **不实现专家间直接通信** —— 专家发言基于共享讨论历史，不直接对话
+
+---
+
+## 3. 用户故事
+
+### 3.1 主流程：发起私董会
+
+> 作为产品负责人，我希望就一个决策类问题（如"是否该做 X 功能"）召集名人专家团讨论，获得多视角建议，以便做出更明智的决策。
+
+**触发**：用户输入 `@board:elon_musk,jeff_bezos,allenzhang 是否该做私董会功能？`
+
+**期望**：
+- 系统识别 `@board` 前缀，加载指定名人专家
+- 主持人（默认首位或系统指定）开场介绍议题
+- 马斯克从第一性原理视角发言
+- 贝佐斯从 Day 1/客户至上视角发言
+- 张小龙从用户体验视角发言
+- 主持人小结本轮要点
+- 进入下一轮，专家基于前序讨论继续
+- 达到最大轮次（默认 5 轮）后，主持人给出最终决策建议
+- 全过程以群聊形式展示，用户可随时插入消息
+
+### 3.2 干预流程：用户介入讨论
+
+> 作为用户，我希望在讨论过程中随时插入观点或追问，影响讨论方向。
+
+**触发**：讨论进行中，用户输入 `我觉得成本不是主要问题，关键是用户接受度`
+
+**期望**：
+- 系统将用户消息广播给所有专家
+- 下一轮发言时，专家会参考用户观点调整发言方向
+- 主持人小结时提及用户观点
+
+### 3.3 配置流程：使用预设专家团
+
+> 作为用户，我希望直接使用预设的"私董会专家团"模板，无需每次指定专家名。
+
+**触发**：用户输入 `@board:private_board 是否该做 X 功能？` 或 `@board 是否该做 X 功能？`（使用默认私董会模板）
+
+**期望**：
+- 系统识别 `private_board` 模板，加载预设的 3-5 位名人专家
+- 后续流程与主流程一致
+
+### 3.4 扩展流程：自定义专家
+
+> 作为高级用户，我希望创建自己的名人专家 YAML 文件，扩展专家库。
+
+**触发**：用户在 `configs/experts/` 目录下创建 `linus.yaml`
+
+**期望**：
+- 系统启动时自动加载 `configs/experts/*.yaml`
+- 用户可通过 `@board:linus 讨论主题` 调用自定义专家
+
+---
+
+## 4. 功能需求
+
+### 4.1 路由与触发
+
+**FR-1: `@board` 前缀路由**
+
+- 支持两种格式：
+  - `@board:expert1,expert2 讨论主题` — 指定专家
+  - `@board 讨论主题` — 使用默认私董会模板（`private_board`）
+- 专家名验证：`^[a-zA-Z0-9_-]{1,64}$`，最多 10 位专家
+- 至少 1 位专家，否则提示选择
+- 未识别的专家名：提示可用专家列表
+- 路由结果包含：专家配置列表、讨论主题、是否使用默认模板
+
+**FR-2: 与现有路由共存**
+
+- `@board` 与 `@team` 互不干扰
+- `@board` 优先级高于普通聊天，低于系统命令
+- 在 `RequestPreprocessor` 或等效位置集成 `BoardRouter`
+
+### 4.2 专家配置
+
+**FR-3: 预设名人专家库**
+
+- 新增 `configs/experts/` 目录
+- 内置 5-8 位名人专家 YAML：
+  - `elon_musk.yaml` — 第一性原理、物理思维、激进创新
+  - `jeff_bezos.yaml` — Day 1 思维、客户至上、长期主义
+  - `allenzhang.yaml` — 用户体验、极简主义、社交产品直觉
+  - `charlie_munger.yaml` — 心智模型、跨学科思维、逆向思考
+  - `paul_graham.yaml` — 创业、做用户想要的东西、反从众
+  - `steve_jobs.yaml` — 产品设计、现实扭曲力场、专注
+  - `warren_buffett.yaml` — 价值投资、能力圈、复利思维
+  - `ray_dalio.yaml` — 原则驱动决策、极度透明、believability-weighted
+- 每个 YAML 包含：`name`、`persona`、`thinking_style`、`speaking_style`、`avatar`、`color`、`bound_skills`（可选）
+
+**FR-4: `ExpertConfig` 扩展**
+
+- 新增 `speaking_style: str` 字段 — 描述表达风格（如"直接、用比喻、偶尔尖锐"）
+- 新增 `decision_framework: str` 字段（可选）— 描述决策框架（如"第一性原理"、"Day 1"）
+- 字段需有默认值，向后兼容现有 `@team` 模式
+
+**FR-5: 默认私董会模板**
+
+- 内置 `private_board` 模板，包含 3-5 位默认名人专家
+- 模板可被用户 YAML 覆盖
+
+### 4.3 讨论引擎
+
+**FR-6: `BoardTeam` 容器**
+
+- 复用 `Expert`、`AgentPool`、`HandoffTransport`、`SharedWorkspace`
+- 新增 `BoardStatus` 枚举：`FORMING` → `DISCUSSING` → `CONCLUDING` → `COMPLETED` → `DISSOLVED`
+- 持有：专家列表、主持人名、讨论主题、讨论历史（所有发言）、当前轮次、最大轮次
+- 主持人默认为首位专家，可通过配置指定
+
+**FR-7: `BoardOrchestrator` 讨论引擎**
+
+- **开场阶段**：主持人介绍议题、说明讨论规则
+- **讨论阶段**（循环）：
+  - 每轮：所有非主持人专家**并行生成发言**（基于讨论历史 + 角色 prompt）
+  - 每轮结束：主持人小结本轮要点、判断是否继续
+  - 发言生成需注入：角色 persona、thinking_style、speaking_style、完整讨论历史、当前轮次/最大轮次
+- **总结阶段**：达到最大轮次后，主持人给出最终决策建议（含各方观点汇总、共识点、分歧点、建议行动）
+- **用户干预处理**：用户消息插入讨论历史，下一轮发言时专家可见
+
+**FR-8: 讨论历史管理**
+
+- 讨论历史结构：`[{round, expert_name, content, timestamp}]`
+- 每轮发言后追加到历史
+- 主持人小结也作为历史的一部分
+- 用户干预消息标记为 `role: "user"`，与专家发言区分
+- 历史过长时（超过 token 限制）：主持人先压缩历史，再继续
+
+**FR-9: 终止条件**
+
+- **正常终止**：达到最大轮次（默认 5，可配置 1-10）
+- **用户终止**：用户发送 `/stop` 或 `停止讨论`
+- **异常终止**：LLM 不可用或所有专家发言失败 → 主持人用已有历史总结
+- 终止后广播 `board_concluded` 事件
+
+### 4.4 WebSocket 事件
+
+**FR-10: 新增事件类型**
+
+| 事件 | 触发时机 | 数据 |
+|------|---------|------|
+| `board_started` | 讨论开始 | `{team_id, topic, experts: [{name, avatar, color, is_moderator}], max_rounds}` |
+| `expert_speech` | 专家发言完成 | `{expert_name, expert_avatar, expert_color, content, round}` |
+| `round_summary` | 主持人小结完成 | `{moderator_name, content, round, continue: bool}` |
+| `user_intervention` | 用户消息广播 | `{content, round}` |
+| `board_concluded` | 讨论结束 | `{summary, decision_advice, total_rounds, consensus_points, dissent_points}` |
+
+**FR-11: 事件广播**
+
+- 通过 `handoff_transport.send(team_channel, event)` 广播
+- 在 `chat.py` 中通过 `emit_team_event()` 推送到 WebSocket
+- 事件类型加入 `_VALID_TEAM_EVENT_TYPES`
+
+### 4.5 前端展示
+
+**FR-12: 群聊式 UI**
+
+- 复用现有 `ExpertMessage.vue` 组件，增强展示：
+  - 专家头像（emoji 或图片）
+  - 专家名 + 角色标签（如"主持人"、"第一性原理视角"）
+  - 发言内容（支持 markdown）
+  - 轮次标识（如"第 2 轮"）
+- 主持人小结以特殊样式区分（如背景色、边框）
+- 用户干预消息以右侧气泡展示（区别于专家左侧）
+
+**FR-13: 讨论状态展示**
+
+- 顶部显示：讨论主题、当前轮次/最大轮次、参与专家列表
+- 专家列表可点击查看其所有发言
+- 讨论结束后展示总结卡片（决策建议、共识点、分歧点）
+
+**FR-14: 干预输入**
+
+- 讨论进行中，输入框保持可用
+- 用户输入即作为干预消息广播
+- 支持 `/stop` 命令终止讨论
+
+### 4.6 配置
+
+**FR-15: `agentkit.yaml` 配置项**
+
+```yaml
+board:
+  max_rounds: 5              # 默认最大轮次
+  default_template: private_board  # 默认私董会模板
+  parallel_speech: true       # 是否并行生成发言
+  history_compression_threshold: 4000  # 历史 token 超过此值时压缩
+```
+
+---
+
+## 5. 范围边界
+
+### 5.1 包含
+
+- `BoardTeam`、`BoardOrchestrator`、`BoardRouter` 新模块
+- `configs/experts/` 预设名人 YAML（5-8 位）
+- `ExpertConfig` 扩展（`speaking_style`、`decision_framework`）
+- WebSocket 事件（5 个新事件）
+- 前端群聊式展示增强
+- 主聊天流程集成（`chat.py` 接入 `BoardRouter`）
+- 单元测试覆盖核心逻辑
+
+### 5.2 延后（Deferred for later）
+
+- 集成外部蒸馏工具（colleague.skill/nuwa-skill）
+- LLM 自动蒸馏生成名人 SOUL
+- 共识检测自动终止（置信度/投票）
+- 技术评审/创意脑暴场景
+- Expert Team Canvas 式可视化
+- 专家间直接通信（辩论模式）
+- 语音/视频输出
+- 历史讨论回顾与检索
+
+### 5.3 不在本产品身份内（Outside this product's identity）
+
+- 实时名人数据更新（如抓取最新推文更新 persona）
+- 名人本人授权或验证
+- 娱乐向角色扮演（非决策用途）
+
+---
+
+## 6. 依赖与假设
+
+### 6.1 依赖
+
+- **现有基础设施**：`Expert`、`AgentPool`、`HandoffTransport`、`SharedWorkspace`、`ExpertTemplateRegistry`
+- **LLM Gateway**：`src/agentkit/llm/` 提供多 provider 支持
+- **WebSocket**：`src/agentkit/server/routes/chat.py` 现有 WebSocket 通道
+- **前端组件**：`ExpertMessage.vue`、`ExpertTeamView.vue`、`stores/team.ts`
+
+### 6.2 假设
+
+1. **YAML 足够体现名人思维** — 人工编写的 persona/thinking_style/speaking_style 能让 LLM 生成有差异化的发言，无需深度蒸馏
+2. **并行发言可行** — 专家发言基于共享历史，无需严格顺序，可并行生成
+3. **token 消耗可接受** — 5 轮 × 5 专家 = 25 次 LLM 调用，单次讨论成本在可接受范围
+4. **现有 `Expert` 可复用** — `Expert` 包装器的 `send_message`、`team_context` 注入机制适用于讨论模式
+5. **前端群聊 UI 可扩展** — `ExpertMessage.vue` 可通过 props 增强展示，无需重写
+
+### 6.3 风险
+
+| 风险 | 影响 | 缓解 |
+|------|------|------|
+| 专家发言同质化 | 讨论质量低，失去多视角价值 | 强化角色 prompt 差异化，提供 `decision_framework` 字段 |
+| token 消耗过高 | 成本问题 | 提供轮次配置，历史压缩机制 |
+| 讨论发散无结论 | 用户体验差 | 主持人每轮小结，最终强制总结 |
+| 名人 persona 不准确 | 发言不像本人 | YAML 可迭代优化，未来支持蒸馏 |
+| 与现有 `@team` 集成冲突 | 路由混乱 | 独立 `BoardRouter`，前缀明确区分 |
+
+---
+
+## 7. 成功标准
+
+### 7.1 功能完成标准
+
+- [ ] `@board:elon_musk,jeff_bezos 讨论主题` 能触发私董会讨论
+- [ ] `@board 讨论主题` 能使用默认模板
+- [ ] 预设 5-8 位名人专家 YAML 可用
+- [ ] 每轮专家并行发言，主持人小结
+- [ ] 达到最大轮次后主持人给出最终决策建议
+- [ ] 用户可随时插入消息影响讨论
+- [ ] 用户可 `/stop` 终止讨论
+- [ ] 前端以群聊形式展示讨论过程
+- [ ] WebSocket 事件正确广播和接收
+- [ ] 单元测试覆盖率 ≥ 80%
+
+### 7.2 质量标准
+
+- 专家发言有明确角色差异（第一性原理 vs Day 1 vs 用户体验）
+- 主持人小结能准确归纳本轮要点
+- 最终决策建议包含各方观点汇总、共识点、分歧点
+- 单次讨论 token 消耗可预测（提供配置项）
+
+### 7.3 集成标准
+
+- `@board` 与 `@team` 互不干扰
+- 不破坏现有聊天流程
+- 前端群聊 UI 兼容现有专家消息展示
+
+---
+
+## 8. 开放问题
+
+1. **主持人选择策略**：默认首位专家作为主持人，还是引入独立的"facilitator"角色（非名人，专职主持）？
+   - **当前决策**：默认首位专家，可配置。未来可扩展独立 facilitator。
+
+2. **专家发言顺序**：并行生成时，前端展示顺序如何确定？
+   - **当前决策**：按专家列表顺序展示，即使并行生成也按配置顺序追加到 UI。
+
+3. **讨论历史 token 管理**：5 轮 × 5 专家的历史可能超过上下文窗口，压缩策略如何？
+   - **当前决策**：超过阈值时主持人先压缩历史（保留关键观点），再继续。具体阈值和压缩 prompt 在 planning 阶段细化。
+
+4. **预设名人选择**：内置哪 5-8 位名人最合适？
+   - **当前决策**：马斯克、贝佐斯、张小龙、芒格、Paul Graham、乔布斯、巴菲特、Ray Dalio。可在 planning 阶段调整。
+
+5. **与现有 `TEAM_COLLAB` 集成**：是否需要同时修复 `@team` 在 `chat.py` 的集成缺口？
+   - **当前决策**：本期聚焦 `@board`，`@team` 集成作为独立任务。但 `emit_team_event()` 的调用模式可复用。
+
+---
+
+## 9. 建议的实现路径（供 ce-plan 参考）
+
+### 9.1 模块结构
+
+```
+src/agentkit/experts/
+├── __init__.py              # 新增导出
+├── config.py                # 扩展 ExpertConfig
+├── expert.py                # 复用
+├── team.py                  # 现有 hub-and-spoke（不动）
+├── orchestrator.py          # 现有（不动）
+├── plan.py                  # 现有（不动）
+├── registry.py              # 复用
+├── router.py                # 现有 @team（不动）
+├── board.py                 # 新增：BoardTeam, BoardStatus
+├── board_orchestrator.py    # 新增：BoardOrchestrator
+└── board_router.py          # 新增：BoardRouter, @board 前缀
+
+configs/experts/             # 新增目录
+├── elon_musk.yaml
+├── jeff_bezos.yaml
+├── allenzhang.yaml
+├── charlie_munger.yaml
+├── paul_graham.yaml
+├── steve_jobs.yaml
+├── warren_buffett.yaml
+└── ray_dalio.yaml
+```
+
+### 9.2 关键集成点
+
+- `src/agentkit/server/routes/chat.py` — 接入 `BoardRouter`，调用 `emit_team_event()`
+- `src/agentkit/server/app.py` — 启动时加载 `configs/experts/*.yaml` 到 `ExpertTemplateRegistry`
+- `src/agentkit/server/frontend/src/api/types.ts` — 新增 WebSocket 事件类型
+- `src/agentkit/server/frontend/src/stores/chat.ts` — 新增事件处理
+- `src/agentkit/server/frontend/src/components/chat/ExpertMessage.vue` — 增强展示
+
+### 9.3 测试策略
+
+- `tests/unit/experts/test_board.py` — BoardTeam 测试
+- `tests/unit/experts/test_board_orchestrator.py` — 讨论流程测试
+- `tests/unit/experts/test_board_router.py` — 路由测试
+- `tests/unit/experts/test_config.py` — 新字段测试
+- Mock LLM Gateway 进行集成测试
+
+---
+
+## 10. 对标差距总结
+
+### 10.1 Fischer 当前 vs Qoder/WorkBuddy
+
+| 维度 | Fischer 当前 | Qoder/WorkBuddy | 差距 |
+|------|-------------|-----------------|------|
+| 主聊天集成 | ❌ 未集成 | ✅ 完整集成 | **关键差距** |
+| 预设专家 | ❌ 无 | ✅ 7-160+ | **关键差距** |
+| 可视化 | 未启用 | Expert Team Canvas | 中等差距 |
+| 协作模式 | hub-and-spoke | 任务图编排 | 模式不同，非差距 |
+| 失败处理 | 三层 fallback | 高风险确认 | Fischer 更轻量 |
+
+### 10.2 Fischer 私董会模式 vs Legends MCP/colleague.skill
+
+| 维度 | Fischer 私董会（规划） | Legends MCP/colleague.skill | 优势 |
+|------|----------------------|----------------------------|------|
+| 名人角色 | 5-8 位 YAML | 13-36 位蒸馏 | 精度较低，但可扩展 |
+| 讨论模式 | 全员发言+主持人小结 | Party Mode | Fischer 更结构化 |
+| 终止机制 | 最大轮次+主持人总结 | Smart Suggest | Fischer 更可控 |
+| 集成度 | 深度集成到 AgentKit | 独立工具 | Fischer 更一体化 |
+| 可扩展 | YAML + 未来蒸馏 | 蒸馏工具 | Fischer 更开放 |
+
+### 10.3 优劣势总结
+
+**Fischer 私董会模式的优势**：
+1. **结构化讨论** — 主持人小结机制，避免发散
+2. **可控终止** — 最大轮次 + 用户干预，成本可预测
+3. **深度集成** — 与 AgentKit 生态无缝衔接
+4. **可扩展** — YAML 定义 + 未来蒸馏工具集成
+
+**劣势**：
+1. **名人精度** — YAML 人工编写，不如蒸馏精确
+2. **无共识检测** — 仅轮次终止，可能未达共识就结束
+3. **无专家间直接通信** — 辩论深度有限
+4. **前置依赖** — 需先完成主聊天流程集成
+
+---
+
+**下一步**: 交由 `/ce-plan` 进行详细实现规划。
diff --git a/docs/plans/2026-06-17-001-feat-board-meeting-mode-plan.md b/docs/plans/2026-06-17-001-feat-board-meeting-mode-plan.md
new file mode 100644
index 0000000..c594e5c
--- /dev/null
+++ b/docs/plans/2026-06-17-001-feat-board-meeting-mode-plan.md
@@ -0,0 +1,689 @@
+---
+title: "feat: 私董会讨论模式（Board Meeting Mode）"
+type: feat
+status: completed
+created: 2026-06-17
+origin: docs/brainstorms/2026-06-17-board-meeting-mode-requirements.md
+---
+
+# Plan: 私董会讨论模式（Board Meeting Mode）
+
+**Origin**: `docs/brainstorms/2026-06-17-board-meeting-mode-requirements.md`
+**Depth**: Deep
+**Created**: 2026-06-17
+
+---
+
+## Summary
+
+为 Fischer AgentKit 新增私董会讨论模式，与现有 hub-and-spoke 任务分解模式并列。用户通过 `@board` 前缀触发，指定预设名人专家（如马斯克、贝佐斯、张小龙），针对决策类问题进行多轮自主循环讨论。每轮全员并行发言 + 主持人小结，达到最大轮次后主持人给出最终决策建议。整个过程以群聊形式呈现，用户可随时干预。
+
+---
+
+## Problem Frame
+
+当前专家团功能（`src/agentkit/experts/`）采用 hub-and-spoke 模式，仅支持任务分解执行，无群聊式讨论能力。且该功能未集成到主聊天流程（`chat.py:584-590` 中 `TEAM_COLLAB` 回退到 REACT）。用户希望就决策类问题召集名人专家团进行多视角讨论，获得综合建议。
+
+**核心问题**：
+1. 无预设名人专家库（`configs/experts/` 不存在）
+2. 无自主循环讨论机制（现有模式是任务分解，非群聊讨论）
+3. 专家团功能未集成到主聊天 WebSocket 流程
+
+---
+
+## Requirements
+
+本计划实现需求文档中的以下功能需求（FR-1 到 FR-15）：
+
+- **FR-1/FR-2**: `@board` 前缀路由，与 `@team` 共存
+- **FR-3**: 预设 5-8 位名人专家 YAML
+- **FR-4**: `ExpertConfig` 扩展（`speaking_style`、`decision_framework`）
+- **FR-5**: 默认私董会模板 `private_board`
+- **FR-6**: `BoardTeam` 容器（`BoardStatus` 生命周期）
+- **FR-7**: `BoardOrchestrator` 讨论引擎（开场→循环讨论→总结）
+- **FR-8**: 讨论历史管理（含压缩）
+- **FR-9**: 终止条件（最大轮次 + 用户干预 + 异常）
+- **FR-10/FR-11**: WebSocket 事件（5 个新事件）+ 广播
+- **FR-12/FR-13/FR-14**: 前端群聊式 UI + 状态展示 + 干预输入
+- **FR-15**: `agentkit.yaml` 配置项
+
+**成功标准**（见需求文档 §7）：
+- `@board:elon_musk,jeff_bezos 讨论主题` 能触发讨论
+- 每轮专家并行发言，主持人小结
+- 达到最大轮次后主持人给出最终决策建议
+- 用户可随时插入消息影响讨论
+- 前端以群聊形式展示
+- 单元测试覆盖率 ≥ 80%
+
+---
+
+## Key Technical Decisions
+
+### KTD-1: 独立模块而非扩展现有 ExpertTeam
+
+**决策**: 新建 `board.py`、`board_orchestrator.py`、`board_router.py`，不修改现有 `team.py`、`orchestrator.py`、`router.py`。
+
+**理由**: 私董会讨论模式（多轮群聊）与 hub-and-spoke（任务分解执行）的执行流程完全不同。独立模块职责清晰，避免语义混淆，符合需求文档"新增并列模式"决策。
+
+**复用**: `ExpertConfig`（扩展）、`Expert`（运行时包装器）、`AgentPool`、`HandoffTransport`、`SharedWorkspace`、`ExpertTemplateRegistry`。
+
+### KTD-2: 讨论历史结构
+
+**决策**: 使用 `list[dict]` 结构存储讨论历史，每条记录包含 `round`、`expert_name`、`content`、`timestamp`、`role`（expert/moderator/user）。
+
+**理由**: 简单的列表结构易于序列化和注入到 LLM prompt。主持人小结也作为历史记录，角色为 `moderator`。用户干预消息角色为 `user`。
+
+**压缩策略**: 当历史 token 数超过阈值（默认 4000）时，主持人先压缩历史（保留每轮关键观点），再继续下一轮。压缩 prompt 在实现时细化。
+
+### KTD-3: 并行发言生成
+
+**决策**: 使用 `asyncio.gather` 并行生成所有非主持人专家的发言，然后按专家列表顺序追加到历史和广播事件。
+
+**理由**: 并行生成提高效率（5 专家并行 vs 串行）。前端展示顺序按配置顺序，即使并行生成也按顺序追加到 UI。
+
+### KTD-4: 主持人角色
+
+**决策**: 主持人默认为首位专家，通过 `ExpertConfig.is_lead=True` 标识。主持人负责开场介绍、每轮小结、最终总结。
+
+**理由**: 复用现有 `is_lead` 字段，无需引入新角色。主持人也是名人专家之一，其发言风格由其 persona 决定。
+
+### KTD-5: WebSocket 事件复用与扩展
+
+**决策**: 新增 5 个事件类型（`board_started`、`expert_speech`、`round_summary`、`user_intervention`、`board_concluded`），加入现有 `_VALID_TEAM_EVENT_TYPES` 集合。复用 `emit_team_event()` 辅助函数推送。
+
+**理由**: 现有 `emit_team_event()` 已定义但未被调用，本期同时完成其调用集成。事件类型加入现有集合，复用验证逻辑。
+
+### KTD-6: 配置加载
+
+**决策**: 在 `app.py` 启动时，类似 skills 加载，从 `configs/experts/` 目录加载所有专家 YAML 到 `ExpertTemplateRegistry`，挂载到 `app.state.expert_template_registry`。
+
+**理由**: 复用 `ExpertTemplateRegistry.load_from_directory()` 方法。与 skills 加载模式一致，保持架构一致性。
+
+---
+
+## High-Level Technical Design
+
+### 组件关系图
+
+```mermaid
+graph TB
+    User[用户输入 @board:experts 主题]
+    Router[BoardRouter]
+    Registry[ExpertTemplateRegistry]
+    Team[BoardTeam]
+    Orchestrator[BoardOrchestrator]
+    Expert1[Expert: 马斯克]
+    Expert2[Expert: 贝佐斯]
+    Expert3[Expert: 张小龙]
+    Transport[HandoffTransport]
+    ChatWS[chat.py WebSocket]
+    Frontend[前端群聊 UI]
+
+    User --> Router
+    Router --> Registry
+    Registry --> Team
+    Team --> Orchestrator
+    Orchestrator --> Expert1
+    Orchestrator --> Expert2
+    Orchestrator --> Expert3
+    Expert1 --> Transport
+    Expert2 --> Transport
+    Expert3 --> Transport
+    Transport --> ChatWS
+    ChatWS --> Frontend
+```
+
+### 讨论流程状态机
+
+```mermaid
+stateDiagram-v2
+    [*] --> FORMING: @board 触发
+    FORMING --> DISCUSSING: 专家创建完成
+    DISCUSSING --> DISCUSSING: 每轮发言+小结
+    DISCUSSING --> CONCLUDING: 达到最大轮次/用户停止
+    DISCUSSING --> DISSOLVED: 异常终止
+    CONCLUDING --> COMPLETED: 主持人最终总结
+    COMPLETED --> DISSOLVED: 资源回收
+    DISSOLVED --> [*]
+```
+
+### 单轮讨论时序图
+
+```mermaid
+sequenceDiagram
+    participant O as BoardOrchestrator
+    participant E1 as Expert 1
+    participant E2 as Expert 2
+    participant M as Moderator
+    participant T as Transport
+    participant WS as WebSocket
+
+    O->>WS: board_started 事件
+    O->>M: 开场介绍请求
+    M->>T: 发言内容
+    T->>WS: expert_speech (moderator)
+    
+    loop 每轮讨论
+        par 并行生成发言
+            O->>E1: 发言请求(历史+角色prompt)
+            E1-->>O: 发言内容
+        and
+            O->>E2: 发言请求(历史+角色prompt)
+            E2-->>O: 发言内容
+        end
+        O->>T: 广播 E1 发言
+        T->>WS: expert_speech (E1)
+        O->>T: 广播 E2 发言
+        T->>WS: expert_speech (E2)
+        O->>M: 小结请求(本轮发言)
+        M-->>O: 小结内容
+        O->>T: 广播小结
+        T->>WS: round_summary
+    end
+    
+    O->>M: 最终总结请求
+    M-->>O: 决策建议
+    O->>WS: board_concluded 事件
+```
+
+---
+
+## Scope Boundaries
+
+### In Scope
+
+- `BoardTeam`、`BoardOrchestrator`、`BoardRouter` 新模块
+- `configs/experts/` 预设名人 YAML（8 位）
+- `ExpertConfig` 扩展（`speaking_style`、`decision_framework`）
+- WebSocket 事件（5 个新事件）+ `emit_team_event()` 调用集成
+- 前端群聊式展示增强 + 事件处理
+- 主聊天流程集成（`chat.py` 接入 `BoardRouter`）
+- `app.py` 启动加载专家配置
+- `agentkit.yaml` 配置项
+- 单元测试覆盖
+
+### Deferred to Follow-Up Work
+
+- 集成外部蒸馏工具（colleague.skill/nuwa-skill）
+- LLM 自动蒸馏生成名人 SOUL
+- 共识检测自动终止（置信度/投票）
+- 技术评审/创意脑暴场景
+- Expert Team Canvas 式可视化
+- 专家间直接通信（辩论模式）
+- 语音/视频输出
+- 历史讨论回顾与检索
+- 修复 `@team` 在 `chat.py` 的集成缺口（独立任务）
+
+### Outside This Product's Identity
+
+- 实时名人数据更新
+- 名人本人授权或验证
+- 娱乐向角色扮演
+
+---
+
+## Implementation Units
+
+### U1. 扩展 ExpertConfig 新增讨论模式字段
+
+**Goal**: 为 `ExpertConfig` 新增 `speaking_style` 和 `decision_framework` 字段，支持名人专家的个性化表达和决策框架。
+
+**Requirements**: FR-4
+
+**Dependencies**: 无
+
+**Files**:
+- `src/agentkit/experts/config.py` — 修改 `ExpertConfig.__init__`、`from_dict`、`to_dict`
+- `tests/unit/experts/test_config.py` — 新增字段测试
+
+**Approach**:
+- 在 `ExpertConfig.__init__` 新增 `speaking_style: str = ""` 和 `decision_framework: str = ""` 参数
+- 在 `from_dict` 中读取 `data.get("speaking_style", "")` 和 `data.get("decision_framework", "")`
+- 在 `to_dict` 中序列化这两个字段
+- 字段有默认值，向后兼容现有 `@team` 模式和动态生成的 ExpertConfig
+
+**Patterns to follow**: 现有 `persona`、`thinking_style` 字段的实现模式（`src/agentkit/experts/config.py:35-65`）
+
+**Test scenarios**:
+- **Happy path**: 创建 ExpertConfig 时传入 speaking_style 和 decision_framework，验证字段值正确
+- **Happy path**: 从 dict 创建 ExpertConfig，包含 speaking_style 和 decision_framework，验证字段值正确
+- **Edge case**: 不传 speaking_style 和 decision_framework，验证默认值为空字符串
+- **Edge case**: to_dict 输出包含 speaking_style 和 decision_framework 字段
+- **Integration**: 现有 ExpertConfig 用法（不传新字段）仍正常工作
+
+**Verification**: `pytest tests/unit/experts/test_config.py` 通过，现有测试不受影响
+
+---
+
+### U2. 创建预设名人专家 YAML 库
+
+**Goal**: 在 `configs/experts/` 目录下创建 8 位名人专家 YAML 文件，每位名人包含 persona、thinking_style、speaking_style、decision_framework 等字段。
+
+**Requirements**: FR-3, FR-5
+
+**Dependencies**: U1
+
+**Files**:
+- `configs/experts/elon_musk.yaml`
+- `configs/experts/jeff_bezos.yaml`
+- `configs/experts/allenzhang.yaml`
+- `configs/experts/charlie_munger.yaml`
+- `configs/experts/paul_graham.yaml`
+- `configs/experts/steve_jobs.yaml`
+- `configs/experts/warren_buffett.yaml`
+- `configs/experts/ray_dalio.yaml`
+- `configs/experts/private_board.yaml` — 默认私董会模板（引用上述专家）
+
+**Approach**:
+- 每个 YAML 遵循 `ExpertTemplateRegistry.load_from_yaml` 的格式（见 `src/agentkit/experts/registry.py:71-87`）
+- 字段：`name`、`description`、`is_builtin: true`、`config`（含 `name`、`agent_type: expert`、`persona`、`thinking_style`、`speaking_style`、`decision_framework`、`avatar`、`color`、`is_lead: false`、`task_mode: llm_generate`、`prompt.identity`）
+- `private_board.yaml` 是一个特殊的"团队模板"，定义默认专家组合（3-5 位），格式为 `name: private_board`、`members: [elon_musk, jeff_bezos, allenzhang, charlie_munger, paul_graham]`
+- 名人 persona 内容需体现其标志性思维模式：
+  - 马斯克：第一性原理、物理思维、激进创新
+  - 贝佐斯：Day 1 思维、客户至上、长期主义
+  - 张小龙：用户体验、极简主义、社交产品直觉
+  - 芒格：心智模型、跨学科思维、逆向思考
+  - Paul Graham：创业、做用户想要的东西、反从众
+  - 乔布斯：产品设计、现实扭曲力场、专注
+  - 巴菲特：价值投资、能力圈、复利思维
+  - Ray Dalio：原则驱动决策、极度透明
+
+**Patterns to follow**: `src/agentkit/experts/registry.py:71-87` 的 YAML 格式示例
+
+**Test scenarios**:
+- **Test expectation**: none — YAML 配置文件，由 U9 的集成测试覆盖加载逻辑
+
+**Verification**: `ExpertTemplateRegistry.load_from_directory("configs/experts/")` 能成功加载所有 8 位专家 + 1 个团队模板
+
+---
+
+### U3. 实现 BoardRouter @board 前缀路由
+
+**Goal**: 实现 `BoardRouter` 类，解析 `@board` 前缀，支持指定专家或使用默认私董会模板。
+
+**Requirements**: FR-1, FR-2
+
+**Dependencies**: U1, U2
+
+**Files**:
+- `src/agentkit/experts/board_router.py` — 新建
+- `tests/unit/experts/test_board_router.py` — 新建
+
+**Approach**:
+- 参考 `ExpertTeamRouter`（`src/agentkit/experts/router.py`）的实现模式
+- 正则匹配 `@board` 前缀：`^@board(?=[:\s]|$)(?::(\S+))?\s*(.*)`（前瞻断言保证 `@board` 后紧跟 `:`、空白或行尾，避免误匹配 `@boardroom` 这类更长的前缀）
+- 支持两种格式：
+  - `@board:elon_musk,jeff_bezos 讨论主题` — 指定专家
+  - `@board 讨论主题` — 使用默认 `private_board` 模板
+- 专家名验证：复用 `_EXPERT_NAME_RE = re.compile(r"^[a-zA-Z0-9_-]{1,64}$")`
+- 最多 10 位专家（`MAX_EXPERTS = 10`）
+- 返回 `BoardRoutingResult` dataclass：`matched`、`board_mode`、`specified_experts`、`topic`、`use_default_template`
+- `resolve_expert_configs()` 方法：从 `ExpertTemplateRegistry` 解析专家名到 `ExpertConfig` 列表，首位设为 `is_lead=True`（主持人）
+- 如果指定 `private_board`，从 `private_board.yaml` 加载成员列表
+- 未识别的专家名：记录警告，动态创建基本 ExpertConfig
+
+**Patterns to follow**: `src/agentkit/experts/router.py` 的 `ExpertTeamRouter` 实现
+
+**Test scenarios**:
+- **Happy path**: `@board:elon_musk,jeff_bezos 讨论主题` → matched=True, specified_experts=["elon_musk", "jeff_bezos"], topic="讨论主题"
+- **Happy path**: `@board 讨论主题` → matched=True, use_default_template=True, topic="讨论主题"
+- **Happy path**: `@board:private_board 讨论主题` → matched=True, 加载 private_board 成员列表
+- **Edge case**: `@board` 无主题 → matched=True, topic 为空
+- **Edge case**: 专家名超过 10 个 → 截断到 10 个
+- **Error path**: 无效专家名（含特殊字符）→ 过滤掉无效名，记录警告
+- **Error path**: 指定不存在的专家 → 动态创建基本 ExpertConfig
+- **Integration**: `resolve_expert_configs()` 返回的列表首位 is_lead=True
+
+**Verification**: `pytest tests/unit/experts/test_board_router.py` 通过
+
+---
+
+### U4. 实现 BoardTeam 容器
+
+**Goal**: 实现 `BoardTeam` 容器，管理私董会的专家生命周期、讨论状态和事件广播。
+
+**Requirements**: FR-6, FR-8
+
+**Dependencies**: U1
+
+**Files**:
+- `src/agentkit/experts/board.py` — 新建（`BoardTeam`、`BoardStatus`、`DiscussionHistory`）
+- `tests/unit/experts/test_board.py` — 新建
+
+**Approach**:
+- 参考 `ExpertTeam`（`src/agentkit/experts/team.py`）的容器模式
+- `BoardStatus` 枚举：`FORMING` → `DISCUSSING` → `CONCLUDING` → `COMPLETED` → `DISSOLVED`
+- `BoardTeam` 持有：
+  - `team_id`、`topic`、`experts: dict[str, Expert]`、`moderator_name: str`
+  - `history: list[dict]` — 讨论历史（每条含 round、expert_name、content、timestamp、role）
+  - `current_round: int`、`max_rounds: int`
+  - `_handoff_transport`、`_workspace`、`_pool`、`_team_channel`
+- `create_board()` 方法：创建主持人和成员专家，注入 board_context 到 system prompt
+- `_build_board_context()` 方法：构建私董会上下文（强调群聊讨论模式、角色差异、讨论规则）
+- `add_to_history()` 方法：追加发言到历史
+- `get_history_text()` 方法：返回格式化的历史文本用于 LLM prompt
+- `compress_history()` 方法：主持人压缩历史（超过阈值时）
+- `broadcast_user_message()` 方法：广播用户干预消息
+- `dissolve()` 方法：解散团队，回收资源
+- 复用 `Expert.create()`、`HandoffTransport`、`SharedWorkspace`
+
+**Patterns to follow**: `src/agentkit/experts/team.py` 的 `ExpertTeam` 容器模式
+
+**Technical design** (directional):
+```python
+class BoardStatus(str, enum.Enum):
+    FORMING = "forming"
+    DISCUSSING = "discussing"
+    CONCLUDING = "concluding"
+    COMPLETED = "completed"
+    DISSOLVED = "dissolved"
+
+class BoardTeam:
+    async def create_board(self, topic: str, expert_configs: list[ExpertConfig], max_rounds: int) -> None
+    async def add_to_history(self, round: int, expert_name: str, content: str, role: str) -> None
+    def get_history_text(self, up_to_round: int | None = None) -> str
+    async def compress_history(self, moderator: Expert) -> None
+    async def broadcast_user_message(self, content: str) -> None
+    async def dissolve(self) -> None
+```
+
+**Test scenarios**:
+- **Happy path**: `create_board()` 创建主持人和成员，状态变为 DISCUSSING
+- **Happy path**: `add_to_history()` 追加发言，`get_history_text()` 返回格式化文本
+- **Edge case**: 空历史时 `get_history_text()` 返回空字符串
+- **Edge case**: `compress_history()` 超过阈值时压缩，未超过时不操作
+- **Integration**: `broadcast_user_message()` 通过 handoff_transport 发送事件
+- **Integration**: `dissolve()` 清理所有专家，状态变为 DISSOLVED
+- **Error path**: 未配置 AgentPool 时 `create_board()` 抛出 RuntimeError
+
+**Verification**: `pytest tests/unit/experts/test_board.py` 通过
+
+---
+
+### U5. 实现 BoardOrchestrator 讨论引擎
+
+**Goal**: 实现 `BoardOrchestrator`，驱动私董会讨论流程：开场 → 多轮并行发言 + 主持人小结 → 最终决策建议。
+
+**Requirements**: FR-7, FR-9
+
+**Dependencies**: U4
+
+**Files**:
+- `src/agentkit/experts/board_orchestrator.py` — 新建
+- `tests/unit/experts/test_board_orchestrator.py` — 新建
+
+**Approach**:
+- 参考 `TeamOrchestrator`（`src/agentkit/experts/orchestrator.py`）的执行引擎模式
+- `execute(topic)` 主入口流程：
+  1. 广播 `board_started` 事件
+  2. 主持人开场介绍（介绍议题、讨论规则）
+  3. 循环 `max_rounds` 轮：
+     - 并行生成所有非主持人专家发言（`asyncio.gather`）
+     - 每个专家发言基于：角色 persona + thinking_style + speaking_style + decision_framework + 完整讨论历史 + 当前轮次/最大轮次
+     - 按专家列表顺序广播 `expert_speech` 事件
+     - 主持人小结本轮要点，广播 `round_summary` 事件
+     - 检查用户干预消息（通过 handoff_transport 或共享状态）
+     - 检查历史 token 长度，超过阈值时压缩
+  4. 主持人最终总结（决策建议、共识点、分歧点），广播 `board_concluded` 事件
+- `_generate_expert_speech()` 方法：构建专家发言 prompt，调用 LLM
+- `_generate_moderator_summary()` 方法：构建主持人小结 prompt，调用 LLM
+- `_generate_final_conclusion()` 方法：构建最终总结 prompt，调用 LLM
+- `_check_user_intervention()` 方法：检查是否有用户干预消息
+- `_handle_stop_command()` 方法：处理用户 `/stop` 命令
+- 复用 `_get_llm_gateway()` 和 `_broadcast_event()` 模式（来自 TeamOrchestrator）
+- 异常处理：LLM 不可用时，不再调用 LLM，直接拼接已有讨论历史作为总结；某位专家发言失败时跳过该专家；所有专家发言失败时，提前进入总结阶段
+
+**Patterns to follow**: `src/agentkit/experts/orchestrator.py` 的 `TeamOrchestrator` 执行模式
+
+**Technical design** (directional):
+```python
+class BoardOrchestrator:
+    def __init__(self, team: BoardTeam) -> None
+    
+    async def execute(self, topic: str) -> dict[str, Any]
+    # Returns: {status, summary, decision_advice, total_rounds, consensus_points, dissent_points}
+    
+    async def _generate_expert_speech(self, expert: Expert, round: int) -> str
+    async def _generate_moderator_summary(self, round: int) -> str
+    async def _generate_final_conclusion(self) -> dict[str, Any]
+    async def _check_user_intervention(self) -> str | None
+```
+
+**Test scenarios**:
+- **Happy path**: `execute()` 完成完整讨论流程，返回 status="completed"
+- **Happy path**: 每轮生成 N-1 个专家发言 + 1 个主持人小结
+- **Happy path**: 最终总结包含 decision_advice、consensus_points、dissent_points
+- **Edge case**: max_rounds=1 时，只进行一轮讨论后直接总结
+- **Edge case**: 用户发送 `/stop` → 提前终止，用已有历史总结
+- **Error path**: LLM 不可用 → 主持人用已有历史拼接总结
+- **Error path**: 某专家发言失败 → 跳过该专家，其他专家继续
+- **Error path**: 所有专家发言失败 → 提前进入总结阶段
+- **Integration**: `board_started`、`expert_speech`、`round_summary`、`board_concluded` 事件正确广播
+
+**Verification**: `pytest tests/unit/experts/test_board_orchestrator.py` 通过
+
+---
+
+### U6. 后端集成 BoardRouter 到主聊天流程
+
+**Goal**: 在 `chat.py` 的 WebSocket 处理中接入 `BoardRouter`，实现 `@board` 前缀触发私董会讨论，并通过 `emit_team_event()` 推送事件到前端。
+
+**Requirements**: FR-1, FR-10, FR-11, FR-15
+
+**Dependencies**: U3, U5
+
+**Files**:
+- `src/agentkit/server/routes/chat.py` — 修改 `_handle_chat_message`，接入 `BoardRouter`
+- `src/agentkit/server/app.py` — 启动时加载 `configs/experts/`，挂载 `expert_template_registry` 和 `board_config` 到 `app.state`
+- `src/agentkit/server/config.py` — 新增 `board` 配置项（max_rounds、default_template、parallel_speech、history_compression_threshold）
+- `tests/unit/server/test_chat_board_integration.py` — 新建
+
+**Approach**:
+- 在 `app.py` 启动时（参考 skills 加载逻辑 `app.py:261-282`）：
+  - 创建 `ExpertTemplateRegistry` 实例
+  - 调用 `load_from_directory("configs/experts/")` 加载所有专家 YAML
+  - 挂载到 `app.state.expert_template_registry`
+  - 从 `agentkit.yaml` 读取 `board` 配置，挂载到 `app.state.board_config`
+- 在 `chat.py` 的 `_handle_chat_message` 中：
+  - 在 `RequestPreprocessor` 之前，检查 `@board` 前缀
+  - 如果匹配 `@board`，创建 `BoardRouter`，解析路由
+  - 创建 `BoardTeam` 和 `BoardOrchestrator`
+  - 注册 `handoff_transport` 的 handler，将事件转发到 WebSocket（调用 `emit_team_event()`）
+  - 调用 `orchestrator.execute(topic)`
+  - 将最终结果作为 `final_answer` 发送
+- 扩展 `_VALID_TEAM_EVENT_TYPES`：新增 `board_started`、`expert_speech`、`round_summary`、`user_intervention`、`board_concluded`
+- 用户干预处理：讨论进行中，如果用户发送新消息（非 `/stop`），通过 `board_team.broadcast_user_message()` 广播
+
+**Patterns to follow**: 
+- skills 加载模式：`src/agentkit/server/app.py:261-282`
+- `emit_team_event()` 辅助函数：`src/agentkit/server/routes/chat.py:117-142`
+
+**Test scenarios**:
+- **Happy path**: `@board:elon_musk,jeff_bezos 讨论主题` → 触发 BoardRouter，创建 BoardTeam，执行讨论
+- **Happy path**: 讨论事件通过 `emit_team_event()` 推送到 WebSocket
+- **Happy path**: `@board 讨论主题` → 使用默认 private_board 模板
+- **Edge case**: 讨论中用户发送消息 → 广播为 user_intervention
+- **Edge case**: 讨论中用户发送 `/stop` → 终止讨论
+- **Integration**: `app.state.expert_template_registry` 在启动时正确加载
+- **Integration**: `@board` 与 `@team` 和普通聊天互不干扰
+
+**Verification**: `pytest tests/unit/server/test_chat_board_integration.py` 通过；手动测试 `@board` 触发讨论
+
+---
+
+### U7. 前端事件类型和处理扩展
+
+**Goal**: 在前端 TypeScript 类型和 Pinia store 中新增私董会事件类型和处理逻辑。
+
+**Requirements**: FR-10, FR-12, FR-13
+
+**Dependencies**: U6
+
+**Files**:
+- `src/agentkit/server/frontend/src/api/types.ts` — 新增 board 事件类型和接口
+- `src/agentkit/server/frontend/src/stores/chat.ts` — 新增 board 事件处理
+- `src/agentkit/server/frontend/src/stores/team.ts` — 新增 board 状态管理（可选，或新建 `stores/board.ts`）
+
+**Approach**:
+- 在 `types.ts` 中：
+  - 新增 `WsServerMessage` 联合类型成员：`board_started`、`expert_speech`、`round_summary`、`user_intervention`、`board_concluded`
+  - 新增 `IBoardState` 接口：`team_id`、`topic`、`experts`、`moderator_name`、`current_round`、`max_rounds`、`status`
+  - 新增 `IBoardSpeech` 接口：`expert_name`、`expert_avatar`、`expert_color`、`content`、`round`、`role`
+- 在 `chat.ts` 的 `handleWsMessage` 中新增 case：
+  - `board_started`: 创建 board 状态，推送步骤提示
+  - `expert_speech`: 创建专家消息（带头像、颜色、轮次标识），追加到消息列表
+  - `round_summary`: 创建主持人小结消息（特殊样式，message_type='milestone'）
+  - `user_intervention`: 标记用户消息已广播
+  - `board_concluded`: 创建总结消息（含决策建议、共识点、分歧点）
+- 参考 `chat.ts:554-646` 的现有 team 事件处理模式
+
+**Patterns to follow**: `src/agentkit/server/frontend/src/stores/chat.ts:554-646` 的 team 事件处理
+
+**Test scenarios**:
+- **Test expectation**: none — 前端 TypeScript 类型，由 U8 的组件测试和手动测试覆盖
+
+**Verification**: `npm run typecheck` 通过
+
+---
+
+### U8. 前端群聊式 UI 增强
+
+**Goal**: 增强 `ExpertMessage.vue` 组件，新增讨论状态展示，实现群聊式体验。
+
+**Requirements**: FR-12, FR-13, FR-14
+
+**Dependencies**: U7
+
+**Files**:
+- `src/agentkit/server/frontend/src/components/chat/ExpertMessage.vue` — 增强（新增轮次标识、角色标签）
+- `src/agentkit/server/frontend/src/components/chat/BoardStatusView.vue` — 新建（讨论状态展示）
+- `src/agentkit/server/frontend/src/views/ChatView.vue` — 集成 BoardStatusView
+
+**Approach**:
+- 增强 `ExpertMessage.vue`：
+  - 新增 props：`round?: number`、`role?: 'expert' | 'moderator' | 'user'`
+  - 显示轮次标识（如"第 2 轮"）
+  - 主持人消息特殊样式（背景色、边框）
+  - 用户干预消息右侧气泡展示
+- 新建 `BoardStatusView.vue`：
+  - 顶部显示：讨论主题、当前轮次/最大轮次、参与专家列表（头像、名称、角色标签）
+  - 专家列表可点击查看其所有发言
+  - 讨论结束后展示总结卡片（决策建议、共识点、分歧点）
+- 在 `ChatView.vue` 中：
+  - 当 board 状态激活时，渲染 `<BoardStatusView />`（类似现有 `<ExpertTeamView />`）
+
+**Patterns to follow**: 
+- `src/agentkit/server/frontend/src/components/chat/ExpertMessage.vue` 现有组件
+- `src/agentkit/server/frontend/src/components/chat/ExpertTeamView.vue` 团队视图
+- `src/agentkit/server/frontend/src/views/ChatView.vue:19` 的 ExpertTeamView 集成
+
+**Test scenarios**:
+- **Test expectation**: none — Vue 组件，由手动测试覆盖
+
+**Verification**: `npm run typecheck` 通过；手动测试 `@board` 触发讨论，前端正确展示群聊
+
+---
+
+### U9. 单元测试覆盖
+
+**Goal**: 为所有新增模块编写单元测试，确保覆盖率 ≥ 80%。
+
+**Requirements**: 成功标准 §7.1
+
+**Dependencies**: U1-U8
+
+**Files**:
+- `tests/unit/experts/test_config.py` — 扩展（U1 新字段测试）
+- `tests/unit/experts/test_board_router.py` — 新建（U3）
+- `tests/unit/experts/test_board.py` — 新建（U4）
+- `tests/unit/experts/test_board_orchestrator.py` — 新建（U5）
+- `tests/unit/server/test_chat_board_integration.py` — 新建（U6）
+
+**Approach**:
+- 使用 `pytest` + `pytest-asyncio`（asyncio_mode=auto）
+- Mock `LLMGateway`、`AgentPool`、`HandoffTransport` 进行隔离测试
+- 测试覆盖：
+  - 配置字段序列化/反序列化
+  - 路由解析各种格式
+  - BoardTeam 生命周期和状态转换
+  - BoardOrchestrator 讨论流程（正常、异常、用户干预）
+  - chat.py 集成（@board 触发、事件广播）
+- 测试标记：单元测试无特殊标记，集成测试标记 `@pytest.mark.integration`
+
+**Patterns to follow**: 
+- `tests/unit/experts/test_team_orchestrator.py` 的测试模式
+- `tests/unit/experts/test_router.py` 的路由测试模式
+
+**Test scenarios**:
+- 见各实现单元的测试场景
+
+**Verification**: `pytest tests/unit/experts/ tests/unit/server/test_chat_board_integration.py -v` 全部通过；`pytest --cov=src/agentkit/experts --cov-report=term-missing` 覆盖率 ≥ 80%
+
+---
+
+## Risks & Dependencies
+
+### Risks
+
+| 风险 | 影响 | 缓解 |
+|------|------|------|
+| 专家发言同质化 | 讨论质量低 | 强化角色 prompt 差异化，`decision_framework` 字段强制不同视角 |
+| token 消耗过高（5轮×5专家=25次调用） | 成本问题 | 提供轮次配置，历史压缩机制 |
+| 讨论发散无结论 | 用户体验差 | 主持人每轮小结，最终强制总结 |
+| 名人 persona 不准确 | 发言不像本人 | YAML 可迭代优化，未来支持蒸馏 |
+| `chat.py` 集成复杂度高 | 可能破坏现有聊天 | `@board` 路由在 `RequestPreprocessor` 之前检查，不影响普通聊天 |
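+| 前端事件处理与现有 team 事件冲突 | UI 错乱 | 新增的 5 个事件名（`board_started`、`expert_speech`、`round_summary`、`user_intervention`、`board_concluded`）需确保不与现有 team 事件重名；board 状态与 team 状态分开管理 |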
+
+### Dependencies
+
+- **现有基础设施**: `Expert`、`AgentPool`、`HandoffTransport`、`SharedWorkspace`、`ExpertTemplateRegistry`
+- **LLM Gateway**: `src/agentkit/llm/gateway.py` 提供多 provider 支持
+- **WebSocket**: `src/agentkit/server/routes/chat.py` 现有 WebSocket 通道
+- **前端组件**: `ExpertMessage.vue`、`ChatView.vue`、`stores/chat.ts`
+
+---
+
+## System-Wide Impact
+
+### 影响方
+
+- **终端用户**: 获得私董会讨论能力，可就决策类问题获得多视角建议
+- **开发者**: 新增 `@board` 路由模式，需了解与 `@team` 的区别
+- **配置管理**: 新增 `configs/experts/` 目录和 `agentkit.yaml` 的 `board` 配置项
+- **前端**: 新增 board 事件处理和 UI 组件
+
+### 兼容性
+
+- `ExpertConfig` 新增字段有默认值，向后兼容
+- `@board` 路由独立，不影响 `@team` 和普通聊天
+- `_VALID_TEAM_EVENT_TYPES` 扩展是增量式，不影响现有事件
+- 前端事件处理新增 case，不影响现有 case
+
+---
+
+## Open Questions
+
+1. **历史压缩 prompt 具体设计** — 超过 token 阈值时，主持人如何压缩历史？
+   - **决策**: 实现时细化，初步思路是让主持人总结每轮关键观点，替换原始发言。
+
+2. **用户干预消息的实时性** — 用户发送干预后，当前轮次是否立即中断？
+   - **决策**: 不中断当前轮次，干预消息在下一轮生效。避免复杂的中断逻辑。
+
+3. **private_board 模板格式** — 如何在 YAML 中定义"团队模板"（引用多个专家）？
+   - **决策**: `private_board.yaml` 使用 `members: [expert1, expert2, ...]` 字段，`BoardRouter` 解析时加载成员配置。
+
+4. **讨论历史持久化** — 讨论结束后是否保存到数据库？
+   - **决策**: 本期不持久化，仅保存到 SharedWorkspace。未来可扩展。
+
+---
+
+## Sources & Research
+
+- **需求文档**: `docs/brainstorms/2026-06-17-board-meeting-mode-requirements.md`
+- **现有实现**: `src/agentkit/experts/` 目录所有文件
+- **对标产品**: Qoder Experts Mode、WorkBuddy、Legends MCP、colleague.skill（详见需求文档 §1.2）
+- **架构参考**: AgentVerse 通信框架、ARMOR-MAD 协议阈值终止
+
+---
+
+**Next Step**: 交由 `/ce-work` 执行实现，或由开发者按 U1-U9 顺序逐步实现。
diff --git a/docs/plans/2026-06-17-002-fix-ws-task-persistence-plan.md b/docs/plans/2026-06-17-002-fix-ws-task-persistence-plan.md
new file mode 100644
index 0000000..7fa4b0a
--- /dev/null
+++ b/docs/plans/2026-06-17-002-fix-ws-task-persistence-plan.md
@@ -0,0 +1,433 @@
+---
+title: "fix: WebSocket 断开后任务结果丢失与断线恢复"
+status: completed
+created: 2026-06-17
+type: fix
+origin: in-session investigation
+---
+
+# fix: WebSocket 断开后任务结果丢失与断线恢复
+
+## Summary
+
+当用户在复杂任务执行过程中刷新页面时，WebSocket 断开导致 ReAct 任务中断、已收集的输出丢失、且无法恢复。本计划通过三层防御彻底解决：Layer 1 确保部分结果持久化，Layer 2 将任务后台化解耦 WebSocket 生命周期，Layer 3 前端断线后恢复进行中任务。
+
+## Problem Frame
+
+**症状**：用户布置复杂任务后刷新界面，操作停止且返回为空。
+
+**根因**（经代码调查确认）：
+
+1. **结果丢失**：portal.py 的 ReAct 流式路径中，assistant 回复保存在 `async for` 循环之后（portal.py:1064），WebSocket 断开时该行永远不执行，`collected_output` 全部丢失。
+2. **任务中断**：ReAct 任务通过 `async for event in react_engine.execute_stream(...)` 直接在 WebSocket 协程中执行，未使用 BackgroundRunner 后台化，WebSocket 断开 = 任务取消。
+3. **无状态追踪**：portal WebSocket 路径不使用 TaskStore，`task_id` 仅用于 EventQueue 事件发射，任务状态无法查询。
+4. **无恢复机制**：前端 WebSocket 重连后（3 秒自动重连），不检查是否有未完成任务，不恢复进行中的任务状态。
+
+## Requirements
+
+- **R1**: WebSocket 断开时，已收集的 ReAct 输出必须持久化到 conversation store
+- **R2**: ReAct 任务必须后台执行，与 WebSocket 连接生命周期解耦
+- **R3**: 每个任务必须在 TaskStore 中注册，状态可查询
+- **R4**: 后台任务的事件必须通过 EventQueue 分发，WebSocket 可订阅
+- **R5**: 前端 WebSocket 重连后，必须检查当前对话是否有未完成任务并恢复
+- **R6**: 已完成的任务结果必须能从 conversation store 恢复显示
+- **R7**: 不破坏现有 DIRECT_CHAT 路径和 REST API 路径
+
+## Key Technical Decisions
+
+### KTD1: 复用 EventQueue 作为任务事件总线
+
+**决策**：使用现有的 `EventQueue`（`core/event_queue.py`）分发后台任务事件，而非新建 EventBus 模块。
+
+**理由**：EventQueue 已具备所需能力——多订阅者广播（行 161-175）、缓冲回放（deque 100，行 153）、原子订阅（行 193-202）、哨兵关闭模式（行 230-243）。它已被 portal.py 的 `_emit_event_safe` 使用，且文件头注释明确说明设计目标包括 Portal WebSocket 订阅。
+
+### KTD2: 任务后台化使用 asyncio.create_task + EventQueue 订阅
+
+**决策**：在 portal.py 中，将 ReAct 执行包装为 `asyncio.create_task`，WebSocket 协程通过 `event_queue.subscribe()` 订阅事件流。任务在后台独立运行，WebSocket 仅转发事件给前端。
+
+**理由**：BackgroundRunner（`server/runner.py`）虽然存在，但它调用 `agent.execute()`（非流式），不支持事件流。改造 BackgroundRunner 支持流式会侵入核心执行路径。直接在 portal.py 中用 `asyncio.create_task` 更聚焦、风险更低。
+
+### KTD3: 任务状态通过 TaskStore 追踪
+
+**决策**：在 portal.py WebSocket 路径中，为每个用户消息创建 TaskStore 记录，状态随任务进展更新（PENDING → RUNNING → COMPLETED/FAILED）。
+
+**理由**：TaskStore 已在 `app.state.task_store` 可用，`/api/v1/tasks/{task_id}` 端点已存在。前端可通过现有 REST API 查询任务状态，无需新建端点。
+
+### KTD4: 前端通过 conversation_id 关联任务
+
+**决策**：在 TaskStore 记录的 `metadata` 中存储 `conversation_id`，前端重连后通过 `GET /api/v1/tasks?status=running` 查找当前对话的未完成任务。
+
+**理由**：避免新建专门的"按 conversation 查任务"端点。前端遍历 running 任务，匹配 metadata 中的 conversation_id 即可。
+
+## High-Level Technical Design
+
+```
+┌──────────────────────────────────────────────────────────────────┐
+│                     WebSocket 连接存活                            │
+│  ┌────────────┐    subscribe(task_id)    ┌──────────────────┐   │
+│  │  WebSocket │ ◄────────────────────── │   EventQueue     │   │
+│  │  协程      │    转发事件给前端         │  (缓冲回放+广播)  │   │
+│  └────────────┘                          └────────┬─────────┘   │
+│                                                  │ emit         │
+├──────────────────────────────────────────────────┼─────────────┤
+│                     WebSocket 断开                │             │
+│  ┌────────────┐                                 │             │
+│  │  WebSocket │ ✗ 断开                          │             │
+│  │  协程      │                                 │             │
+│  └────────────┘                                 │             │
+│                                                  ▼             │
+│  ┌─────────────────────────────────────────────────────────┐   │
+│  │            后台任务 (asyncio.create_task)                │   │
+│  │  ┌─────────────┐  ┌──────────────┐  ┌────────────────┐ │   │
+│  │  │ ReAct 执行  │─►│ EventQueue   │  │ TaskStore      │ │   │
+│  │  │ execute_    │  │ .emit(event) │  │ .update_status │ │   │
+│  │  │ stream()    │  │              │  │                │ │   │
+│  │  └─────────────┘  └──────────────┘  └────────────────┘ │   │
+│  │         │                                            │   │
+│  │         ▼                                            │   │
+│  │  ┌────────────────────────────────────────────────┐  │   │
+│  │  │ conversation_store.add_message(assistant, ...)  │  │   │
+│  │  │ (无论 WebSocket 是否存活，结果都持久化)          │  │   │
+│  │  └────────────────────────────────────────────────┘  │   │
+│  └─────────────────────────────────────────────────────────┘   │
+│                                                                │
+│  前端重连后:                                                   │
+│  1. GET /api/v1/tasks?status=running → 查找未完成任务          │
+│  2. 匹配 metadata.conversation_id → 当前对话的任务              │
+│  3. event_queue.subscribe(task_id) → 恢复事件流                │
+│  4. 或 GET /conversations/{id} → 拉取已完成的结果              │
+└──────────────────────────────────────────────────────────────────┘
+```
+
+## Scope Boundaries
+
+### In Scope
+
+- portal.py WebSocket 路径的 ReAct 任务后台化
+- WebSocket 断开时的部分结果持久化
+- TaskStore 注册和状态更新
+- EventQueue 事件分发集成
+- 前端 WebSocket 重连后的任务恢复
+- 前端任务状态查询 API 客户端
+
+### Out of Scope
+
+- BackgroundRunner 改造支持流式（风险过高，单独处理）
+- REST API 路径（`/portal/chat`）的改造（REST 已是同步调用，无此问题）
+- SSE 路径（`/portal/chat/stream`）的改造
+- Expert Team 协作模式的后台化（单独处理）
+- WebSocket 重连退避策略优化（独立改进）
+
+### Deferred to Follow-Up Work
+
+- SubmissionQueue 接入（目前完全闲置，可后续用于任务队列化）
+- Redis 分布式任务恢复（当前 InMemoryTaskStore 足够，分布式场景后续处理）
+- 任务进度百分比反馈（当前 ReAct 无细粒度进度概念）
+
+---
+
+## Implementation Units
+
+### U1. Layer 1: WebSocket 断开时持久化已收集的输出
+
+**Goal**: 确保 WebSocket 在 ReAct 流式过程中断开时，已收集的 `collected_output` 保存到 conversation store。
+
+**Requirements**: R1
+
+**Dependencies**: 无
+
+**Files**:
+- `src/agentkit/server/routes/portal.py` — 修改 ReAct 流式路径的异常处理
+- `tests/unit/test_portal_ws_persistence.py` — 新建测试
+
+**Approach**:
+
+在 portal.py 的 `portal_websocket` 函数中，ReAct 流式路径（约 1014-1064 行）的异常处理需要增强：
+
+1. 在 `except Exception as e:` 块（约 1049 行）中，在发送 error 之前，将 `collected_output` 保存到 `_conversation_store`。使用 `_ensure_non_empty` 处理空输出。
+2. 在外层 `except WebSocketDisconnect:` 块（约 1102 行）中，检查是否有未保存的 `collected_output`，如果有则保存。
+3. 在外层 `except Exception as e:` 块（约 1104 行）中，同样处理。
+4. 需要将 `collected_output`、`conv`、`task_id` 声明提升到外层 try 作用域，使 except 块可以访问。
+
+**Patterns to follow**:
+- `_ensure_non_empty()` 函数（portal.py:58-62）处理空输出
+- `_conversation_store.add_message()` 已有调用模式（portal.py:871, 952, 1064）
+- `_emit_event_safe()` 的异常吞咽模式（portal.py:65-95）——保存操作失败不应阻断后续清理
+
+**Test scenarios**:
+- **Happy path**: ReAct 正常完成，`collected_output` 有内容，WebSocket 未断开 → 结果保存到 conversation store（验证现有行为不被破坏）
+- **WebSocket 断开（final_answer 前）**: ReAct 流式中 WebSocket 断开，`collected_output` 为空 → conversation store 不写入空消息（或写入 EMPTY_LLM_RESPONSE）
+- **WebSocket 断开（final_answer 后）**: ReAct 已产出 final_answer，WebSocket 在后续步骤断开，`collected_output` 有部分内容 → 部分内容保存到 conversation store
+- **异常路径**: ReAct 执行抛出异常，`collected_output` 有部分内容 → 部分内容保存，error 事件正常发射
+- **Integration**: 保存操作自身失败（conversation_store 异常）→ 不阻断 error 事件发射和后续清理
+
+**Verification**: 模拟 WebSocket 断开场景，检查 SQLite conversation store 中是否有 assistant 消息记录。
+
+---
+
+### U2. Layer 2: ReAct 任务后台化与 EventQueue 事件分发
+
+**Goal**: 将 ReAct 执行从 WebSocket 协程中解耦，后台执行并通过 EventQueue 分发事件。WebSocket 仅订阅事件流并转发给前端。
+
+**Requirements**: R2, R4
+
+**Dependencies**: U1
+
+**Files**:
+- `src/agentkit/server/routes/portal.py` — 重构 ReAct 流式路径
+- `src/agentkit/core/event_queue.py` — 可能需要扩展（添加 task_id 过滤订阅）
+- `tests/unit/test_portal_ws_background_task.py` — 新建测试
+
+**Approach**:
+
+1. **提取后台执行函数**：创建 `_execute_react_background()` 协程，接收所有必要参数（messages, tools, model, agent_name, system_prompt, timeout_seconds, conv_id, task_id, event_queue, conversation_store）。该函数：
+   - 执行 `react_engine.execute_stream()`
+   - 每个事件通过 `event_queue.emit()` 分发
+   - `final_answer` 事件时累积输出
+   - 正常结束后保存到 `conversation_store`
+   - 异常时保存部分输出并发射 error 事件
+   - 无论成功失败，都发射 `task.completed` 或 `task.failed` 事件
+
+2. **WebSocket 协程改为订阅者**：在 portal.py 的 ReAct 路径中：
+   - 创建 task_id 并注册到 TaskStore（PENDING → RUNNING）
+   - `asyncio.create_task(_execute_react_background(...))` 启动后台任务，并保留返回的 Task 引用（例如存入集合、在完成回调中移除），因为事件循环只对任务持有弱引用，未被引用的任务可能在执行中途被垃圾回收
+   - `async for event in event_queue.subscribe(task_id)` 订阅事件
+   - 将事件通过 `websocket.send_json()` 转发给前端
+   - WebSocket 断开时，`async for` 循环退出，后台任务继续运行
+
+3. **EventQueue 扩展**：当前 `subscribe()` 返回所有事件。需要添加按 `task_id` 过滤的订阅能力，或由 WebSocket 协程在向前端转发时过滤。优先选择在 EventQueue 中添加 `subscribe(task_id=None)` 参数过滤。
+
+4. **DIRECT_CHAT 路径**：DIRECT_CHAT 是同步 LLM 调用（非流式），保持现有逻辑不变，但同样在 TaskStore 注册。
+
+**Technical design**（directional guidance）:
+
+```python
+# 后台执行函数（伪代码）
+async def _execute_react_background(
+    react_engine, messages, tools, model, agent_name,
+    system_prompt, timeout_seconds,
+    conv_id, task_id, event_queue, conversation_store
+):
+    collected_output = []
+    try:
+        async for event in react_engine.execute_stream(...):
+            if event.event_type == "final_answer":
+                collected_output.append(event.data.get("output", ""))
+            await event_queue.emit(Event.create(
+                event_type=event.event_type,
+                task_id=task_id,
+                session_id=conv_id,
+                data={"step": event.step, "data": event.data, ...}
+            ))
+        # 正常完成
+        response_text = _ensure_non_empty("".join(collected_output))
+        await conversation_store.add_message(conv_id, "assistant", response_text)
+        await event_queue.emit(Event.create(
+            event_type="task.completed", task_id=task_id, ...
+        ))
+    except Exception as e:
+        # 保存部分输出
+        if collected_output:
+            partial = _ensure_non_empty("".join(collected_output))
+            await conversation_store.add_message(conv_id, "assistant", partial)
+        await event_queue.emit(Event.create(
+            event_type="task.failed", task_id=task_id,
+            data={"error": str(e)}
+        ))
+
+# WebSocket 协程（伪代码）
+task_id = str(uuid.uuid4())
+task_store.create(task_id, agent_name, {"conversation_id": conv.id})
+asyncio.create_task(_execute_react_background(...))
+
+# WebSocket 订阅事件并转发
+async for event in event_queue.subscribe(task_id=task_id):
+    if event.event_type in ("task.completed", "task.failed"):
+        await websocket.send_json({"type": "result", ...})
+        break
+    await websocket.send_json({"type": "step", "data": ...})
+    # WebSocket 断开时 async for 退出，后台任务继续
+```
+
+**Patterns to follow**:
+- EventQueue 的 `subscribe()` + 哨兵关闭模式（event_queue.py:193-243）
+- `_emit_event_safe()` 的异常吞咽模式（portal.py:65-95）
+- BackgroundRunner 的 `asyncio.create_task` + `_on_done` 回调模式（runner.py:55-73）
+
+**Test scenarios**:
+- **Happy path**: 后台任务正常完成，事件通过 EventQueue 分发，WebSocket 收到所有事件和最终结果
+- **WebSocket 断开**: 后台任务继续运行，结果保存到 conversation store，TaskStore 状态更新为 COMPLETED
+- **后台任务异常**: ReAct 执行抛异常，部分输出保存，TaskStore 状态为 FAILED，error 事件发射
+- **EventQueue 订阅过滤**: 多个并发任务，每个 WebSocket 只收到自己 task_id 的事件
+- **Integration**: 后台任务完成后，conversation store 有 assistant 消息，TaskStore 有 COMPLETED 记录
+
+**Verification**: 启动后台任务后立即断开 WebSocket，等待任务完成后检查 conversation store 和 TaskStore。
+
+---
+
+### U3. Layer 2: TaskStore 注册与状态追踪
+
+**Goal**: 在 portal WebSocket 路径中为每个用户消息创建 TaskStore 记录，状态随任务进展更新。
+
+**Requirements**: R3
+
+**Dependencies**: U2
+
+**Files**:
+- `src/agentkit/server/routes/portal.py` — 集成 TaskStore
+- `tests/unit/test_portal_ws_task_tracking.py` — 新建测试
+
+**Approach**:
+
+1. 在 portal.py 的 WebSocket 路径中，获取 `task_store` from `websocket.app.state.task_store`。
+2. 在用户消息处理开始时，调用 `task_store.create(task_id, agent_name, {"conversation_id": conv.id, "message": message_text})`。
+3. 在后台任务执行前，更新状态为 `RUNNING`（`started_at=now`）。
+4. 在后台任务完成时，更新状态为 `COMPLETED`（`output_data={"output": response_text}, completed_at=now, progress=1.0`）。
+5. 在后台任务失败时，更新状态为 `FAILED`（`error_message=str(e), completed_at=now`）。
+6. DIRECT_CHAT 路径同样注册 TaskStore（同步调用，状态直接从 PENDING → COMPLETED）。
+
+**Patterns to follow**:
+- BackgroundRunner._run_task 的状态更新模式（runner.py:89-92, 141-147, 153-157）
+- TaskStore.create/update_status 的调用签名（task_store.py:127-163）
+
+**Test scenarios**:
+- **Happy path**: 用户发送消息 → TaskStore 创建 PENDING 记录 → 任务执行中状态为 RUNNING → 完成后状态为 COMPLETED，output_data 有内容
+- **任务失败**: ReAct 执行异常 → TaskStore 状态为 FAILED，error_message 有内容
+- **WebSocket 断开后查询**: WebSocket 断开，后台任务继续 → 通过 `GET /api/v1/tasks/{task_id}` 能查到 RUNNING 状态 → 任务完成后查到 COMPLETED
+- **metadata 包含 conversation_id**: TaskStore 记录的 metadata 中有 conversation_id 字段
+- **Integration**: 前端通过 `GET /api/v1/tasks?status=running` 能找到当前对话的运行中任务
+
+**Verification**: 通过 REST API `GET /api/v1/tasks/{task_id}` 查询任务状态，验证状态流转正确。
+
+---
+
+### U4. Layer 3: 前端任务状态查询 API 客户端
+
+**Goal**: 在前端 API 客户端中添加任务状态查询方法，支持按 status 过滤和按 task_id 查询。
+
+**Requirements**: R5, R6
+
+**Dependencies**: U3
+
+**Files**:
+- `src/agentkit/server/frontend/src/api/client.ts` — 添加任务 API 方法
+- `src/agentkit/server/frontend/src/api/types.ts` — 添加任务类型定义
+
+**Approach**:
+
+1. 在 `types.ts` 中添加 `ITaskRecord` 接口，对应后端 `TaskRecord.to_dict()` 的输出格式。
+2. 在 `client.ts` 中添加方法：
+   - `getTask(taskId: string): Promise<ITaskRecord>` — GET `/api/v1/tasks/{taskId}`（注意：tasks 路由前缀是 `/api/v1/tasks`，不是 `/api/v1/portal`）
+   - `listTasks(status?: string): Promise<ITaskRecord[]>` — GET `/api/v1/tasks?status=running`
+3. 由于 tasks 路由前缀不同（`/api/v1/tasks` vs `/api/v1/portal`），需要创建一个新的 ApiClient 实例或调整 BaseApiClient 的 baseUrl。
+
+**Patterns to follow**:
+- 现有 `getConversations()` / `getConversation(id)` 的方法签名模式（client.ts:30-37）
+- `IConversation` 接口定义模式（types.ts:51-57）
+
+**Test scenarios**:
+- **Happy path**: 调用 `getTask(taskId)` 返回正确的任务记录
+- **按状态过滤**: 调用 `listTasks("running")` 只返回运行中任务
+- **任务不存在**: 调用 `getTask("invalid-id")` 抛出 404 错误
+- **Integration**: 后台任务运行中，前端能通过 API 查询到 RUNNING 状态
+
+**Verification**: TypeScript 编译通过（`npm run build:frontend`），API 调用返回正确数据。
+
+---
+
+### U5. Layer 3: 前端 WebSocket 重连后的任务恢复
+
+**Goal**: 前端 WebSocket 重连后，检查当前对话是否有未完成任务，恢复事件流或拉取已完成的结果。
+
+**Requirements**: R5, R6
+
+**Dependencies**: U4
+
+**Files**:
+- `src/agentkit/server/frontend/src/stores/chat.ts` — 添加重连恢复逻辑
+- `src/agentkit/server/frontend/src/api/types.ts` — 扩展 WsClientMessage 类型
+
+**Approach**:
+
+1. **扩展 WebSocket 协议**：添加 `resume` 消息类型，前端重连后发送 `{type: "resume", task_id: "..."}` 订阅已有后台任务的事件流。
+2. **后端处理 resume**：在 portal.py 的 WebSocket 路径中，处理 `resume` 消息类型——通过 task_id 订阅 EventQueue 事件流，转发给前端。
+3. **前端重连恢复流程**（在 `connectWebSocket` 的 `onopen` 中）：
+   - 检查 `currentConversationId` 是否有值
+   - 调用 `listTasks("running")` 查找运行中任务
+   - 匹配 `metadata.conversation_id === currentConversationId` 的任务
+   - 如果找到运行中任务：发送 `resume` 消息，设置 `isLoading = true`
+   - 如果无运行中任务：调用 `selectConversation(currentConversationId)` 重新加载消息（包含已完成的结果）
+4. **后端 resume 处理**：接收到 `resume` 消息后，通过 `task_id` 订阅 EventQueue，转发事件直到 `task.completed` 或 `task.failed`。
+
+**Technical design**（directional guidance）:
+
+```typescript
+// 前端重连恢复（伪代码）
+socket.onopen = async () => {
+  isWsConnected.value = true
+  startHeartbeat()
+
+  // 重连恢复逻辑
+  if (currentConversationId.value) {
+    await recoverTask(currentConversationId.value)
+  }
+}
+
+async function recoverTask(convId: string) {
+  const tasks = await apiClient.listTasks('running')
+  const runningTask = tasks.find(
+    t => t.metadata?.conversation_id === convId
+  )
+
+  if (runningTask) {
+    // 恢复进行中任务的事件流
+    isLoading.value = true
+    ws.value.send(JSON.stringify({
+      type: 'resume',
+      task_id: runningTask.task_id
+    }))
+  } else {
+    // 无运行中任务，重新加载对话消息
+    await selectConversation(convId)
+  }
+}
+```
+
+**Patterns to follow**:
+- 现有 `connectWebSocket` 的 `onopen` / `onclose` 模式（chat.ts:209-259）
+- `selectConversation` 的消息加载模式（chat.ts:55-74）
+- `handleWsMessage` 的事件处理模式（chat.ts:270-528）
+
+**Test scenarios**:
+- **Happy path - 有运行中任务**: 重连后发现有运行中任务 → 发送 resume → 收到后续事件 → 任务完成显示结果
+- **Happy path - 无运行中任务**: 重连后无运行中任务 → 重新加载对话消息 → 显示已完成的结果
+- **任务在重连前已完成**: 重连时任务已 COMPLETED → listTasks("running") 返回空 → 重新加载对话消息 → 结果显示
+- **多个运行中任务**: 有多个对话的运行中任务 → 只恢复当前对话的任务
+- **resume 后任务立即完成**: resume 后立即收到 task.completed 事件 → 正确显示结果
+- **Integration**: 刷新页面 → 重连 → 恢复任务 → 最终结果正确显示
+
+**Verification**: 启动复杂任务 → 刷新页面 → 验证任务继续运行 → 验证结果最终显示。
+
+---
+
+## Risks & Dependencies
+
+### Risks
+
+1. **EventQueue 订阅过滤改造风险**：当前 `subscribe()` 无过滤，添加 task_id 过滤可能影响现有订阅者。缓解：使用可选参数 `subscribe(task_id=None)`，默认行为不变。
+2. **后台任务泄漏风险**：WebSocket 断开后后台任务继续运行，如果任务本身卡住（如 LLM 超时），任务会一直占用资源。缓解：ReAct 已有 `timeout_seconds` 配置，后台任务同样受此约束。
+3. **并发任务事件混淆风险**：多个对话同时执行任务，EventQueue 事件可能混淆。缓解：每个任务有唯一 task_id，订阅时按 task_id 过滤。
+4. **前端重连时序风险**：重连后查询任务状态时，任务可能刚好从 RUNNING 变为 COMPLETED。缓解：先查 running，如果空则重新加载对话消息（会包含已完成结果）。
+
+### Dependencies
+
+- U1 → U2 → U3 → U4 → U5（顺序依赖）
+- EventQueue 已在 app.state 可用（无需新建）
+- TaskStore 已在 app.state 可用（无需新建）
+
+## System-Wide Impact
+
+- **后端**：portal.py WebSocket 路径重大重构，影响所有 WebSocket 聊天用户
+- **前端**：chat store 的 WebSocket 连接逻辑增强，影响所有聊天页面用户
+- **API**：新增 `resume` WebSocket 消息类型，无 REST API 变更
+- **兼容性**：DIRECT_CHAT 路径的对话逻辑保持不变（仅新增 TaskStore 注册，见 U2/U3）；现有 REST API 路径不受影响
diff --git a/docs/research/2026-06-17-ragflow-integration-analysis.md b/docs/research/2026-06-17-ragflow-integration-analysis.md
new file mode 100644
index 0000000..ddb985f
--- /dev/null
+++ b/docs/research/2026-06-17-ragflow-integration-analysis.md
@@ -0,0 +1,644 @@
+# RAGFlow 引入可行性分析
+
+> **创建日期**: 2026-06-17
+> **状态**: 调研完成，待决策
+> **目标**: 评估将 RAGFlow 作为 Fischer AgentKit 知识库的可行性、技术路径与风险
+
+---
+
+## 一、RAGFlow 项目概览
+
+| 维度 | 详情 |
+|------|------|
+| 仓库 | https://github.com/infiniflow/ragflow |
+| License | Apache-2.0 |
+| GitHub Stars | ~80k（2025 年度 Top 10） |
+| 最新版本 | v0.25.6（2026-05-26） |
+| 核心定位 | 基于深度文档理解的 RAG 引擎，构建 AI Agent 上下文层 |
+| 技术栈 | Python 后端 + React/TS 前端 + Docker 部署 |
+
+### 核心差异化能力
+
+- **DeepDoc 引擎**：OCR（15+ 语言）、版面识别（10 类组件：文本/标题/图/表/页眉页脚/公式等）、表格结构识别（TSR）
+- **混合检索**：向量 + 全文 + 稀疏向量，支持 ColBERT late-interaction
+- **高级特性**：GraphRAG、RAPTOR（层级摘要树）、Parent-Child 分块、rerank、上下文压缩
+- **16+ 文档格式**：PDF/DOCX/PPT/Excel/图片/扫描件/网页/结构化数据等
+- **Agent 工作流**：可视化编排、Memory 模块（v0.23.0+）、MCP 协议集成
+
+### 部署资源要求（官方）
+
+- CPU ≥ 4 核
+- RAM ≥ 16 GB
+- Disk ≥ 50 GB
+- Docker ≥ 24.0.0 & Docker Compose ≥ v2.26.1
+
+---
+
+## 二、可行性结论：高度可行 ✅
+
+Fischer AgentKit 已具备**成熟的适配器架构**，专门为对接外部知识库设计，RAGFlow 的 HTTP REST API 可直接映射到现有协议。
+
+### 关键契合点
+
+1. **协议匹配**：现有 `KnowledgeBase` Protocol（`src/agentkit/memory/knowledge_base.py:53-83`）定义了 `ingest()/query()/delete_by_id()/list_sources()/health_check()`，RAGFlow API 完全覆盖这些语义
+
+2. **适配器基类就绪**：`KBAdapter`（`src/agentkit/memory/adapters/base.py:22-160`）已封装 httpx 客户端、生命周期管理、认证流程，子类只需实现 `_make_client()` 和 `search()`
+
+3. **现有先例**：已有 `FeishuKBAdapter`、`ConfluenceAdapter`、`GenericHTTPAdapter` 三个适配器，模式成熟
+
+4. **SemanticMemory 解耦**：`SemanticMemory`（`src/agentkit/memory/semantic.py:14-121`）通过 `rag_service` 注入，不直接依赖具体实现
+
+5. **API 标准化**：RAGFlow 使用 Bearer Token 认证、RESTful JSON 接口，与 `HttpRAGService` 期望的接口形态一致
+
+---
+
+## 三、重点技术路径
+
+### 路径 1：新增 RAGFlowAdapter（推荐）
+
+在 `src/agentkit/memory/adapters/` 下新增 `ragflow.py`，继承 `KBAdapter`。
+
+**API 映射表**：
+
+| KBAdapter 方法 | RAGFlow API | 说明 |
+|----------------|-------------|------|
+| `search(query, top_k)` | `POST /api/v1/retrieval` | body: `{question, dataset_ids, top_k, similarity_threshold, vector_similarity_weight}` |
+| `ingest(documents)` | `POST /api/v1/datasets/{id}/documents` + `POST /api/v1/datasets/{id}/chunks` | 上传后需触发异步解析 |
+| `delete_by_id(id)` | `DELETE /api/v1/datasets/{id}/documents` | body: `{ids: [doc_id]}`，文档 ID 通过请求体传入而非路径 |
+| `list_sources()` | `GET /api/v1/datasets` | RAGFlow 的 dataset 概念 = Fischer 的 source |
+| `health_check()` | `GET /api/v1/datasets` 或 `/v1/health` | |
+| `get_document(doc_id)` | `GET /api/v1/datasets/{id}/documents/{doc_id}` | |
+
+**关键实现要点**：
+- `_make_client()` 配置 `base_url` + `Authorization: Bearer <api_key>`
+- `search()` 需将 RAGFlow 返回的 chunk 结构（`content/document_id/dataset_id/similarity`）标准化为 `QueryResult`
+- `ingest()` 需处理 RAGFlow 的**异步解析流程**：上传 → 触发 parse → 轮询状态（非同步返回）
+- 在 `adapters/__init__.py` 注册导出
+
+### 路径 2：复用 HttpRAGService（最快，但能力受限）
+
+现有 `HttpRAGService` 已实现 `/search` 和 `/bases/{kb_id}/retrieve` 调用。RAGFlow 的 retrieval 端点路径不同（`/api/v1/retrieval`），需在 RAGFlow 侧部署一层 API 网关适配，或扩展 HttpRAGService 支持自定义路径模板。
+
+**局限**：无法利用 RAGFlow 的文档上传/解析/分块能力，只能做检索。
+
+### 路径 3：通过 ragflow-sdk 集成（不推荐）
+
+`pip install ragflow-sdk` 直接调用 Python SDK。**违背 Fischer 的"配置驱动、不直接依赖业务系统代码"原则**，且引入额外依赖。
+
+### 配置集成方案
+
+在 `agentkit.yaml` 的 `memory.semantic` 段扩展：
+
+```yaml
+memory:
+  semantic:
+    enabled: true
+    adapter: "ragflow"
+    base_url: "http://ragflow-ecs-internal-ip:9380"
+    api_key: "${RAGFLOW_API_KEY}"
+    dataset_ids:
+      - "industry-kb-dataset-id"
+      - "enterprise-kb-dataset-id"
+    timeout: 30
+    retrieval:
+      similarity_threshold: 0.2
+      vector_similarity_weight: 0.3
+      top_k: 10
+      rerank_id: "BAAI/bge-reranker-v2-m3"
+    ingest:
+      mode: "async"
+      poll_interval: 5
+      poll_timeout: 600
+    health_check:
+      interval: 60
+      fail_threshold: 3
+```
+
+### 与现有 LocalRAGService 的关系
+
+`LocalRAGService`（pgvector）与 RAGFlow 定位不同：
+- **LocalRAGService**：轻量、同进程、适合中小规模文本知识
+- **RAGFlow**：重量级、独立服务、擅长复杂文档（PDF/扫描件/表格）深度解析
+
+建议**并存**，通过 `MultiSourceRetriever` 聚合，按文档类型路由：纯文本走 Local，复杂文档走 RAGFlow。
+
+---
+
+## 四、风险分析与缓解措施评估
+
+### 高风险项
+
+#### 风险 1：资源占用重（16GB+ RAM）
+
+**原措施**：slim 镜像 + 外部 embedding API / 独立机器部署
+
+| 评估维度 | 结论 |
+|----------|------|
+| 可行性 | ✅ 可行但需分场景。slim 镜像（~2GB）确实省去内置 embedding 模型，但 DeepDoc 的 OCR/TSR/Layout 模型仍打包在内，RAM 峰值仍需 8GB+ |
+| 缓解效果 | ⚠️ 部分有效。slim 仅省 embedding 部分（约 4-6GB），DeepDoc 推理时仍会瞬时占用 2-4GB |
+| 次生风险 | 🔴 外部 embedding API 引入新依赖链：① 网络延迟叠加（检索路径变成 Fischer→RAGFlow→外部Embedding API，3 跳）；② 外部 API 限流/宕机时 RAGFlow 解析直接失败；③ 跨网络传输文档内容存在数据泄露面 |
+
+**更优解**：
+- **方案 A（推荐）**：RAGFlow 独立机器/K8s 节点部署，与 Fischer 集群物理隔离，仅通过 HTTP API 通信
+- **方案 B**：若必须同机，用 cgroup/Docker CPU+memory limits 硬隔离 RAGFlow 容器
+- **不建议**：外部 embedding API 方案，除非已有内部 embedding 服务
+
+#### 风险 2：架构栈重叠（Redis/MySQL/ES vs Fischer 的 Redis/PG）
+
+**原措施**：复用 Fischer 的 Redis；ES/Infinity 无法复用 PG
+
+| 评估维度 | 结论 |
+|----------|------|
+| 可行性 | ⚠️ Redis 复用技术上可行但有陷阱。RAGFlow 用 Redis 做 Celery broker + 缓存，Fischer 用 Redis 做 RedisMessageBus（Streams）+ TaskStore。两者可用不同 db number 隔离 |
+| 缓解效果 | ⚠️ 有限。省下的仅是 Redis 实例（~100MB），ES/Infinity（2-4GB）和 MySQL（1-2GB）仍需独立部署，重叠问题仅解决 10-15% |
+| 次生风险 | 🔴 Redis 复用有严重隐患：① RAGFlow 的 Celery 任务高峰期会打满 Redis 连接池，影响 Fischer 的 RedisMessageBus 消息投递；② key 命名空间若冲突可能导致数据污染；③ RAGFlow 升级时 Redis schema 变更可能波及 Fischer |
+
+**更优解**：
+- **方案 A（推荐）**：完全不共享，独立 Redis 实例。RAGFlow 自带 docker-compose 已包含 Redis，保持默认部署不动
+- **方案 B**：若强需共享，用同一 Redis 实例（单机或 Sentinel 主从）的不同 db（RAGFlow 用 db=1，Fischer 用 db=0），并为双方分别配置连接池上限；注意 Redis Cluster 仅支持 db 0，无法按 db 隔离（只能用 key 前缀），且 `maxmemory-policy` 是实例级配置，无法按 db 单独设置
+- **ES/Infinity**：无更优解，RAGFlow 强依赖其混合检索能力，PG+pgvector 无法替代
+
+#### 风险 3：异步解析延迟（大文档数分钟）
+
+**原措施**：适配器内轮询 / 异步任务模式
+
+| 评估维度 | 结论 |
+|----------|------|
+| 可行性 | ✅ 可行，Fischer 已有完整异步任务基础设施。TaskStore 支持 PENDING→RUNNING→COMPLETED 状态机，tasks 路由已有 submit/status/list/cancel API |
+| 缓解效果 | ✅ 有效。将 RAGFlow ingest 拆为"提交上传→返回 task_id→后台轮询解析状态→更新 task"四步 |
+| 次生风险 | 🟡 中等：① 轮询频率若过高会冲击 RAGFlow API（建议 5-10s 间隔，指数退避）；② task TTL 与 RAGFlow 解析时长不匹配；③ 用户在解析未完成时检索会得到空结果 |
+
+**更优解**：
+- **方案 A（推荐）**：用 RAGFlow 的 Webhook 回调替代轮询。RAGFlow v0.23.0+ 支持 Webhook 触发，解析完成后主动回调 Fischer 的 `/api/v1/tasks/{id}/callback`
+- **方案 B**：若 Webhook 不可用，用 RedisMessageBus 发布解析完成事件，适配器订阅后更新 task 状态
+- **方案 C**：分离 ingest 和 query 路径——ingest 走异步 task，query 永远同步
+
+### 中风险项
+
+#### 风险 4：Embedding 模型锁定
+
+**原措施**：规划期确定模型；不同 dataset 用不同模型
+
+| 评估维度 | 结论 |
+|----------|------|
+| 可行性 | ⚠️ 部分可行。"不同 dataset 不同模型"技术上成立，但跨 dataset 检索时向量维度不一致会导致召回失效 |
+| 缓解效果 | ⚠️ 有限。锁定后若需切换模型，必须重建整个 dataset |
+| 次生风险 | 🟡 模型碎片化：多个 dataset 用不同 embedding，SemanticMemory 的 `kb_weights` 加权策略失效 |
+
+**更优解**：
+- **方案 A（推荐）**：全组织统一 embedding 模型（建议 `BAAI/bge-large-zh-v1.5` 或 `bge-m3`），所有 dataset 强制一致
+- **方案 B**：若必须多模型，在适配器层按 dataset 分组检索，组内归一化 score 后再融合
+- **根本性建议**：将 embedding 模型选择纳入 Fischer 的 LLM Gateway 统一管理
+
+#### 风险 5：ARM64 支持缺失
+
+**原措施**：x86 Docker / 自行构建
+
+| 评估维度 | 结论 |
+|----------|------|
+| 可行性 | ✅ 可行。自行构建有官方文档支持 |
+| 缓解效果 | ✅ 有效，但构建耗时（含模型下载约 30-60 分钟） |
+| 次生风险 | 🟢 低。主要是构建产物维护成本 |
+
+**更优解**：
+- **方案 A**：开发环境用 x86 Docker Desktop；生产环境强制 x86 服务器
+- **方案 B**：若有 CI/CD，用 GitHub Actions 在 ARM64 runner 上自动构建并推送到私有 registry
+
+#### 风险 6：版本快速迭代（API breaking change）
+
+**原措施**：锁定版本；适配器层兼容
+
+| 评估维度 | 结论 |
+|----------|------|
+| 可行性 | ✅ 可行。锁定版本是标准做法 |
+| 缓解效果 | ✅ 有效但被动。锁定版本意味着无法获得 bug 修复和新特性 |
+| 次生风险 | 🟡 安全漏洞累积：长期不升级会错过安全补丁 |
+
+**更优解**：
+- **方案 A（推荐）**：适配器层做 API 版本抽象。定义 `RAGFlowAPIVersion` 枚举，适配器根据版本号选择不同的端点路径和响应解析逻辑
+- **方案 B**：跟随 RAGFlow 的 minor 版本（如固定 v0.25.x），patch 版本自动升级
+
+#### 风险 7：数据模型映射损耗
+
+**原措施**：适配器层完整字段映射，特有字段入 metadata
+
+| 评估维度 | 结论 |
+|----------|------|
+| 可行性 | ✅ 完全可行。这是适配器模式的标准职责 |
+| 缓解效果 | ✅ 有效。QueryResult 的 metadata 字段是 dict，可容纳任意额外字段 |
+| 次生风险 | 🟢 极低 |
+
+**更优解**：当前方案已足够。增强建议：定义 `RAGFlowChunkMetadata` Pydantic 模型，结构化 RAGFlow 特有字段
+
+### 低风险项
+
+#### 风险 8：网络调用开销
+
+**原措施**：timeout + 降级
+
+| 评估维度 | 结论 |
+|----------|------|
+| 可行性 | ✅ 可行。SemanticMemory 已有 try/except 降级到空结果 |
+| 缓解效果 | ✅ 有效 |
+| 次生风险 | 🟡 降级静默化：检索失败返回空列表，用户无感知 |
+
+**更优解**：在降级时记录 metric，并向前端 WebSocket 推送 `error` 事件；增加 circuit breaker
+
+#### 风险 9 & 10：认证差异 / 功能重叠
+
+**评估**：两项措施均完全可行且无次生风险，无需更优解。
+
+### 综合评估矩阵
+
+| 风险 | 原措施可行性 | 次生风险 | 缓解效果 | 更优解收益 |
+|------|:---:|:---:|:---:|------|
+| 1. 资源占用 | ⚠️ 部分 | 🔴 高 | ⚠️ 部分 | 独立部署彻底解决 |
+| 2. 架构栈重叠 | ⚠️ 部分 | 🔴 高 | ⚠️ 仅10-15% | 独立Redis实例，不共享 |
+| 3. 异步解析 | ✅ 可行 | 🟡 中 | ✅ 有效 | Webhook回调消除轮询 |
+| 4. Embedding锁定 | ⚠️ 部分 | 🟡 中 | ⚠️ 有限 | 统一模型+网关治理 |
+| 5. ARM64 | ✅ 可行 | 🟢 低 | ✅ 有效 | CI自动构建 |
+| 6. 版本迭代 | ✅ 可行 | 🟡 中 | ⚠️ 被动 | API版本抽象层 |
+| 7. 数据映射 | ✅ 可行 | 🟢 极低 | ✅ 有效 | 已足够 |
+| 8. 网络开销 | ✅ 可行 | 🟡 中 | ✅ 有效 | metric+WS通知 |
+
+### 关键结论
+
+1. **原措施中 2 项有严重次生风险需立即调整**：
+   - 风险 1 的"外部 embedding API"方案 → 改为独立机器部署
+   - 风险 2 的"复用 Redis"方案 → 改为独立 Redis 实例
+
+2. **1 项有明确更优解**：
+   - 风险 3 的"轮询"→ 改用 RAGFlow Webhook 回调
+
+3. **最关键的系统性建议**：将 RAGFlow 视为完全独立的外部服务（而非与 Fischer 共享基础设施的组件），通过 HTTP API 松耦合
+
+4. **embedding 模型治理应统一到 Fischer 的 LLM Gateway**，避免模型碎片化和 score 不可比问题
+
+---
+
+## 五、独立部署配置方案（阿里云）
+
+### 架构拓扑
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│  阿里云 VPC (10.0.0.0/16)                                    │
+│                                                              │
+│  ┌──────────────────────┐         ┌───────────────────────┐ │
+│  │ Fischer ECS (已有)    │         │ RAGFlow ECS (新增)     │ │
+│  │ 10.0.1.10            │  HTTP   │ 10.0.2.10             │ │
+│  │ agentkit serve:8001  │◄───────►│ ragflow:9380          │ │
+│  │ Redis:6379 (内嵌)    │         │ Infinity:23321        │ │
+│  │ PG:5432 (内嵌)       │         │ MySQL:3306 Redis:6380 │ │
+│  └──────────────────────┘         └───────────────────────┘ │
+│         │                                  │                 │
+│         │          ┌──────────────────────┐│                 │
+│         └─────────►│  阿里云 NAS / ESSD   │◄┘                 │
+│                    │  (文档持久化存储)     │                  │
+│                    └──────────────────────┘                  │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### 资源需求核算（修正后）
+
+RAGFlow 各组件实际内存占用：
+
+| 组件 | 最小可运行 | 推荐生产 | 说明 |
+|------|-----------|---------|------|
+| RAGFlow Server + DeepDoc | 2GB | 4GB | Python 进程，DeepDoc 推理时峰值 +2GB |
+| Elasticsearch | 4GB（heap 2g） | 8GB（heap 4g） | JVM heap = 容器内存 50% |
+| Infinity（替代ES） | 2GB | 3GB | Rust 实现，无 JVM 开销 |
+| MySQL | 1GB | 2GB | 元数据存储，数据量小 |
+| Redis | 512MB | 1GB | Celery broker + 缓存 |
+| MinIO | 512MB | 1GB | 文档对象存储 |
+| 系统开销 | 1GB | 2GB | OS + Docker daemon |
+
+### 按业务规模分档推荐
+
+#### 档位 1：POC / 小规模（< 1000 文档）
+
+| 配置 | 规格 | 月费 |
+|------|------|------|
+| 实例 | ecs.g7.xlarge（4c16g） | ¥450 |
+| 数据盘 | ESSD PL0 100GB | ¥45 |
+| **合计** | | **~¥500/月** |
+
+#### 档位 2：中小规模（1000-10000 文档）⭐ 推荐
+
+| 配置 | 规格 | 月费 |
+|------|------|------|
+| 实例 | ecs.g7.2xlarge（8c32g） | ¥900 |
+| 数据盘 | ESSD PL1 200GB | ¥150 |
+| **合计** | | **~¥1,050/月** |
+
+资源分配：
+```
+RAGFlow Server:  4GB
+ES (heap 4g):    8GB  或  Infinity: 3GB
+MySQL:           2GB
+Redis:           1GB
+MinIO:           1GB
+系统余量:        16GB（含 DeepDoc 推理峰值）
+```
+
+#### 档位 3：中大规模（1万-10万文档）
+
+| 配置 | 规格 | 月费 |
+|------|------|------|
+| 实例 | ecs.g7.3xlarge（12c48g） | ¥1,350 |
+| 数据盘 | ESSD PL1 500GB | ¥375 |
+| **合计** | | **~¥1,725/月** |
+
+### 降配三个手段
+
+1. **用 Infinity 替代 Elasticsearch**（省 4-6GB）：Rust 实现，无 JVM 开销
+2. **slim 镜像 + 外部 Embedding**（省 4GB）：embedding 走阿里云百炼 `text-embedding-v2`（注意：第四节风险 1 不建议外部 embedding API；采用此项即接受其次生风险——检索链路多一跳、外部 API 限流/宕机导致解析失败、文档内容出域——需先评估）
+3. **MySQL/Redis 用 RAGFlow 自带**（不共享，彻底隔离）
+
+### 综合最优方案（性价比最高）
+
+| 项目 | 选择 | 理由 |
+|------|------|------|
+| 实例 | ecs.g7.2xlarge（8c32g） | 留足 DeepDoc 推理余量 |
+| 文档引擎 | Infinity（非 ES） | 省 5GB 内存 |
+| 镜像 | v0.25.6-slim | 不含 embedding 模型 |
+| Embedding | 阿里云百炼 text-embedding-v2 | 外部 API，按量付费极低 |
+| MySQL/Redis | RAGFlow 自带 | 不共享，彻底隔离 |
+| 数据盘 | ESSD PL1 200GB | IOPS 够用 |
+| **月费** | **~¥1,050** | |
+
+资源实际占用预估：
+```
+RAGFlow Server (slim):  2GB
+DeepDoc 推理峰值:        +2GB（间歇性）
+Infinity:               3GB
+MySQL:                  2GB
+Redis:                  1GB
+MinIO:                  1GB
+系统:                   2GB
+────────────────────────────
+总计:                   ~13GB（32GB 机器余量充足）
+```
+
+### 极限降配方案（仅 POC 验证用）
+
+| 项目 | 选择 | 月费 |
+|------|------|------|
+| 实例 | ecs.g7.xlarge（4c16g） | ¥450 |
+| 文档引擎 | Infinity | |
+| 镜像 | slim + 外部 embedding | |
+| **月费** | | **~¥500** |
+
+⚠️ 4c16g 下 DeepDoc 解析大 PDF（>50页）可能触发 OOM，仅适合验证检索效果。
+
+---
+
+## 六、RAGFlow 侧 Docker Compose 配置
+
+```yaml
+# /opt/ragflow/docker/docker-compose.yml（基于官方 v0.25.6 调整）
+services:
+  ragflow-server:
+    image: registry.cn-hangzhou.aliyuncs.com/infiniflow/ragflow:v0.25.6-slim
+    container_name: ragflow-server
+    ports:
+      - "10.0.2.10:9380:9380"        # 仅绑定内网 IP，不暴露公网
+      - "10.0.2.10:80:80"
+    environment:
+      - SVR_HTTP_PORT=9380
+      - MYSQL_HOST=mysql
+      - MYSQL_PORT=3306
+      - REDIS_HOST=redis
+      - REDIS_PORT=6380               # 与 Fischer Redis 端口隔离
+      - DOC_ENGINE=infinity            # 使用 Infinity 替代 ES
+      - MINIO_HOST=minio:9000
+    volumes:
+      - /data/ragflow/ragflow-logs:/ragflow/logs
+      - /data/ragflow/ragflow-data:/ragflow/data
+    depends_on:
+      - mysql
+      - redis
+      - minio
+      - infinity
+    restart: unless-stopped
+    deploy:
+      resources:
+        limits:
+          memory: 8G
+
+  infinity:
+    image: infiniflow/infinity:v0.6.0
+    container_name: ragflow-infinity
+    volumes:
+      - /data/ragflow/infinity-data:/infinity/data
+    deploy:
+      resources:
+        limits:
+          memory: 4G
+
+  mysql:
+    image: mysql:8.0
+    environment:
+      - MYSQL_ROOT_PASSWORD=${MYSQL_ROOT_PASSWORD}
+      - MYSQL_DATABASE=ragflow
+    volumes:
+      - /data/ragflow/mysql-data:/var/lib/mysql
+    deploy:
+      resources:
+        limits:
+          memory: 2G
+
+  redis:
+    image: redis:7-alpine
+    command: redis-server --port 6380 --maxmemory 1gb --maxmemory-policy allkeys-lru
+    volumes:
+      - /data/ragflow/redis-data:/data
+    deploy:
+      resources:
+        limits:
+          memory: 2G
+
+  minio:
+    image: minio/minio:latest
+    command: server /data --console-address ":9001"
+    environment:
+      - MINIO_ROOT_USER=${MINIO_ROOT_USER}
+      - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD}
+    volumes:
+      - /data/ragflow/minio-data:/data
+    deploy:
+      resources:
+        limits:
+          memory: 1G
+```
+
+### 系统级配置（必做）
+
+```bash
+# 1. 内核参数（ES 必须，Infinity 建议也设置）
+sudo sysctl -w vm.max_map_count=262144
+echo "vm.max_map_count=262144" | sudo tee -a /etc/sysctl.conf
+
+# 2. 创建数据目录
+sudo mkdir -p /data/ragflow/{infinity-data,mysql-data,redis-data,minio-data,ragflow-data,ragflow-logs}
+sudo chown -R 1000:1000 /data/ragflow
+
+# 3. Docker 镜像加速（阿里云容器镜像服务）
+sudo mkdir -p /etc/docker
+sudo tee /etc/docker/daemon.json <<EOF
+{
+  "registry-mirrors": ["https://your-acs-mirror.mirror.aliyuncs.com"]
+}
+EOF
+sudo systemctl restart docker
+```
+
+---
+
+## 七、Fischer 侧 agentkit.yaml 配置
+
+在现有 `agentkit.yaml` 中新增 memory 段：
+
+```yaml
+# 在现有 agentkit.yaml 末尾追加
+memory:
+  semantic:
+    enabled: true
+    adapter: "ragflow"                    # 新增 adapter 类型标识
+    base_url: "http://10.0.2.10:9380"     # RAGFlow ECS 内网 IP
+    api_key: "${RAGFLOW_API_KEY}"         # 从环境变量读取
+    timeout: 30
+    dataset_ids:                          # RAGFlow dataset IDs
+      - "dataset_industry_kb"
+      - "dataset_enterprise_kb"
+    # RAGFlow 检索参数
+    retrieval:
+      similarity_threshold: 0.2           # 低于此分数不返回
+      vector_similarity_weight: 0.3       # 向量权重 (0-1)
+      top_k: 10                           # 单次召回数
+      rerank_id: "BAAI/bge-reranker-v2-m3"  # 可选 rerank 模型
+    # 异步 ingest 配置
+    ingest:
+      mode: "async"                       # async | sync
+      poll_interval: 5                    # 轮询间隔(秒)
+      poll_timeout: 600                   # 最大等待(秒)
+    # 健康检查
+    health_check:
+      interval: 60                        # 检查间隔(秒)
+      fail_threshold: 3                   # 连续失败 N 次熔断
+```
+
+### 环境变量（Fischer ECS 的 .env）
+
+```bash
+# RAGFlow API Key（在 RAGFlow Web UI > 设置 > API Key 生成）
+RAGFLOW_API_KEY=ragflow-xxxxxxxxxxxxxxxxxxxxxxxx
+
+# 可选：embedding 模型走阿里云百炼
+DASHSCOPE_API_KEY=sk-xxxxxxxxxxxx
+```
+
+---
+
+## 八、阿里云网络与安全配置
+
+### VPC 安全组规则
+
+**RAGFlow ECS 安全组**（仅允许 Fischer 内网访问）：
+
+| 方向 | 协议 | 端口 | 源/目标 | 用途 |
+|------|------|------|---------|------|
+| 入 | TCP | 9380 | 10.0.1.10/32 (Fischer) | RAGFlow API |
+| 入 | TCP | 80 | 10.0.1.10/32 (Fischer) | RAGFlow Web（可选） |
+| 入 | TCP | 22 | 管理跳板机 IP | SSH 运维 |
+| 出 | TCP | 443 | 0.0.0.0/0 | 拉取镜像、调用外部 LLM |
+| 入 | TCP | * | 0.0.0.0/0 | **拒绝**（默认） |
+
+### API Key 安全
+
+RAGFlow API Key 通过阿里云 KMS 加密存储，运行时解密注入：
+
+```bash
+# 加密 API Key 到 KMS
+aliyun kms Encrypt \
+  --KeyId key-ragflow \
+  --Plaintext "$(echo -n 'ragflow-xxx' | base64)"
+
+# Fischer 启动脚本中解密
+export RAGFLOW_API_KEY=$(aliyun kms Decrypt \
+  --CiphertextBlob "encrypted-blob" \
+  --query Plaintext | base64 -d)
+```
+
+---
+
+## 九、可选阿里云托管服务替代
+
+| RAGFlow 组件 | 阿里云替代 | 优势 | 成本 |
+|--------------|-----------|------|------|
+| Elasticsearch | 阿里云 ES | 免运维、自动备份、监控 | ~¥800/月（2核4G） |
+| MySQL | RDS MySQL | 高可用、自动备份 | ~¥200/月（1核2G） |
+| Redis | Tair/Redis 实例 | 免运维、持久化 | ~¥150/月（1G） |
+| MinIO（文档存储） | OSS | 11个9 持久性、低成本 | ~¥0.12/GB/月 |
+| 服务器 | ACK（K8s） | 弹性伸缩、滚动升级 | 节点费 + 管理费 |
+
+### 成本对比
+
+| 方案 | 月费 | 适用场景 |
+|------|------|---------|
+| 全自管 ECS（8c32g + Infinity + slim） | ~¥1,050 | 推荐起步 |
+| ECS + 托管服务混合 | ~¥2,062 | 免运维需求 |
+| 经济型 POC（4c16g） | ~¥500 | 仅验证 |
+
+---
+
+## 十、实施步骤
+
+1. **POC 验证**：用 `docker compose` 起一个 RAGFlow slim + Infinity 实例，上传 1 个 PDF，调用 `/api/v1/retrieval` 验证检索效果
+2. **实现 RAGFlowAdapter**：参考 `generic_http.py` 模式，重点处理异步解析和字段映射
+3. **配置集成**：扩展 `agentkit.yaml` schema 和 SemanticMemory 初始化逻辑
+4. **单元测试**：参考现有适配器测试，mock RAGFlow API 响应
+5. **集成测试**：验证 RAGFlow + LocalRAGService 多源检索聚合
+
+### 部署 Checklist
+
+- [ ] RAGFlow ECS 创建并加入与 Fischer 相同的 VPC
+- [ ] 安全组仅放行 Fischer 内网 IP 到 9380 端口
+- [ ] `vm.max_map_count=262144` 永久生效
+- [ ] 数据盘挂载到 `/data/ragflow/` 并设置正确权限
+- [ ] RAGFlow docker-compose 启动，`docker logs -f ragflow-server` 显示就绪
+- [ ] RAGFlow Web UI 创建 dataset，记录 dataset_id
+- [ ] 生成 RAGFlow API Key，通过 KMS 加密存储
+- [ ] Fischer `.env` 配置 `RAGFLOW_API_KEY`
+- [ ] Fischer `agentkit.yaml` 配置 memory.semantic 段
+- [ ] 从 Fischer ECS 执行 `curl http://10.0.2.10:9380/api/v1/datasets` 验证连通
+- [ ] 上传测试文档，验证 ingest 异步解析完成
+- [ ] 执行检索测试，验证 QueryResult 字段映射正确
+
+---
+
+## 十一、核心判断
+
+RAGFlow 的价值在于 **DeepDoc 深度文档解析**这一项 Fischer 现有栈（pgvector + TextChunker）明显薄弱的能力。
+
+- **如果业务场景涉及大量 PDF/扫描件/复杂表格** → 引入值得
+- **若仅处理纯文本** → 现有的 `LocalRAGService` 已够用，不必承担 RAGFlow 的运维复杂度
+
+### 关键决策点
+
+1. RAGFlow 视为**完全独立的外部服务**，通过 HTTP API 松耦合，不共享基础设施
+2. embedding 模型治理统一到 Fischer 的 LLM Gateway
+3. 起步用全自管 ECS（8c32g + Infinity + slim，~¥1,050/月），验证业务价值后再评估是否迁移到托管服务
+4. ECS 规格选 g7.2xlarge 是经过 RAGFlow 全栈资源分配计算的安全值，不建议低于 32GB RAM（生产环境）
+
+---
+
+## 参考资料
+
+- RAGFlow 官网: https://ragflow.io/
+- RAGFlow GitHub: https://github.com/infiniflow/ragflow
+- RAGFlow HTTP API: https://ragflow.io/docs/dev/http_api_reference
+- RAGFlow Python API: https://ragflow.io/docs/dev/python_api_reference
+- RAGFlow 发布说明: https://ragflow.io/docs/release_notes
+- Infinity 数据库: https://github.com/infiniflow/infinity
+- RAGFlow 系统架构: https://deepwiki.com/infiniflow/ragflow/3-system-architecture
diff --git a/scripts/deploy.sh b/scripts/deploy.sh
new file mode 100755
index 0000000..ed07862
--- /dev/null
+++ b/scripts/deploy.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# 服务器侧部署脚本：构建镜像并滚动更新服务
+# 由 Gitea Actions workflow 在 /opt/agentkit/repo 目录下调用
+# Usage: bash scripts/deploy.sh
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+COMPOSE_FILE="${COMPOSE_FILE:-docker-compose.deploy.yaml}"
+DEPLOY_DIR="${DEPLOY_DIR:-/opt/agentkit}"
+
+cd "$PROJECT_ROOT"
+
+if [ ! -f "$COMPOSE_FILE" ]; then
+    echo "❌ 未找到 $COMPOSE_FILE"
+    exit 1
+fi
+
+if [ ! -f ".env" ]; then
+    echo "❌ 未找到 .env 文件，请先通过 Gitea Secrets 生成"
+    exit 1
+fi
+
+echo "==> 部署目录: $PROJECT_ROOT"
+echo "==> Compose 文件: $COMPOSE_FILE"
+echo "==> 开始构建镜像..."
+docker compose -f "$COMPOSE_FILE" build --pull
+
+echo "==> 滚动重启服务（保留数据卷）..."
+docker compose -f "$COMPOSE_FILE" up -d --remove-orphans
+
+echo "==> 当前服务状态:"
+docker compose -f "$COMPOSE_FILE" ps
+
+echo "==> 部署完成 ✅"
diff --git a/src/agentkit/cli/benchmark.py b/src/agentkit/cli/benchmark.py
index 10ba2cb..0a50bc6 100644
--- a/src/agentkit/cli/benchmark.py
+++ b/src/agentkit/cli/benchmark.py
@@ -22,6 +22,7 @@ Tests core AgentKit components:
 - event_model: SubmissionQueue / EventQueue lifecycle [Mock]
 - spec_management: SpecManager CRUD operations [Mock]
 - verification: VerificationLoop execute/retry behavior [Mock]
+- board_meeting: BoardRouter @board prefix routing & validation [Mock]
 - llm_reasoning: Real LLM intent/tool/multi-step/code/error [LLM]
 - gui_integration: agentkit gui end-to-end (API/WS/frontend) [GUI]
 
@@ -85,6 +86,7 @@ class BenchmarkDimension(str, Enum):
     EVENT_MODEL = "event_model"
     SPEC_MANAGEMENT = "spec_management"
     VERIFICATION = "verification"
+    BOARD_MEETING = "board_meeting"
     LLM_REASONING = "llm_reasoning"
     GUI_INTEGRATION = "gui_integration"
     ALL = "all"
@@ -114,6 +116,7 @@ _MOCK_DIMENSIONS: list[BenchmarkDimension] = [
     BenchmarkDimension.EVENT_MODEL,
     BenchmarkDimension.SPEC_MANAGEMENT,
     BenchmarkDimension.VERIFICATION,
+    BenchmarkDimension.BOARD_MEETING,
 ]
 
 
@@ -349,6 +352,62 @@ TASK_SET: list[BenchmarkTask] = [
         "passed", ["timeout"], "超时检测"),
     BenchmarkTask("vf-005", "verification", "multi", "medium", "multi_command",
         "passed", ["multi"], "多命令验证"),
+    # === Board Meeting (18 tasks) ===
+    BenchmarkTask("bd-001", "board_meeting", "default_template", "easy",
+        "@board 讨论是否应该进入东南亚市场",
+        "board", ["board", "default"], "@board 前缀应路由到 board 模式"),
+    BenchmarkTask("bd-002", "board_meeting", "default_template", "easy",
+        "@board AI产品定价策略应该怎么做",
+        "board", ["board", "default"], "@board 前缀应路由到 board 模式"),
+    BenchmarkTask("bd-003", "board_meeting", "default_template", "medium",
+        "@board:private_board 讨论创业公司融资节奏",
+        "board", ["board", "template"], "显式 private_board 模板应路由到 board 模式"),
+    BenchmarkTask("bd-004", "board_meeting", "explicit_experts", "medium",
+        "@board:elon_musk,jeff_bezos 讨论火星殖民的商业化路径",
+        "board", ["board", "explicit"], "指定专家应路由到 board 模式"),
+    BenchmarkTask("bd-005", "board_meeting", "explicit_experts", "medium",
+        "@board:charlie_munger,warren_buffett 价值投资在AI时代的适用性",
+        "board", ["board", "explicit"], "指定多位专家应路由到 board 模式"),
+    BenchmarkTask("bd-006", "board_meeting", "explicit_experts", "medium",
+        "@board:elon_musk,jeff_bezos,allenzhang 产品设计哲学",
+        "board", ["board", "explicit", "multi"], "三位专家应路由到 board 模式"),
+    BenchmarkTask("bd-007", "board_meeting", "topic_extraction", "easy",
+        "@board 讨论是否应该进入东南亚市场",
+        "讨论是否应该进入东南亚市场", ["board", "topic"], "应正确提取讨论主题"),
+    BenchmarkTask("bd-008", "board_meeting", "topic_extraction", "easy",
+        "@board:elon_musk,jeff_bezos 火星商业化方案",
+        "火星商业化方案", ["board", "topic"], "应从显式专家格式提取主题"),
+    BenchmarkTask("bd-009", "board_meeting", "topic_extraction", "easy",
+        "@board",
+        "", ["board", "topic", "empty"], "空主题应返回空字符串"),
+    BenchmarkTask("bd-010", "board_meeting", "no_match", "easy",
+        "讨论一下市场策略",
+        "not_board", ["board", "edge"], "无 @board 前缀不应路由到 board 模式"),
+    BenchmarkTask("bd-011", "board_meeting", "no_match", "easy",
+        "@team:analyst,writer 协作完成任务",
+        "not_board", ["board", "edge"], "@team 前缀不应路由到 board 模式"),
+    BenchmarkTask("bd-012", "board_meeting", "no_match", "easy",
+        "@skill:react_agent 查看ip",
+        "not_board", ["board", "edge"], "@skill 前缀不应路由到 board 模式"),
+    BenchmarkTask("bd-013", "board_meeting", "name_validation", "medium",
+        "@board:elon_musk,jeff_bezos 主题",
+        "2_valid", ["board", "validation"], "两个有效专家名应被接受"),
+    BenchmarkTask("bd-014", "board_meeting", "name_validation", "medium",
+        "@board:@#$ 主题",
+        "default_fallback", ["board", "validation", "invalid"],
+        "全部无效专家名时应回退到默认模板"),
+    BenchmarkTask("bd-015", "board_meeting", "name_validation", "medium",
+        "@board:a,b,c,d,e,f,g,h,i,j,k 主题",
+        "10_capped", ["board", "validation", "cap"], "超过 MAX_EXPERTS=10 应被截断"),
+    BenchmarkTask("bd-016", "board_meeting", "stop_command", "easy",
+        "/stop",
+        "is_stop", ["board", "stop"], "/stop 应被识别为停止命令"),
+    BenchmarkTask("bd-017", "board_meeting", "stop_command", "easy",
+        "停止讨论",
+        "is_stop", ["board", "stop"], "中文停止讨论应被识别"),
+    BenchmarkTask("bd-018", "board_meeting", "stop_command", "easy",
+        "继续讨论",
+        "not_stop", ["board", "stop"], "非停止命令不应被误判"),
 ]
 # fmt: on
 
@@ -359,6 +418,7 @@ _FAST_CORE_IDS: set[str] = {
     "eff-001", "eff-004", "ts-001", "ts-003", "ts-008", "ts-010",
     "ev-001", "ev-004", "ev-005", "sm-001", "sm-002", "sm-006", "sm-004",
     "vf-001", "vf-002", "vf-003", "llm-001", "llm-003", "gui-001", "gui-002", "gui-004",
+    "bd-001", "bd-004", "bd-007", "bd-010", "bd-013", "bd-016",
 }
 # fmt: on
 
@@ -1751,6 +1811,134 @@ async def _exec_verification(task: BenchmarkTask, ctx: BenchmarkContext) -> Exec
     )
 
 
+async def _exec_board_meeting(task: BenchmarkTask, ctx: BenchmarkContext) -> ExecutionResult:
+    """Run one board-meeting benchmark task and report pass/fail.
+
+    ``stop_command`` tasks test membership in BoardOrchestrator.STOP_COMMANDS; all
+    other categories inspect ``BoardRouter.resolve(task.input)``. ``ctx`` is unused.
+
+    Categories:
+    - default_template: "board" iff result.matched and result.board_mode
+    - explicit_experts: same check as default_template
+    - topic_extraction: whitespace-normalized result.topic equals task.expected
+    - no_match: "not_board" iff result.board_mode is falsy
+    - name_validation: len(result.specified_experts) and use_default_template
+    - stop_command: stripped input is a member of STOP_COMMANDS
+    """
+    from agentkit.experts.board_router import (
+        MAX_EXPERTS,
+        BoardRouter,
+    )
+    from agentkit.experts.registry import ExpertTemplateRegistry
+
+    start = time.perf_counter()
+
+    # Router over a default-constructed registry (presumably empty — TODO confirm); setup time is included in duration_ms
+    registry = ExpertTemplateRegistry()
+    router = BoardRouter(template_registry=registry)
+
+    # --- Stop command detection (bd-016, bd-017, bd-018) ---
+    if task.category == "stop_command":
+        from agentkit.experts.board_orchestrator import BoardOrchestrator
+
+        is_stop = task.input.strip() in BoardOrchestrator.STOP_COMMANDS
+        actual = "is_stop" if is_stop else "not_stop"
+        passed = actual == task.expected
+        elapsed = (time.perf_counter() - start) * 1000
+        return ExecutionResult(
+            actual=actual,
+            passed=passed,
+            duration_ms=round(elapsed, 4),
+            detail=f"input={task.input!r} stop_commands={BoardOrchestrator.STOP_COMMANDS}",
+        )
+
+    # --- All other categories: use BoardRouter.resolve() ---
+    result = router.resolve(task.input)
+    elapsed = (time.perf_counter() - start) * 1000
+
+    if task.category == "default_template":
+        # Only matched and board_mode are asserted; use_default_template is reported in detail, not checked
+        actual = "board" if (result.matched and result.board_mode) else "not_board"
+        passed = actual == task.expected
+        return ExecutionResult(
+            actual=actual,
+            passed=passed,
+            duration_ms=round(elapsed, 4),
+            detail=(
+                f"matched={result.matched} board_mode={result.board_mode} "
+                f"use_default={result.use_default_template} topic={result.topic!r}"
+            ),
+        )
+
+    if task.category == "explicit_experts":
+        actual = "board" if (result.matched and result.board_mode) else "not_board"
+        passed = actual == task.expected
+        return ExecutionResult(
+            actual=actual,
+            passed=passed,
+            duration_ms=round(elapsed, 4),
+            detail=(
+                f"matched={result.matched} experts={result.specified_experts} "
+                f"use_default={result.use_default_template}"
+            ),
+        )
+
+    if task.category == "topic_extraction":
+        # Compare the extracted topic after stripping and collapsing whitespace runs
+        actual = " ".join(result.topic.split())
+        passed = actual == task.expected
+        return ExecutionResult(
+            actual=actual,
+            passed=passed,
+            duration_ms=round(elapsed, 4),
+            detail=f"input={task.input!r} topic={result.topic!r} matched={result.matched}",
+        )
+
+    if task.category == "no_match":
+        # "not_board" whenever board_mode is falsy; result.matched is reported but not asserted
+        actual = "not_board" if not result.board_mode else "board"
+        passed = actual == task.expected
+        return ExecutionResult(
+            actual=actual,
+            passed=passed,
+            duration_ms=round(elapsed, 4),
+            detail=f"input={task.input!r} matched={result.matched} board_mode={result.board_mode}",
+        )
+
+    if task.category == "name_validation":
+        # Number of experts the router returned; assumed to be post-validation — TODO confirm against BoardRouter
+        valid_count = len(result.specified_experts)
+        if task.expected == "2_valid":
+            actual = f"{valid_count}_valid"
+            passed = valid_count == 2
+        elif task.expected == "default_fallback":
+            # NOTE(review): passing also requires valid_count > 0, so actual may equal expected while passed is False — confirm intent
+            actual = "default_fallback" if result.use_default_template else "no_fallback"
+            passed = result.use_default_template and valid_count > 0
+        elif task.expected == "10_capped":
+            actual = f"{valid_count}_capped"
+            passed = valid_count == MAX_EXPERTS
+        else:
+            actual = f"{valid_count}_valid"
+            passed = False
+        return ExecutionResult(
+            actual=actual,
+            passed=passed,
+            duration_ms=round(elapsed, 4),
+            detail=(
+                f"input={task.input!r} experts={result.specified_experts} "
+                f"max={MAX_EXPERTS}"
+            ),
+        )
+
+    return ExecutionResult(
+        actual="unknown_category",
+        passed=False,
+        duration_ms=round(elapsed, 4),
+        detail=f"Unknown board_meeting category: {task.category}",
+    )
+
+
 _EXECUTORS: dict[
     str,
     Callable[[BenchmarkTask, BenchmarkContext], Awaitable[ExecutionResult]],
@@ -1762,6 +1950,7 @@ _EXECUTORS: dict[
     "event_model": _exec_event_model,
     "spec_management": _exec_spec_management,
     "verification": _exec_verification,
+    "board_meeting": _exec_board_meeting,
 }
 
 
@@ -1963,8 +2152,9 @@ def _generate_markdown_report(
         "event_model": "5. 事件模型 (Event Model) [Mock]",
         "spec_management": "6. 规格管理 (Spec Management) [Mock]",
         "verification": "7. 验证循环 (Verification Loop) [Mock]",
-        "llm_reasoning": "8. LLM 推理能力 (LLM Reasoning) [LLM]",
-        "gui_integration": "9. GUI 集成测试 (GUI Integration) [GUI]",
+        "board_meeting": "8. 私董会路由 (Board Meeting Routing) [Mock]",
+        "llm_reasoning": "9. LLM 推理能力 (LLM Reasoning) [LLM]",
+        "gui_integration": "10. GUI 集成测试 (GUI Integration) [GUI]",
     }
 
     lines.append("## 维度结果")
diff --git a/src/agentkit/experts/board.py b/src/agentkit/experts/board.py
new file mode 100644
index 0000000..d82e28f
--- /dev/null
+++ b/src/agentkit/experts/board.py
@@ -0,0 +1,377 @@
+"""BoardTeam - 私董会讨论模式容器
+
+管理私董会的专家生命周期、讨论状态和事件广播。
+与 ExpertTeam（hub-and-spoke 任务分解）并列，专注于多轮群聊式讨论。
+
+核心差异（vs ExpertTeam）：
+- 讨论模式：多轮全员发言 + 主持人小结，非任务分解
+- 专家通信：基于共享讨论历史，非独立子任务
+- 终止机制：最大轮次 + 用户干预，非任务完成
+- 主持人角色：首位专家，负责开场/小结/最终总结
+"""
+
+from __future__ import annotations
+
+import enum
+import logging
+import time
+import uuid
+from typing import Any
+
+from .config import ExpertConfig
+from .expert import Expert
+from .registry import ExpertTemplateRegistry
+from ..core.handoff_transport import InProcessHandoffTransport
+from ..core.shared_workspace import SharedWorkspace
+from ..core.agent_pool import AgentPool
+
+logger = logging.getLogger(__name__)
+
+
+class BoardStatus(str, enum.Enum):
+    """Lifecycle states of a BoardTeam (str mixin: members are also plain strings).
+
+    Flow: FORMING → DISCUSSING → CONCLUDING → COMPLETED → DISSOLVED
+    """
+
+    FORMING = "forming"  # initial status assigned in BoardTeam.__init__
+    DISCUSSING = "discussing"  # assigned at the end of BoardTeam.create_board
+    CONCLUDING = "concluding"  # not assigned inside BoardTeam; set via set_status
+    COMPLETED = "completed"  # not assigned inside BoardTeam; set via set_status
+    DISSOLVED = "dissolved"  # assigned by BoardTeam.dissolve
+
+
+class BoardTeam:
+    """Container managing a board of Experts in discussion mode.
+
+    In board meeting mode:
+    - Moderator (lead expert) opens the discussion, summarizes each round,
+      and gives final decision advice
+    - Member experts give speeches each round based on shared discussion history
+    - All experts see the full discussion history (shared context)
+    - Discussion terminates after max_rounds or user intervention
+    """
+
+    def __init__(
+        self,
+        team_id: str | None = None,
+        workspace: SharedWorkspace | None = None,
+        pool: AgentPool | None = None,
+        template_registry: ExpertTemplateRegistry | None = None,
+        max_rounds: int = 5,
+    ):
+        self.team_id = team_id or str(uuid.uuid4())
+        self._workspace = workspace or SharedWorkspace()
+        self._pool = pool
+        self._template_registry = template_registry or ExpertTemplateRegistry()
+        self._handoff_transport = InProcessHandoffTransport()
+        self._experts: dict[str, Expert] = {}
+        self._moderator_name: str | None = None
+        self._status = BoardStatus.FORMING
+        self._team_channel = f"board:{self.team_id}"
+
+        # Discussion state
+        self._topic: str = ""
+        self._history: list[dict[str, Any]] = []
+        self._current_round: int = 0
+        self._max_rounds: int = max_rounds
+        self._user_interventions: list[str] = []  # Pending user messages
+
+    @property
+    def status(self) -> BoardStatus:
+        return self._status
+
+    @property
+    def moderator(self) -> Expert | None:
+        if self._moderator_name:
+            return self._experts.get(self._moderator_name)
+        return None
+
+    @property
+    def experts(self) -> list[Expert]:
+        return list(self._experts.values())
+
+    @property
+    def active_experts(self) -> list[Expert]:
+        return [e for e in self._experts.values() if e.is_active]
+
+    @property
+    def member_experts(self) -> list[Expert]:
+        """Non-moderator experts."""
+        return [e for e in self._experts.values() if e.config.name != self._moderator_name]
+
+    @property
+    def workspace(self) -> SharedWorkspace:
+        return self._workspace
+
+    @property
+    def handoff_transport(self):
+        return self._handoff_transport
+
+    @property
+    def team_channel(self) -> str:
+        return self._team_channel
+
+    @property
+    def topic(self) -> str:
+        return self._topic
+
+    @property
+    def current_round(self) -> int:
+        return self._current_round
+
+    @property
+    def max_rounds(self) -> int:
+        return self._max_rounds
+
+    @property
+    def history(self) -> list[dict[str, Any]]:
+        return self._history.copy()
+
+    def get_expert(self, name: str) -> Expert | None:
+        return self._experts.get(name)
+
+    def set_status(self, status: BoardStatus) -> None:
+        self._status = status
+
+    async def create_board(
+        self,
+        topic: str,
+        expert_configs: list[ExpertConfig],
+    ) -> None:
+        """Create a board with a moderator and member experts.
+
+        Args:
+            topic: Discussion topic
+            expert_configs: List of ExpertConfig, first is moderator
+        """
+        if not self._pool:
+            raise RuntimeError("AgentPool not configured")
+
+        if not expert_configs:
+            raise ValueError("At least one expert config is required")
+
+        self._topic = topic
+
+        # Build board context for all experts
+        board_context = self._build_board_context(expert_configs)
+
+        # Create experts
+        for i, config in enumerate(expert_configs):
+            expert = await Expert.create(
+                config=config,
+                pool=self._pool,
+                handoff_transport=self._handoff_transport,
+                workspace=self._workspace,
+                team_context=board_context,
+            )
+            expert.team_id = self.team_id
+            self._experts[config.name] = expert
+
+            # First expert is moderator
+            if i == 0:
+                self._moderator_name = config.name
+
+        self._status = BoardStatus.DISCUSSING
+
+    def _build_board_context(self, expert_configs: list[ExpertConfig]) -> str:
+        """Build board context string for injection into Expert system prompts.
+
+        Emphasizes discussion mode, role differentiation, and discussion rules.
+        """
+        lines = ["You are part of a Board Meeting (private board discussion mode)."]
+
+        for i, config in enumerate(expert_configs):
+            role = "Moderator" if i == 0 else "Member"
+            lines.append(
+                f"{role}: {config.name} ({config.persona[:100]}...)"
+                f" — Thinking: {config.thinking_style}"
+                f" — Speaking: {config.speaking_style}"
+                f" — Framework: {config.decision_framework}"
+            )
+
+        lines.append("")
+        lines.append("Board meeting rules:")
+        lines.append("- Each round, all members give speeches based on their persona and framework")
+        lines.append("- Moderator opens the discussion, summarizes each round, and gives final advice")
+        lines.append("- All experts see the full discussion history (shared context)")
+        lines.append("- Stay in character: think and speak as your persona would")
+        lines.append("- Be concise but insightful: 2-4 paragraphs per speech")
+        lines.append("- Build on or respectfully challenge previous speakers' points")
+        return "\n".join(lines)
+
+    async def add_to_history(
+        self,
+        round: int,
+        expert_name: str,
+        content: str,
+        role: str = "expert",
+    ) -> None:
+        """Add a speech to the discussion history.
+
+        Args:
+            round: Round number (1-indexed)
+            expert_name: Name of the expert (or "user" for interventions)
+            content: Speech content
+            role: "expert" | "moderator" | "user"
+        """
+        entry = {
+            "round": round,
+            "expert_name": expert_name,
+            "content": content,
+            "timestamp": time.time(),
+            "role": role,
+        }
+        self._history.append(entry)
+
+    def get_history_text(self, up_to_round: int | None = None) -> str:
+        """Get formatted discussion history for LLM prompt injection.
+
+        Args:
+            up_to_round: If provided, only include history up to this round (inclusive)
+
+        Returns:
+            Formatted history text
+        """
+        if not self._history:
+            return ""
+
+        lines: list[str] = []
+        for entry in self._history:
+            if up_to_round is not None and entry["round"] > up_to_round:
+                continue
+
+            role_label = {
+                "moderator": "主持人小结",
+                "user": "用户干预",
+                "expert": "专家发言",
+            }.get(entry["role"], entry["role"])
+
+            lines.append(
+                f"[第{entry['round']}轮 | {entry['expert_name']} | {role_label}]\n"
+                f"{entry['content']}"
+            )
+
+        return "\n\n---\n\n".join(lines)
+
+    async def compress_history(self, moderator: Expert, llm_gateway: Any) -> None:
+        """Compress the discussion history with a single LLM call (best effort).
+
+        Does nothing below 10 entries. The reply is scanned for lines shaped like
+        ``[第X轮 | name] text`` (a third ``| label`` header field is tolerated).
+        History is replaced only if at least one such line parses; on any error
+        the original history is kept and a warning is logged.
+
+        Args:
+            moderator: Moderator expert (not used by the current implementation)
+            llm_gateway: Object with an awaitable ``chat(messages=..., model=...)``
+        """
+        if not self._history or len(self._history) < 10:
+            return
+
+        # Capture round count and each speaker's role before the history is
+        # replaced, so summaries/interventions are not relabelled as "expert".
+        round_count = len({entry["round"] for entry in self._history})
+        known_roles = {entry["expert_name"]: entry["role"] for entry in self._history}
+
+        # Build compression prompt
+        history_text = self.get_history_text()
+        prompt = (
+            "你是私董会主持人。请压缩以下讨论历史，保留每轮的关键观点和核心论点，"
+            "去除冗余内容。每轮压缩为 2-3 句话。\n\n"
+            f"讨论历史:\n{history_text}\n\n"
+            "请输出压缩后的历史，保持原有的轮次结构和专家名。格式:\n"
+            "[第X轮 | 专家名] 压缩后的观点"
+        )
+
+        try:
+            response = await llm_gateway.chat(
+                messages=[{"role": "user", "content": prompt}],
+                model="default",
+            )
+            compressed = response.content.strip()
+
+            # Best-effort parse: lines that are not round headers are ignored.
+            new_history: list[dict[str, Any]] = []
+            for line in compressed.split("\n"):
+                line = line.strip()
+                if not (line.startswith("[第") and "轮" in line):
+                    continue
+                # The header is the text between "[" and the first "]"; the name is
+                # read from it so the trailing "] text" never ends up in expert_name.
+                header, closed, body = line[1:].partition("]")
+                fields = [part.strip() for part in header.split("|")]
+                try:
+                    current_round = int(fields[0].split("第")[1].split("轮")[0])
+                except (ValueError, IndexError):
+                    continue
+                name_part = fields[1] if len(fields) > 1 and fields[1] else "unknown"
+                new_history.append({
+                    "round": current_round,
+                    "expert_name": name_part,
+                    "content": body.strip() if closed else line,
+                    "timestamp": time.time(),
+                    "role": known_roles.get(name_part, "expert"),
+                })
+
+            if new_history:
+                self._history = new_history
+                logger.info(f"History compressed: {round_count} rounds, {len(new_history)} entries")
+        except Exception as e:
+            logger.warning(f"History compression failed: {e}, keeping original history")
+
+    async def add_user_intervention(self, content: str) -> None:
+        """Add a user intervention message to the discussion.
+
+        The message will be visible to all experts in the next round.
+
+        Args:
+            content: User's intervention message
+        """
+        self._user_interventions.append(content)
+        await self.add_to_history(
+            round=self._current_round,
+            expert_name="user",
+            content=content,
+            role="user",
+        )
+
+        # Broadcast user intervention event
+        await self._handoff_transport.send(
+            self._team_channel,
+            {
+                "type": "user_intervention",
+                "content": content,
+                "round": self._current_round,
+            },
+        )
+
+    def consume_user_interventions(self) -> list[str]:
+        """Get and clear pending user interventions.
+
+        Returns:
+            List of user intervention messages
+        """
+        interventions = self._user_interventions.copy()
+        self._user_interventions.clear()
+        return interventions
+
+    def increment_round(self) -> int:
+        """Increment the current round counter.
+
+        Returns:
+            The new round number
+        """
+        self._current_round += 1
+        return self._current_round
+
+    async def dissolve(self) -> None:
+        """Dissolve the board. Experts are recycled, outputs preserved."""
+        for expert in self._experts.values():
+            if expert.is_active and self._pool:
+                await expert.destroy(self._pool)
+
+        self._experts.clear()
+        self._moderator_name = None
+        self._status = BoardStatus.DISSOLVED
+        self._handoff_transport.close()
diff --git a/src/agentkit/experts/board_orchestrator.py b/src/agentkit/experts/board_orchestrator.py
new file mode 100644
index 0000000..a6b2276
--- /dev/null
+++ b/src/agentkit/experts/board_orchestrator.py
@@ -0,0 +1,523 @@
+"""BoardOrchestrator - 私董会讨论引擎
+
+驱动 BoardTeam 执行多轮群聊式讨论：
+
+1. 主持人开场介绍议题和讨论规则
+2. 循环 max_rounds 轮：
+   - 所有非主持人专家并行生成发言（基于共享讨论历史 + 角色 prompt）
+   - 主持人小结本轮要点
+   - 检查用户干预和停止命令
+3. 主持人最终总结（决策建议、共识点、分歧点）
+
+终止条件：
+- 正常终止：达到最大轮次
+- 用户终止：用户发送 /stop
+- 异常终止：LLM 不可用或所有专家发言失败
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from typing import Any
+
+from .expert import Expert
+from .board import BoardTeam, BoardStatus
+
+logger = logging.getLogger(__name__)
+
+
+class BoardOrchestrator:
+    """Board meeting orchestration engine.
+
+    The moderator (lead expert) facilitates the discussion:
+    - Opens with topic introduction
+    - Summarizes each round
+    - Gives final decision advice
+
+    Member experts give speeches each round based on shared history.
+    """
+
+    STOP_COMMANDS = frozenset({"/stop", "停止讨论", "stop", "结束讨论"})
+
+    def __init__(self, team: BoardTeam) -> None:
+        self._team: BoardTeam = team  # board whose discussion execute() drives
+
+    async def execute(self, topic: str) -> dict[str, Any]:
+        """Execute a board meeting discussion.
+
+        Flow:
+        1. Broadcast board_started event
+        2. Moderator opens the discussion (recorded as round 0)
+        3. Loop up to max_rounds times:
+           - Stop if a pending user intervention is a stop command
+           - Generate member speeches concurrently, then the round summary
+           - Compress history once it exceeds 20 entries
+        4. Moderator gives final conclusion
+        5. Broadcast board_concluded event
+
+        Returns:
+            Dict with status, summary, decision_advice, total_rounds,
+            consensus_points, dissent_points (plus "error" on failure)
+        """
+        moderator = self._team.moderator
+        if not moderator or not moderator.is_active:
+            active = self._team.active_experts
+            if not active:
+                return {
+                    "status": "failed",
+                    "summary": "",
+                    "decision_advice": "",
+                    "total_rounds": 0,
+                    "consensus_points": [],
+                    "dissent_points": [],
+                    "error": "No active expert available",
+                }
+            # Promote first active expert to moderator
+            self._team._moderator_name = active[0].config.name
+            moderator = active[0]
+            logger.warning(
+                f"Moderator not available, falling back to '{moderator.config.name}'"
+            )
+
+        self._team.set_status(BoardStatus.DISCUSSING)
+
+        # 1. Broadcast board_started event
+        await self._broadcast_event(
+            "board_started",
+            {
+                "team_id": self._team.team_id,
+                "topic": topic,
+                "experts": [
+                    {
+                        "name": e.config.name,
+                        "avatar": e.config.avatar,
+                        "color": e.config.color,
+                        "is_moderator": e.config.name == self._team._moderator_name,
+                        "persona": e.config.persona[:100],
+                    }
+                    for e in self._team.active_experts
+                ],
+                "max_rounds": self._team.max_rounds,
+            },
+        )
+
+        try:
+            # 2. Moderator opens the discussion
+            opening = await self._generate_moderator_opening(moderator, topic)
+            if opening:
+                await self._team.add_to_history(0, moderator.config.name, opening, "moderator")
+                await self._broadcast_event(
+                    "expert_speech",
+                    {
+                        "expert_name": moderator.config.name,
+                        "expert_avatar": moderator.config.avatar,
+                        "expert_color": moderator.config.color,
+                        "content": opening,
+                        "round": 0,
+                        "role": "moderator",
+                    },
+                )
+
+            # 3. Discussion rounds
+            for round_num in range(1, self._team.max_rounds + 1):
+                # Check for a stop command before counting the round, so that
+                # total_rounds only reflects rounds that actually started.
+                interventions = self._team.consume_user_interventions()
+                if self._has_stop_command(interventions):
+                    logger.info(f"Discussion stopped by user at round {round_num}")
+                    break
+                self._team.increment_round()
+
+                # Generate member speeches in parallel
+                members = self._team.member_experts
+                if members:
+                    speech_results = await asyncio.gather(
+                        *[self._generate_expert_speech(e, round_num) for e in members],
+                        return_exceptions=True,
+                    )
+
+                    # Broadcast speeches in order (not parallel broadcast)
+                    for expert, result in zip(members, speech_results):
+                        if isinstance(result, Exception):
+                            logger.warning(
+                                f"Expert '{expert.config.name}' speech failed: {result}"
+                            )
+                            continue
+
+                        await self._team.add_to_history(
+                            round_num, expert.config.name, result, "expert"
+                        )
+                        await self._broadcast_event(
+                            "expert_speech",
+                            {
+                                "expert_name": expert.config.name,
+                                "expert_avatar": expert.config.avatar,
+                                "expert_color": expert.config.color,
+                                "content": result,
+                                "round": round_num,
+                                "role": "expert",
+                            },
+                        )
+
+                # Moderator summarizes the round
+                summary = await self._generate_moderator_summary(moderator, round_num)
+                if summary:
+                    await self._team.add_to_history(
+                        round_num, moderator.config.name, summary, "moderator"
+                    )
+                    await self._broadcast_event(
+                        "round_summary",
+                        {
+                            "moderator_name": moderator.config.name,
+                            "content": summary,
+                            "round": round_num,
+                            "continue": round_num < self._team.max_rounds,
+                        },
+                    )
+
+                # Check history length and compress if needed
+                gateway = self._get_llm_gateway(moderator)
+                if gateway and len(self._team.history) > 20:
+                    await self._team.compress_history(moderator, gateway)
+
+            # 4. Final conclusion
+            self._team.set_status(BoardStatus.CONCLUDING)
+            conclusion = await self._generate_final_conclusion(moderator, topic)
+
+            self._team.set_status(BoardStatus.COMPLETED)
+
+            # 5. Broadcast board_concluded event
+            await self._broadcast_event(
+                "board_concluded",
+                {
+                    "summary": conclusion.get("summary", ""),
+                    "decision_advice": conclusion.get("decision_advice", ""),
+                    "total_rounds": self._team.current_round,
+                    "consensus_points": conclusion.get("consensus_points", []),
+                    "dissent_points": conclusion.get("dissent_points", []),
+                },
+            )
+
+            return {
+                "status": "completed",
+                "summary": conclusion.get("summary", ""),
+                "decision_advice": conclusion.get("decision_advice", ""),
+                "total_rounds": self._team.current_round,
+                "consensus_points": conclusion.get("consensus_points", []),
+                "dissent_points": conclusion.get("dissent_points", []),
+            }
+
+        except Exception as e:
+            logger.exception("Board meeting execution failed: %s", e)
+            self._team.set_status(BoardStatus.DISSOLVED)  # NOTE(review): sets the status only; dissolve() is not called here
+
+            # Try to give a fallback conclusion
+            fallback = await self._generate_fallback_conclusion(moderator, topic)
+
+            await self._broadcast_event(
+                "board_concluded",
+                {
+                    "summary": fallback.get("summary", ""),
+                    "decision_advice": fallback.get("decision_advice", ""),
+                    "total_rounds": self._team.current_round,
+                    "consensus_points": [],
+                    "dissent_points": [],
+                    "error": str(e),
+                },
+            )
+
+            return {
+                "status": "failed",
+                "summary": fallback.get("summary", ""),
+                "decision_advice": fallback.get("decision_advice", ""),
+                "total_rounds": self._team.current_round,
+                "consensus_points": [],
+                "dissent_points": [],
+                "error": str(e),
+            }
+
+    async def _generate_moderator_opening(self, moderator: Expert, topic: str) -> str:
+        """Produce the moderator's opening remarks for ``topic``.
+
+        A canned greeting is returned when no gateway is available or the call fails.
+        """
+        canned = f"欢迎来到私董会。今天的讨论主题是：{topic}。请各位专家发表看法。"
+        llm = self._get_llm_gateway(moderator)
+        if not llm:
+            return canned
+
+        cfg = moderator.config
+        opening_prompt = (
+            f"你是私董会主持人 {cfg.name}。\n"
+            f"你的角色：{cfg.persona}\n"
+            f"你的表达风格：{cfg.speaking_style}\n\n"
+            f"讨论主题：{topic}\n\n"
+            "请作为主持人开场，介绍议题并邀请各位专家发表看法。"
+            "开场应该简洁有力，2-3 段话，点明讨论的核心问题。"
+        )
+        try:
+            reply = await llm.chat(
+                messages=[{"role": "user", "content": opening_prompt}], model="default"
+            )
+            return reply.content.strip()
+        except Exception as err:
+            logger.warning(f"Moderator opening generation failed: {err}")
+            return canned
+
+    async def _generate_expert_speech(self, expert: Expert, round: int) -> str:
+        """Ask the LLM for ``expert``'s speech in the given round.
+
+        The prompt combines the expert's config (persona, thinking and speaking
+        style, decision framework), the board topic, the round counters and, when
+        non-empty, the formatted discussion history. A placeholder string is
+        returned when no gateway is available; errors from the call propagate.
+        """
+        llm = self._get_llm_gateway(expert)
+        if not llm:
+            return f"[{expert.config.name} 因 LLM 不可用无法发言]"
+
+        transcript = self._team.get_history_text()
+        cfg = expert.config
+
+        sections = [
+            f"你是 {cfg.name}，正在参加私董会讨论。\n\n"
+            f"你的角色：{cfg.persona}\n"
+            f"你的思维风格：{cfg.thinking_style}\n"
+            f"你的表达风格：{cfg.speaking_style}\n"
+            f"你的决策框架：{cfg.decision_framework}\n\n"
+            f"讨论主题：{self._team.topic}\n"
+            f"当前轮次：第 {round} 轮 / 共 {self._team.max_rounds} 轮\n\n"
+        ]
+
+        if transcript:
+            sections.append(f"之前的讨论历史：\n{transcript}\n\n")
+
+        sections.append(
+            "请基于你的角色和决策框架，就当前讨论主题发表你的看法。"
+            "要求：\n"
+            "- 保持角色一致性，用你的思维方式和表达风格发言\n"
+            "- 2-4 段话，简洁但有洞察力\n"
+            "- 可以引用或反驳之前发言者的观点\n"
+            "- 给出明确的立场或建议\n"
+        )
+
+        reply = await llm.chat(
+            messages=[{"role": "user", "content": "".join(sections)}], model="default"
+        )
+        return reply.content.strip()
+
+    async def _generate_moderator_summary(self, moderator: Expert, round: int) -> str:
+        """Have the moderator recap the history entries recorded for ``round``.
+
+        Returns "" when the round has no history entries, and a bracketed
+        placeholder when no gateway is available or the LLM call fails.
+        """
+        llm = self._get_llm_gateway(moderator)
+        if not llm:
+            return f"[第 {round} 轮小结因 LLM 不可用无法生成]"
+
+        # Keep only entries tagged with this round, rendered as "[name]: content".
+        speeches = [
+            f"[{item['expert_name']}]: {item['content']}"
+            for item in self._team.history
+            if item["round"] == round
+        ]
+        if not speeches:
+            return ""
+
+        round_text = "\n\n".join(speeches)
+        cfg = moderator.config
+        summary_prompt = (
+            f"你是私董会主持人 {cfg.name}。\n"
+            f"你的角色：{cfg.persona}\n"
+            f"你的表达风格：{cfg.speaking_style}\n\n"
+            f"讨论主题：{self._team.topic}\n"
+            f"当前轮次：第 {round} 轮 / 共 {self._team.max_rounds} 轮\n\n"
+            f"本轮发言：\n{round_text}\n\n"
+            "请作为主持人小结本轮讨论：\n"
+            "- 归纳各方核心观点（2-3 句话）\n"
+            "- 指出共识点和分歧点\n"
+            "- 提示下一轮可以深入的方向\n"
+            "- 保持简洁，3-5 句话\n"
+        )
+
+        try:
+            reply = await llm.chat(
+                messages=[{"role": "user", "content": summary_prompt}], model="default"
+            )
+            return reply.content.strip()
+        except Exception as err:
+            logger.warning(f"Moderator summary generation failed: {err}")
+            return f"[第 {round} 轮讨论完成，主持人小结生成失败]"
+
+    async def _generate_final_conclusion(self, moderator: Expert, topic: str) -> dict[str, Any]:
+        """Generate moderator's final conclusion.
+
+        The moderator gives:
+        - Overall summary of the discussion
+        - Decision advice
+        - Consensus points
+        - Dissent points
+        """
+        gateway = self._get_llm_gateway(moderator)
+        if not gateway:
+            return {
+                "summary": "讨论已完成，但 LLM 不可用无法生成总结。",
+                "decision_advice": "建议参考讨论历史自行判断。",
+                "consensus_points": [],
+                "dissent_points": [],
+            }
+
+        history_text = self._team.get_history_text()
+
+        prompt = (
+            f"你是私董会主持人 {moderator.config.name}。\n"
+            f"你的角色：{moderator.config.persona}\n"
+            f"你的表达风格：{moderator.config.speaking_style}\n"
+            f"你的决策框架：{moderator.config.decision_framework}\n\n"
+            f"讨论主题：{topic}\n"
+            f"总轮次：{self._team.current_round}\n\n"
+            f"完整讨论历史：\n{history_text}\n\n"
+            "请作为主持人给出最终总结。输出 JSON 格式：\n"
+            "```json\n"
+            "{\n"
+            '  "summary": "整体讨论总结，3-5句话",\n'
+            '  "decision_advice": "基于讨论的决策建议，明确给出你的推荐",\n'
+            '  "consensus_points": ["共识点1", "共识点2"],\n'
+            '  "dissent_points": ["分歧点1", "分歧点2"]\n'
+            "}\n"
+            "```\n"
+            "只输出 JSON，不要其他文字。"
+        )
+
+        try:
+            import json
+            import re
+
+            response = await gateway.chat(
+                messages=[{"role": "user", "content": prompt}],
+                model="default",
+            )
+            content = response.content.strip()
+
+            # Extract JSON from response
+            json_match = re.search(r"\{.*\}", content, re.DOTALL)
+            if json_match:
+                result = json.loads(json_match.group(0))
+                return {
+                    "summary": result.get("summary", ""),
+                    "decision_advice": result.get("decision_advice", ""),
+                    "consensus_points": result.get("consensus_points", []),
+                    "dissent_points": result.get("dissent_points", []),
+                }
+
+            # If JSON parsing fails, return raw content as summary
+            return {
+                "summary": content,
+                "decision_advice": "",
+                "consensus_points": [],
+                "dissent_points": [],
+            }
+        except Exception as e:
+            logger.warning(f"Final conclusion generation failed: {e}")
+            return {
+                "summary": f"讨论已完成（{self._team.current_round}轮），总结生成失败。",
+                "decision_advice": "建议参考讨论历史自行判断。",
+                "consensus_points": [],
+                "dissent_points": [],
+            }
+
+    async def _generate_fallback_conclusion(self, moderator: Expert, topic: str) -> dict[str, Any]:
+        """Generate a fallback conclusion when execution fails.
+
+        Uses existing discussion history to provide a basic summary.
+        """
+        history_text = self._team.get_history_text()
+        if not history_text:
+            return {
+                "summary": "讨论未能正常完成，无历史记录。",
+                "decision_advice": "",
+            }
+
+        gateway = self._get_llm_gateway(moderator)
+        if not gateway:
+            # Return truncated history as summary
+            return {
+                "summary": f"讨论异常终止。已有历史（{len(self._team.history)}条）：\n"
+                + history_text[:500],
+                "decision_advice": "建议参考讨论历史自行判断。",
+            }
+
+        prompt = (
+            f"你是私董会主持人 {moderator.config.name}。\n"
+            f"讨论主题：{topic}\n"
+            f"讨论因异常终止，已完成 {self._team.current_round} 轮。\n\n"
+            f"已有讨论历史：\n{history_text}\n\n"
+            "请基于已有历史给出总结和决策建议。输出 JSON：\n"
+            "```json\n"
+            '{"summary": "...", "decision_advice": "..."}\n'
+            "```\n"
+        )
+
+        try:
+            import json
+            import re
+
+            response = await gateway.chat(
+                messages=[{"role": "user", "content": prompt}],
+                model="default",
+            )
+            content = response.content.strip()
+            json_match = re.search(r"\{.*\}", content, re.DOTALL)
+            if json_match:
+                result = json.loads(json_match.group(0))
+                return {
+                    "summary": result.get("summary", content),
+                    "decision_advice": result.get("decision_advice", ""),
+                }
+            return {"summary": content, "decision_advice": ""}
+        except Exception:
+            return {
+                "summary": f"讨论异常终止，已完成 {self._team.current_round} 轮。",
+                "decision_advice": "",
+            }
+
+    def _has_stop_command(self, interventions: list[str]) -> bool:
+        """Check if any user intervention contains a stop command."""
+        for msg in interventions:
+            msg_lower = msg.strip().lower()
+            if msg_lower in self.STOP_COMMANDS:
+                return True
+        return False
+
+    def _get_llm_gateway(self, expert: Expert | None = None) -> Any:
+        """Get LLM gateway from the given expert or the moderator's agent.
+
+        Falls back to other active experts if the primary target has no gateway.
+        """
+        target = expert or self._team.moderator
+        if target and hasattr(target, "agent") and hasattr(target.agent, "_llm_gateway"):
+            gateway = target.agent._llm_gateway
+            if gateway is not None:
+                return gateway
+        # Fallback: try first active expert with a gateway
+        for exp in self._team.active_experts:
+            if hasattr(exp, "agent") and hasattr(exp.agent, "_llm_gateway"):
+                gateway = exp.agent._llm_gateway
+                if gateway is not None:
+                    return gateway
+        return None
+
+    async def _broadcast_event(self, event_type: str, data: dict[str, Any]) -> None:
+        """Broadcast a board event to the team channel.
+
+        Events are emitted via handoff_transport for WebSocket relay.
+        """
+        if self._team.handoff_transport:
+            try:
+                await self._team.handoff_transport.send(
+                    self._team.team_channel, {"type": event_type, **data}
+                )
+            except Exception as e:
+                logger.warning(f"Failed to broadcast event '{event_type}': {e}")
diff --git a/src/agentkit/experts/board_router.py b/src/agentkit/experts/board_router.py
new file mode 100644
index 0000000..dc7a4a7
--- /dev/null
+++ b/src/agentkit/experts/board_router.py
@@ -0,0 +1,179 @@
+"""BoardRouter - 私董会讨论模式路由
+
+解析 @board 前缀，支持指定专家或使用默认私董会模板。
+
+支持格式：
+- @board:expert1,expert2 讨论主题 — 指定专家
+- @board 讨论主题 — 使用默认 private_board 模板
+- @board:private_board 讨论主题 — 显式使用默认模板
+"""
+
+from __future__ import annotations
+
+import copy
+import logging
+import re
+from dataclasses import dataclass, field
+
+from .config import ExpertConfig
+from .registry import ExpertTemplateRegistry
+
+logger = logging.getLogger(__name__)
+
+
+# Pattern to match @board or @board:expert1,expert2 prefix
+BOARD_PREFIX_PATTERN = re.compile(r"^@board(?::(\S+))?\s*(.*)", re.DOTALL)
+
+# Valid expert name: alphanumeric, underscore, hyphen, 1-64 chars
+_EXPERT_NAME_RE = re.compile(r"^[a-zA-Z0-9_-]{1,64}$")
+
+MAX_EXPERTS = 10  # Maximum number of experts in a board
+DEFAULT_TEMPLATE = "private_board"
+
+
+@dataclass
+class BoardRoutingResult:
+    """Result of board routing resolution.
+
+    A default-constructed instance represents "no match": every flag is
+    False and every text field is empty.
+
+    Attributes:
+        matched: Whether the input matched @board prefix
+        board_mode: Whether board mode is activated
+        specified_experts: List of expert names specified by user, or the
+            default template's members when the default template is used
+        topic: Discussion topic extracted from input (the whole stripped
+            input when the prefix did not match)
+        use_default_template: Whether to use default private_board template
+        match_method: How the match was made ("explicit_board" | "default_template" | "")
+            NOTE(review): BoardRouter.resolve only ever sets "explicit_board";
+            "default_template" is not assigned anywhere in this module.
+    """
+
+    matched: bool = False
+    board_mode: bool = False
+    # default_factory gives every instance its own list.
+    specified_experts: list[str] = field(default_factory=list)
+    topic: str = ""
+    use_default_template: bool = False
+    match_method: str = ""
+
+
+class BoardRouter:
+    """Routes user input to Board Meeting mode via @board prefix.
+
+    Supports:
+    - @board prefix → trigger board mode with default private_board template
+    - @board:expert1,expert2 → specify board members by name
+    - @board:private_board → explicitly use default template
+    """
+
+    def __init__(self, template_registry: ExpertTemplateRegistry | None = None):
+        self._registry = template_registry or ExpertTemplateRegistry()
+
+    def resolve(self, content: str) -> BoardRoutingResult:
+        """Resolve user input to a BoardRoutingResult.
+
+        Args:
+            content: User's input message
+
+        Returns:
+            BoardRoutingResult with routing decision
+        """
+        result = BoardRoutingResult()
+
+        match = BOARD_PREFIX_PATTERN.match(content.strip())
+        if not match:
+            result.matched = False
+            result.board_mode = False
+            result.topic = content.strip()
+            return result
+
+        expert_list_str = match.group(1)  # e.g., "expert1,expert2" or None
+        topic = match.group(2).strip()  # The actual topic content
+
+        result.matched = True
+        result.board_mode = True
+        result.topic = topic if topic else ""
+        result.match_method = "explicit_board"
+
+        if expert_list_str:
+            # Check if user specified the default template name
+            if expert_list_str.strip() == DEFAULT_TEMPLATE:
+                result.use_default_template = True
+                result.specified_experts = self._load_default_template_members()
+            else:
+                # User specified expert names — validate and limit
+                raw_names = [name.strip() for name in expert_list_str.split(",")]
+                valid_names = [n for n in raw_names if _EXPERT_NAME_RE.match(n)]
+                if len(valid_names) != len(raw_names):
+                    invalid = set(raw_names) - set(valid_names)
+                    logger.warning(f"Invalid expert names rejected: {invalid}")
+                if valid_names:
+                    result.specified_experts = valid_names[:MAX_EXPERTS]
+                    result.use_default_template = False
+                else:
+                    # All names invalid — fall back to default template
+                    logger.warning(
+                        "All expert names invalid, falling back to default template"
+                    )
+                    result.use_default_template = True
+                    result.specified_experts = self._load_default_template_members()
+        else:
+            # No specific experts — use default template
+            result.use_default_template = True
+            result.specified_experts = self._load_default_template_members()
+
+        return result
+
+    def resolve_expert_configs(self, specified_experts: list[str]) -> list[ExpertConfig]:
+        """Resolve expert names to ExpertConfig instances.
+
+        For names that match templates, use the template config.
+        For names that don't match, create a dynamic ExpertConfig.
+        The first expert is designated as moderator (is_lead=True).
+        """
+        configs: list[ExpertConfig] = []
+        for i, name in enumerate(specified_experts):
+            if not _EXPERT_NAME_RE.match(name):
+                logger.warning(f"Skipping invalid expert name: {name}")
+                continue
+
+            template = self._registry.get(name)
+            if template:
+                # Deep-copy to avoid mutating the shared template config
+                config = copy.deepcopy(template.config)
+                # Override is_lead: first expert is moderator
+                config.is_lead = i == 0
+                configs.append(config)
+            else:
+                # Dynamic generation — create a basic ExpertConfig
+                config = ExpertConfig(
+                    name=name,
+                    agent_type="expert",
+                    persona=f"Expert in {name}",
+                    thinking_style="analytical",
+                    bound_skills=[],
+                    is_lead=(i == 0),
+                    task_mode="llm_generate",
+                    prompt={"identity": f"Expert in {name}"},
+                )
+                configs.append(config)
+
+        # Ensure at least one expert is lead
+        if configs and not any(c.is_lead for c in configs):
+            configs[0].is_lead = True
+
+        return configs
+
+    def _load_default_template_members(self) -> list[str]:
+        """Load member list from the default private_board template.
+
+        The private_board template is stored as an ExpertTemplate with
+        a special 'members' field in its config metadata.
+
+        Falls back to a hardcoded list if the template is not found.
+        """
+        template = self._registry.get(DEFAULT_TEMPLATE)
+        if template:
+            # The private_board template stores members in config.bound_skills
+            # as a reuse of existing field (avoids schema changes)
+            members = template.config.bound_skills
+            if members:
+                return members[:MAX_EXPERTS]
+
+        # Fallback default members
+        return ["elon_musk", "jeff_bezos", "allenzhang", "charlie_munger", "paul_graham"]
diff --git a/src/agentkit/experts/config.py b/src/agentkit/experts/config.py
index 79b8615..b6b9a42 100644
--- a/src/agentkit/experts/config.py
+++ b/src/agentkit/experts/config.py
@@ -39,6 +39,9 @@ class ExpertConfig(AgentConfig):
         avatar: str = "",
         color: str = "#1890ff",
         is_lead: bool = False,
+        # Board Meeting 模式字段
+        speaking_style: str = "",
+        decision_framework: str = "",
     ):
         super().__init__(
             name=name,
@@ -63,6 +66,8 @@ class ExpertConfig(AgentConfig):
         self.avatar = avatar
         self.color = color
         self.is_lead = is_lead
+        self.speaking_style = speaking_style
+        self.decision_framework = decision_framework
 
     @classmethod
     def from_dict(cls, data: dict[str, Any]) -> ExpertConfig:
@@ -89,6 +94,8 @@ class ExpertConfig(AgentConfig):
             avatar=data.get("avatar", ""),
             color=data.get("color", "#1890ff"),
             is_lead=data.get("is_lead", False),
+            speaking_style=data.get("speaking_style", ""),
+            decision_framework=data.get("decision_framework", ""),
         )
 
     def to_dict(self) -> dict[str, Any]:
@@ -101,6 +108,8 @@ class ExpertConfig(AgentConfig):
         d["avatar"] = self.avatar
         d["color"] = self.color
         d["is_lead"] = self.is_lead
+        d["speaking_style"] = self.speaking_style
+        d["decision_framework"] = self.decision_framework
         return d
 
 
diff --git a/src/agentkit/server/app.py b/src/agentkit/server/app.py
index 7a0c5e7..e2543c1 100644
--- a/src/agentkit/server/app.py
+++ b/src/agentkit/server/app.py
@@ -326,6 +326,50 @@ async def lifespan(app: FastAPI):
             memory_store._on_change = _on_memory_change
             app.state.memory_store = memory_store
 
+    # Load ExpertTemplates from configured paths (supports @board meeting mode)
+    # This runs regardless of GUI mode so @board works in API-only mode too.
+    try:
+        from agentkit.experts.registry import ExpertTemplateRegistry
+
+        expert_registry = ExpertTemplateRegistry()
+
+        # Always try to load from the default configs/experts/ directory
+        default_experts_dir = os.path.join(
+            os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))),
+            "configs",
+            "experts",
+        )
+        expert_dirs: list[str] = [default_experts_dir]
+
+        # Append user-configured paths from agentkit.yaml
+        if server_config and getattr(server_config, "expert_paths", None):
+            expert_dirs.extend(server_config.expert_paths)
+
+        total_loaded = 0
+        for experts_dir in expert_dirs:
+            if not experts_dir:
+                continue
+            from pathlib import Path as _P
+
+            p = _P(experts_dir)
+            if p.is_dir():
+                loaded = expert_registry.load_from_directory(str(p))
+                if loaded:
+                    logger.info(
+                        f"Loaded {len(loaded)} ExpertTemplates from {p}"
+                    )
+                    total_loaded += len(loaded)
+
+        app.state.expert_template_registry = expert_registry
+        if total_loaded:
+            logger.info(f"Total {total_loaded} ExpertTemplates registered for @board mode")
+    except Exception as e:
+        logger.warning(f"Failed to load ExpertTemplates: {e}")
+        # Ensure app.state.expert_template_registry always exists (empty registry)
+        from agentkit.experts.registry import ExpertTemplateRegistry
+
+        app.state.expert_template_registry = ExpertTemplateRegistry()
+
     yield
 
     # Shutdown
@@ -595,6 +639,11 @@ def create_app(
     )
     app.state.request_preprocessor = request_preprocessor
 
+    # Initialize ExpertTemplateRegistry (populated in lifespan with YAML configs)
+    from agentkit.experts.registry import ExpertTemplateRegistry
+
+    app.state.expert_template_registry = ExpertTemplateRegistry()
+
     # Initialize OrganizationContext from AgentPool + SkillRegistry
     from agentkit.org.context import OrganizationContext
 
diff --git a/src/agentkit/server/config.py b/src/agentkit/server/config.py
index ebcbb03..ef07bf9 100644
--- a/src/agentkit/server/config.py
+++ b/src/agentkit/server/config.py
@@ -114,6 +114,8 @@ class ServerConfig:
         usage_store: dict[str, Any] | None = None,
         cascade_store: dict[str, Any] | None = None,
         evolution: dict[str, Any] | None = None,
+        expert_paths: list[str] | None = None,
+        board: dict[str, Any] | None = None,
         on_change: Callable[["ServerConfig"], None] | None = None,
     ):
         self.host = host
@@ -140,6 +142,8 @@ class ServerConfig:
         self.usage_store = usage_store or {}
         self.cascade_store = cascade_store or {}
         self.evolution = evolution or {}
+        self.expert_paths = expert_paths or []
+        self.board = board or {}
         self.on_change = on_change
 
         # Config watching state
@@ -216,6 +220,13 @@ class ServerConfig:
         # Evolution store config
         evolution_data = data.get("evolution", {})
 
+        # Expert templates config (paths to YAML files defining ExpertTemplates)
+        experts_data = data.get("experts", {})
+        expert_paths = experts_data.get("paths", [])
+
+        # Board meeting config (max_rounds, default_template, etc.)
+        board_data = data.get("board", {})
+
         return cls(
             host=server.get("host", "0.0.0.0"),
             port=server.get("port", 8001),
@@ -241,6 +252,8 @@ class ServerConfig:
             usage_store=usage_store_data,
             cascade_store=cascade_store_data,
             evolution=evolution_data,
+            expert_paths=expert_paths,
+            board=board_data,
         )
 
     @staticmethod
@@ -436,6 +449,8 @@ class ServerConfig:
         self.marketplace = new_config.marketplace
         self.alignment = new_config.alignment
         self.router = new_config.router
+        self.expert_paths = new_config.expert_paths
+        self.board = new_config.board
         self._last_mtime = new_config._last_mtime
 
         logger.info(f"Config reloaded from {path}")
diff --git a/src/agentkit/server/frontend/components.d.ts b/src/agentkit/server/frontend/components.d.ts
index 2d5ed7b..42231c0 100644
--- a/src/agentkit/server/frontend/components.d.ts
+++ b/src/agentkit/server/frontend/components.d.ts
@@ -88,6 +88,7 @@ declare module 'vue' {
     SplashScreen: typeof import('./src/components/layout/SplashScreen.vue')['default']
     SplitPane: typeof import('./src/components/layout/SplitPane.vue')['default']
     TerminalEmulator: typeof import('./src/components/terminal/TerminalEmulator.vue')['default']
+    ThinkingBlock: typeof import('./src/components/chat/ThinkingBlock.vue')['default']
     TitleBar: typeof import('./src/components/layout/TitleBar.vue')['default']
     ToolCallCard: typeof import('./src/components/chat/ToolCallCard.vue')['default']
     ToolCallIndicator: typeof import('./src/components/chat/ToolCallIndicator.vue')['default']
diff --git a/src/agentkit/server/frontend/src/api/types.ts b/src/agentkit/server/frontend/src/api/types.ts
index 06f0353..fe641c3 100644
--- a/src/agentkit/server/frontend/src/api/types.ts
+++ b/src/agentkit/server/frontend/src/api/types.ts
@@ -44,7 +44,10 @@ export interface IChatMessage {
   expert_id?: string
   expert_name?: string
   expert_color?: string
-  message_type?: 'chat' | 'handoff' | 'assist_request' | 'plan_update' | 'milestone'
+  expert_avatar?: string
+  message_type?: 'chat' | 'handoff' | 'assist_request' | 'plan_update' | 'milestone' | 'board_speech' | 'board_summary' | 'board_conclusion'
+  board_round?: number
+  board_role?: 'moderator' | 'expert' | 'user' | 'summary'
 }
 
 /** Conversation with messages */
@@ -103,6 +106,12 @@ export type WsServerMessage =
   | { type: 'plan_update'; data: { plan_phases: ITeamPlanPhase[] } }
   | { type: 'team_synthesis'; data: { content: string } }
   | { type: 'team_dissolved'; data: { team_id: string } }
+  // Board Meeting 模式事件
+  | { type: 'board_started'; data: IBoardStartedData }
+  | { type: 'expert_speech'; data: IExpertSpeechData }
+  | { type: 'round_summary'; data: IRoundSummaryData }
+  | { type: 'user_intervention'; data: IUserInterventionData }
+  | { type: 'board_concluded'; data: IBoardConcludedData }
 
 /** Expert info within a team */
 export interface IExpertInfo {
@@ -135,6 +144,74 @@ export interface IExpertTeamState {
   lead_expert: string
 }
 
+// ── Board Meeting 模式类型 ────────────────────────────────────────────
+
+/** Board meeting expert info (lighter than IExpertInfo) */
+export interface IBoardExpert {
+  name: string
+  // Rendered as plain text next to the name in BoardStatusView.
+  avatar: string
+  // Used as the border color of the expert chip in BoardStatusView.
+  color: string
+  // When true, BoardStatusView shows the moderator tag on the chip.
+  is_moderator: boolean
+  persona: string
+}
+
+/** board_started event payload (WsServerMessage type 'board_started') */
+export interface IBoardStartedData {
+  team_id: string
+  topic: string
+  experts: IBoardExpert[]
+  max_rounds: number
+}
+
+/** expert_speech event payload (WsServerMessage type 'expert_speech') */
+export interface IExpertSpeechData {
+  expert_name: string
+  expert_avatar: string
+  expert_color: string
+  content: string
+  round: number
+  role: 'moderator' | 'expert'
+}
+
+/** round_summary event payload (WsServerMessage type 'round_summary') */
+export interface IRoundSummaryData {
+  moderator_name: string
+  content: string
+  round: number
+  // NOTE(review): presumably whether another round follows; confirm against the backend emitter.
+  continue: boolean
+}
+
+/** user_intervention event payload (WsServerMessage type 'user_intervention') */
+export interface IUserInterventionData {
+  content: string
+  round: number
+}
+
+/**
+ * board_concluded event payload (WsServerMessage type 'board_concluded').
+ * summary, decision_advice, consensus_points and dissent_points use the same
+ * keys as the conclusion dict built by the backend's final-conclusion step.
+ */
+export interface IBoardConcludedData {
+  summary: string
+  decision_advice: string
+  total_rounds: number
+  consensus_points: string[]
+  dissent_points: string[]
+  // NOTE(review): presumably set when the discussion ended abnormally; confirm against the backend.
+  error?: string
+}
+
+/** Board meeting status (matches backend BoardStatus enum) */
+export type BoardStatus = 'forming' | 'discussing' | 'concluding' | 'completed' | 'dissolved'
+
+/** Board message entry for group chat display */
+export interface IBoardMessage {
+  id: string
+  expert_name: string
+  expert_avatar: string
+  expert_color: string
+  content: string
+  round: number
+  // Same value set as IChatMessage.board_role.
+  role: 'moderator' | 'expert' | 'user' | 'summary'
+  timestamp: number
+}
+
 /** API error */
 export interface IApiError {
   status: number
diff --git a/src/agentkit/server/frontend/src/components/chat/BoardStatusView.vue b/src/agentkit/server/frontend/src/components/chat/BoardStatusView.vue
new file mode 100644
index 0000000..ce14dd7
--- /dev/null
+++ b/src/agentkit/server/frontend/src/components/chat/BoardStatusView.vue
@@ -0,0 +1,146 @@
+<template>
+  <div v-if="chatStore.boardState" class="board-status-view">
+    <div class="board-status-view__header">
+      <div class="board-status-view__title">
+        <span class="board-status-view__icon">🏛️</span>
+        <span class="board-status-view__label">私董会</span>
+        <a-tag v-if="chatStore.boardState.status === 'discussing'" color="processing" size="small">
+          讨论中
+        </a-tag>
+        <a-tag v-else-if="chatStore.boardState.status === 'concluding'" color="warning" size="small">
+          总结中
+        </a-tag>
+        <a-tag v-else-if="chatStore.boardState.status === 'completed'" color="success" size="small">
+          已完成
+        </a-tag>
+      </div>
+      <div class="board-status-view__topic">
+        {{ chatStore.boardState.topic }}
+      </div>
+    </div>
+
+    <div class="board-status-view__experts">
+      <div
+        v-for="expert in chatStore.boardState.experts"
+        :key="expert.name"
+        class="board-status-view__expert-chip"
+        :style="{ borderColor: expert.color }"
+      >
+        <span class="board-status-view__expert-avatar">{{ expert.avatar }}</span>
+        <span class="board-status-view__expert-name">{{ expert.name }}</span>
+        <a-tag v-if="expert.is_moderator" color="purple" size="small">主持人</a-tag>
+      </div>
+    </div>
+
+    <div class="board-status-view__progress">
+      <span class="board-status-view__round-info">
+        第 {{ chatStore.boardState.current_round }} / {{ chatStore.boardState.max_rounds }} 轮
+      </span>
+      <a-progress
+        :percent="progressPercent"
+        size="small"
+        :show-info="false"
+        :stroke-color="'#8E44AD'"
+      />
+    </div>
+  </div>
+</template>
+
+<script setup lang="ts">
+import { computed } from 'vue'
+import { Progress as AProgress, Tag as ATag } from 'ant-design-vue'
+import { useChatStore } from '@/stores/chat'
+
+const chatStore = useChatStore()
+
+// Share of rounds played so far, as a rounded percentage capped at 100.
+const progressPercent = computed(() => {
+  const board = chatStore.boardState
+  // No board state, or a non-positive round limit: report 0.
+  if (!board || board.max_rounds <= 0) return 0
+  const ratio = board.current_round / board.max_rounds
+  return Math.min(100, Math.round(ratio * 100))
+})
+</script>
+
+<style scoped>
+.board-status-view {
+  margin: 0 var(--space-4);
+  padding: var(--space-3) var(--space-4);
+  background: linear-gradient(135deg, rgba(142, 68, 173, 0.08), rgba(142, 68, 173, 0.03));
+  border: 1px solid rgba(142, 68, 173, 0.2);
+  border-radius: var(--radius-lg);
+  margin-bottom: var(--space-2);
+}
+
+.board-status-view__header {
+  display: flex;
+  flex-direction: column;
+  gap: var(--space-1);
+  margin-bottom: var(--space-2);
+}
+
+.board-status-view__title {
+  display: flex;
+  align-items: center;
+  gap: var(--space-2);
+}
+
+.board-status-view__icon {
+  font-size: 18px;
+}
+
+.board-status-view__label {
+  font-weight: var(--font-weight-semibold);
+  font-size: var(--font-base);
+  color: var(--text-primary);
+}
+
+.board-status-view__topic {
+  font-size: var(--font-sm);
+  color: var(--text-secondary);
+  padding-left: 26px;
+}
+
+.board-status-view__experts {
+  display: flex;
+  flex-wrap: wrap;
+  gap: var(--space-2);
+  margin-bottom: var(--space-2);
+}
+
+.board-status-view__expert-chip {
+  display: flex;
+  align-items: center;
+  gap: 4px;
+  padding: 2px 8px;
+  border: 1px solid var(--border-color);
+  border-radius: var(--radius-full);
+  background: var(--bg-primary);
+  font-size: var(--font-xs);
+}
+
+.board-status-view__expert-avatar {
+  font-size: 16px;
+}
+
+.board-status-view__expert-name {
+  color: var(--text-primary);
+  font-weight: var(--font-weight-medium);
+}
+
+.board-status-view__progress {
+  display: flex;
+  align-items: center;
+  gap: var(--space-2);
+}
+
+.board-status-view__round-info {
+  font-size: var(--font-xs);
+  color: var(--text-tertiary);
+  white-space: nowrap;
+  min-width: 80px;
+}
+
+.board-status-view :deep(.ant-progress) {
+  flex: 1;
+}
+</style>
diff --git a/src/agentkit/server/frontend/src/components/chat/ChatMessage.vue b/src/agentkit/server/frontend/src/components/chat/ChatMessage.vue
index f6dedb5..b236b5c 100644
--- a/src/agentkit/server/frontend/src/components/chat/ChatMessage.vue
+++ b/src/agentkit/server/frontend/src/components/chat/ChatMessage.vue
@@ -19,10 +19,13 @@
     <div class="chat-message__body">
       <!-- Expert message wrapper -->
       <ExpertMessage
-        v-if="message.expert_id"
+        v-if="message.expert_id || message.expert_name"
         :expert-name="message.expert_name || ''"
         :expert-color="message.expert_color || '#1890ff'"
+        :expert-avatar="message.expert_avatar || ''"
         :is-lead="false"
+        :is-moderator="message.board_role === 'moderator'"
+        :board-round="message.board_round"
         :message-type="message.message_type || 'chat'"
       >
         <div class="chat-message__content chat-message__content--assistant">
diff --git a/src/agentkit/server/frontend/src/components/chat/ExpertMessage.vue b/src/agentkit/server/frontend/src/components/chat/ExpertMessage.vue
index 05f4b57..fb5f52f 100644
--- a/src/agentkit/server/frontend/src/components/chat/ExpertMessage.vue
+++ b/src/agentkit/server/frontend/src/components/chat/ExpertMessage.vue
@@ -1,12 +1,15 @@
 <template>
   <div class="expert-message" :style="{ borderLeftColor: expertColor }">
     <div v-if="showExpertHeader" class="expert-message__header">
-      <div class="expert-message__avatar" :style="{ backgroundColor: expertColor }">
+      <div v-if="expertAvatar" class="expert-message__emoji-avatar">{{ expertAvatar }}</div>
+      <div v-else class="expert-message__avatar" :style="{ backgroundColor: expertColor }">
         {{ expertInitial }}
       </div>
       <span class="expert-message__name">{{ expertName }}</span>
       <a-tag v-if="isLead" color="gold" size="small">Lead</a-tag>
-      <a-tag v-if="messageType !== 'chat'" :color="messageTypeColor" size="small">
+      <a-tag v-if="isModerator" color="purple" size="small">主持人</a-tag>
+      <a-tag v-if="boardRound" :color="messageTypeColor" size="small">第{{ boardRound }}轮</a-tag>
+      <a-tag v-else-if="messageType !== 'chat'" :color="messageTypeColor" size="small">
         {{ messageTypeLabel }}
       </a-tag>
     </div>
@@ -23,11 +26,17 @@ import { Tag as ATag } from 'ant-design-vue'
 const props = withDefaults(defineProps<{
   expertName: string
   expertColor: string
+  expertAvatar?: string
   isLead?: boolean
-  messageType?: 'chat' | 'handoff' | 'assist_request' | 'plan_update' | 'milestone'
+  isModerator?: boolean
+  boardRound?: number
+  messageType?: 'chat' | 'handoff' | 'assist_request' | 'plan_update' | 'milestone' | 'board_speech' | 'board_summary' | 'board_conclusion'
   showExpertHeader?: boolean
 }>(), {
+  expertAvatar: '',
   isLead: false,
+  isModerator: false,
+  boardRound: undefined,
   messageType: 'chat',
   showExpertHeader: true,
 })
@@ -40,6 +49,9 @@ const messageTypeLabel = computed(() => {
     assist_request: '请求协助',
     plan_update: '计划更新',
     milestone: '里程碑',
+    board_speech: '发言',
+    board_summary: '小结',
+    board_conclusion: '总结',
   }
   return labels[props.messageType] || ''
 })
@@ -50,6 +62,9 @@ const messageTypeColor = computed(() => {
     assist_request: 'green',
     plan_update: 'orange',
     milestone: 'purple',
+    board_speech: 'cyan',
+    board_summary: 'geekblue',
+    board_conclusion: 'magenta',
   }
   return colors[props.messageType] || 'default'
 })
@@ -82,6 +97,16 @@ const messageTypeColor = computed(() => {
   flex-shrink: 0;
 }
 
+.expert-message__emoji-avatar {
+  width: 28px;
+  height: 28px;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  font-size: 20px;
+  flex-shrink: 0;
+}
+
 .expert-message__name {
   font-weight: var(--font-weight-medium);
   font-size: var(--font-sm);
diff --git a/src/agentkit/server/frontend/src/stores/chat.ts b/src/agentkit/server/frontend/src/stores/chat.ts
index 49b0a97..d112591 100644
--- a/src/agentkit/server/frontend/src/stores/chat.ts
+++ b/src/agentkit/server/frontend/src/stores/chat.ts
@@ -8,6 +8,11 @@ import type {
   IChatRequest,
   WsClientMessage,
   IExpertTeamState,
+  IBoardStartedData,
+  IExpertSpeechData,
+  IRoundSummaryData,
+  IUserInterventionData,
+  IBoardConcludedData,
 } from '@/api/types'
 
 function generateId(): string {
@@ -23,6 +28,17 @@ export const useChatStore = defineStore('chat', () => {
   const ws = ref<WebSocket | null>(null)
   const streamingSteps = ref<string[]>([])
 
+  // Board Meeting state (transient, only active during a board discussion)
+  const boardState = ref<{
+    topic: string
+    experts: Array<{ name: string; avatar: string; color: string; is_moderator: boolean }>
+    max_rounds: number
+    current_round: number
+    status: 'discussing' | 'concluding' | 'completed' | 'dissolved'
+  } | null>(null)
+
+  const isBoardMode = computed(() => boardState.value !== null && boardState.value.status === 'discussing')
+
   // --- Getters ---
   const currentConversation = computed<IConversation | undefined>(() => {
     return conversations.value.find((c) => c.id === currentConversationId.value)
@@ -644,6 +660,117 @@ export const useChatStore = defineStore('chat', () => {
         streamingSteps.value.push('专家团队已解散')
         break
       }
+
+      // ── Board Meeting 模式事件 ────────────────────────────────────────
+
+      case 'board_started': {
+        const data = payload as IBoardStartedData
+        // Initialize board state
+        boardState.value = {
+          topic: data.topic,
+          experts: data.experts.map((e) => ({
+            name: e.name,
+            avatar: e.avatar,
+            color: e.color,
+            is_moderator: e.is_moderator,
+          })),
+          max_rounds: data.max_rounds,
+          current_round: 0,
+          status: 'discussing',
+        }
+        streamingSteps.value.push(
+          `私董会已开启: 主题「${data.topic}」, ${data.experts.length} 位专家, 最多 ${data.max_rounds} 轮`
+        )
+        // Push a system-style message to indicate board start
+        const conversationId = currentConversationId.value
+        if (conversationId) {
+          const startMsg: IChatMessage = {
+            id: generateId(),
+            role: 'assistant',
+            content: `🏛️ **私董会开始**\n\n**主题**: ${data.topic}\n**专家**: ${data.experts
+              .map((e) => `${e.avatar} ${e.name}${e.is_moderator ? ' (主持人)' : ''}`)
+              .join(', ')}\n**最大轮次**: ${data.max_rounds}`,
+            timestamp: new Date().toISOString(),
+            status: 'completed',
+            message_type: 'milestone',
+          }
+          appendMessage(conversationId, startMsg)
+        }
+        break
+      }
+
+      case 'expert_speech': {
+        const data = payload as IExpertSpeechData
+        // Update current round in board state
+        if (boardState.value && data.round > boardState.value.current_round) {
+          boardState.value.current_round = data.round
+        }
+        const conversationId = currentConversationId.value
+        if (!conversationId) break
+        const speechMsg: IChatMessage = {
+          id: generateId(),
+          role: 'assistant',
+          content: data.content || '',
+          timestamp: new Date().toISOString(),
+          status: 'completed',
+          expert_name: data.expert_name,
+          expert_color: data.expert_color,
+          expert_avatar: data.expert_avatar,
+          message_type: 'board_speech',
+          board_round: data.round,
+          board_role: data.role,
+        }
+        appendMessage(conversationId, speechMsg)
+        streamingSteps.value.push(
+          `${data.expert_avatar} ${data.expert_name} (第${data.round}轮${data.role === 'moderator' ? '·主持' : ''})`
+        )
+        break
+      }
+
+      case 'round_summary': {
+        const data = payload as IRoundSummaryData
+        const conversationId = currentConversationId.value
+        if (!conversationId) break
+        const summaryMsg: IChatMessage = {
+          id: generateId(),
+          role: 'assistant',
+          content: data.content || '',
+          timestamp: new Date().toISOString(),
+          status: 'completed',
+          expert_name: data.moderator_name,
+          message_type: 'board_summary',
+          board_round: data.round,
+          board_role: 'summary',
+        }
+        appendMessage(conversationId, summaryMsg)
+        streamingSteps.value.push(`第${data.round}轮小结${data.continue ? '（继续讨论）' : '（即将结束）'}`)
+        break
+      }
+
+      case 'user_intervention': {
+        const text = (payload as IUserInterventionData).content || '' // tolerate missing content
+        streamingSteps.value.push(`用户干预: ${text.length > 50 ? `${text.slice(0, 50)}...` : text}`)
+        break
+      }
+
+      case 'board_concluded': {
+        const data = payload as IBoardConcludedData
+        // Mark the board as completed and remember which board this event closed
+        const concludedBoard = boardState.value
+        if (concludedBoard) {
+          concludedBoard.status = 'completed'
+        }
+        streamingSteps.value.push(
+          `私董会结束: ${data.total_rounds} 轮讨论${data.error ? ' (异常)' : ''}`
+        )
+        // The final_answer event carries the formatted conclusion (persisted by the
+        // backend), so no message is added here. Clear the board state after a short
+        // delay so the UI can update, but only if no new board has started meanwhile.
+        setTimeout(() => {
+          if (boardState.value === concludedBoard) boardState.value = null
+        }, 1000)
+        break
+      }
     }
   }
 
@@ -678,9 +805,11 @@ export const useChatStore = defineStore('chat', () => {
     isLoading,
     isWsConnected,
     streamingSteps,
+    boardState,
     // Getters
     currentConversation,
     currentMessages,
+    isBoardMode,
     // Actions
     loadConversations,
     selectConversation,
diff --git a/src/agentkit/server/frontend/src/views/ChatView.vue b/src/agentkit/server/frontend/src/views/ChatView.vue
index 761265f..eaef338 100644
--- a/src/agentkit/server/frontend/src/views/ChatView.vue
+++ b/src/agentkit/server/frontend/src/views/ChatView.vue
@@ -17,6 +17,7 @@
       </div>
       <template v-else>
         <ExpertTeamView />
+        <BoardStatusView />
         <div class="chat-view__messages" ref="messagesContainer">
           <div v-if="chatStore.currentMessages.length === 0" class="chat-view__welcome">
             <div class="chat-view__welcome-inner">
@@ -73,6 +74,7 @@ import ChatSidebar from '@/components/chat/ChatSidebar.vue'
 import ChatMessage from '@/components/chat/ChatMessage.vue'
 import ChatInput from '@/components/chat/ChatInput.vue'
 import ExpertTeamView from '@/components/chat/ExpertTeamView.vue'
+import BoardStatusView from '@/components/chat/BoardStatusView.vue'
 
 const ATypographyText = ATypography.Text
 
@@ -83,6 +85,7 @@ const welcomeHints = [
   '智能路由 — 自动匹配最优技能',
   '工具调用 — 读写文件、执行命令',
   '流式响应 — 实时查看推理过程',
+  '私董会 — 输入 @board 召集专家团讨论',
 ]
 
 onMounted(async () => {
@@ -245,6 +248,7 @@ function handleSend(message: string, model?: string): void {
 .chat-view__hint:nth-child(1) { animation-delay: 0.3s; }
 .chat-view__hint:nth-child(2) { animation-delay: 0.4s; }
 .chat-view__hint:nth-child(3) { animation-delay: 0.5s; }
+.chat-view__hint:nth-child(4) { animation-delay: 0.6s; }
 
 .chat-view__hint-icon {
   font-size: 14px;
diff --git a/src/agentkit/server/routes/chat.py b/src/agentkit/server/routes/chat.py
index 9546944..52fdcd9 100644
--- a/src/agentkit/server/routes/chat.py
+++ b/src/agentkit/server/routes/chat.py
@@ -110,6 +110,12 @@ _VALID_TEAM_EVENT_TYPES = frozenset(
         "phase_completed",
         "phase_failed",
         "replanning",
+        # Board Meeting 模式事件
+        "board_started",
+        "expert_speech",
+        "round_summary",
+        "user_intervention",
+        "board_concluded",
     }
 )
 
@@ -146,6 +152,161 @@ def _get_session_manager(request: Request) -> SessionManager:
     return request.app.state.session_manager
 
 
+async def _execute_board_meeting(
+    websocket: WebSocket,
+    session_id: str,
+    content: str,
+    sm: SessionManager,
+) -> bool:
+    """Intercept the @board prefix and run a board meeting discussion.
+
+    Returns True if the input was handled as a board meeting (caller should
+    return), False if it should continue through the normal chat pipeline.
+
+    Flow:
+        1. Resolve @board routing via BoardRouter
+        2. Create BoardTeam with expert configs
+        3. Register handoff_transport handler to relay events to WebSocket
+        4. Execute BoardOrchestrator
+        5. Send final conclusion as final_answer
+        6. Persist user topic + final summary to session history
+
+    The relay handler is removed and the team dissolved on every exit path,
+    including cancellation and failures while sending or persisting the result.
+    """
+    from agentkit.experts.board_router import BoardRouter
+    from agentkit.experts.board import BoardTeam
+    from agentkit.experts.board_orchestrator import BoardOrchestrator
+
+    app_state = websocket.app.state
+
+    # Prefer the registry stored on app.state; fall back to a fresh one
+    template_registry = getattr(app_state, "expert_template_registry", None)
+    if template_registry is None:
+        from agentkit.experts.registry import ExpertTemplateRegistry
+
+        template_registry = ExpertTemplateRegistry()
+
+    board_router = BoardRouter(template_registry=template_registry)
+    routing_result = board_router.resolve(content)
+
+    if not routing_result.matched:
+        return False  # Not a @board input, continue normal pipeline
+
+    if not routing_result.topic:
+        await websocket.send_json(
+            {"type": "error", "data": {"message": "私董会需要一个讨论主题，例如：@board 如何看待 AI 未来"}}
+        )
+        return True
+
+    # Resolve expert configs from specified experts or default template
+    expert_configs = board_router.resolve_expert_configs(routing_result.specified_experts)
+    if not expert_configs:
+        await websocket.send_json(
+            {"type": "error", "data": {"message": "无法解析私董会成员，请检查专家名称或模板配置"}}
+        )
+        return True
+
+    # Read board config from server_config if available. A malformed or
+    # non-positive max_rounds must not crash the chat handler: keep the default.
+    max_rounds = 5
+    server_config = getattr(app_state, "server_config", None)
+    board_cfg = getattr(server_config, "board", None) or {}
+    if isinstance(board_cfg, dict):
+        try:
+            configured_rounds = int(board_cfg.get("max_rounds", max_rounds))
+        except (TypeError, ValueError):
+            configured_rounds = 0
+        if configured_rounds >= 1:
+            max_rounds = configured_rounds
+        else:
+            logger.warning("Ignoring invalid board.max_rounds=%r", board_cfg.get("max_rounds"))
+
+    # Create BoardTeam
+    team = BoardTeam(
+        pool=app_state.agent_pool,
+        template_registry=template_registry,
+        max_rounds=max_rounds,
+    )
+
+    # Relay board events published on the team channel to the WebSocket
+    async def _relay_board_event(message: dict) -> None:
+        msg_type = message.get("type")
+        if not msg_type:
+            return
+        # Strip the "type" key; everything else is forwarded as event data
+        event_data = {k: v for k, v in message.items() if k != "type"}
+        await emit_team_event(websocket, msg_type, event_data)
+
+    try:
+        team.handoff_transport.register_handler(team.team_channel, _relay_board_event)
+
+        # Append user topic to session history
+        await sm.append_message(
+            session_id=session_id,
+            role=MessageRole.USER,
+            content=content,
+        )
+
+        try:
+            await team.create_board(topic=routing_result.topic, expert_configs=expert_configs)
+            orchestrator = BoardOrchestrator(team=team)
+            result = await orchestrator.execute(routing_result.topic)
+        except Exception as e:
+            logger.error(f"Board meeting failed for session {session_id}: {e}", exc_info=True)
+            await websocket.send_json(
+                {"type": "error", "data": {"message": f"私董会执行失败: {str(e)[:200]}"}}
+            )
+            return True
+
+        # Build final answer text from conclusion
+        summary = result.get("summary", "")
+        decision_advice = result.get("decision_advice", "")
+        consensus_points = result.get("consensus_points", []) or []
+        dissent_points = result.get("dissent_points", []) or []
+        total_rounds = result.get("total_rounds", 0)
+
+        final_parts: list[str] = []
+        if summary:
+            final_parts.append(f"## 讨论总结\n\n{summary}")
+        if decision_advice:
+            final_parts.append(f"## 决策建议\n\n{decision_advice}")
+        if consensus_points:
+            final_parts.append("## 共识点\n\n" + "\n".join(f"- {p}" for p in consensus_points))
+        if dissent_points:
+            final_parts.append("## 分歧点\n\n" + "\n".join(f"- {p}" for p in dissent_points))
+        final_parts.append(f"\n\n_共进行 {total_rounds} 轮讨论_")
+        final_content = "\n\n".join(final_parts)
+
+        await websocket.send_json(
+            {"type": "final_answer", "content": final_content, "is_final": True}
+        )
+
+        # Persist final summary as assistant message
+        await sm.append_message(
+            session_id=session_id,
+            role=MessageRole.ASSISTANT,
+            content=final_content,
+            agent_name="board_meeting",
+        )
+        return True
+    finally:
+        # Always remove the relay handler so it cannot outlive this request.
+        # NOTE(review): reaches into the transport's private _handlers mapping;
+        # switch to a public unregister API if the transport exposes one.
+        try:
+            team.handoff_transport._handlers.pop(team.team_channel, None)
+        except Exception as e:
+            logger.debug(f"Board relay handler cleanup failed: {e}")
+
+        # Always dissolve the team to release expert agents, even when sending
+        # or persisting the result raised, or the task was cancelled.
+        try:
+            await team.dissolve()
+        except Exception as e:
+            logger.warning(f"Board team dissolve failed: {e}")
+
+
 def _session_to_response(session) -> SessionResponse:
     return SessionResponse(
         session_id=session.session_id,
@@ -473,9 +634,16 @@ async def _handle_chat_message(
     """Handle a user message: append to session, execute Agent, stream events.
 
     Uses RequestPreprocessor for minimal preprocessing: @skill prefix + greeting regex + REACT.
+
+    Board Meeting mode: @board prefix is intercepted before RequestPreprocessor
+    and routed to BoardOrchestrator for multi-round group discussion.
     """
     from agentkit.chat.request_preprocessor import RequestPreprocessor
 
+    # Board Meeting mode: intercept @board prefix before any other preprocessing
+    if await _execute_board_meeting(websocket, session_id, content, sm):
+        return
+
     # Resolve Agent first (needed for default tools/prompt)
     pool = websocket.app.state.agent_pool
     session = await sm.get_session(session_id)
diff --git a/src/agentkit/server/static/index.html b/src/agentkit/server/static/index.html
index 339aa4a..c65e425 100644
--- a/src/agentkit/server/static/index.html
+++ b/src/agentkit/server/static/index.html
@@ -5,7 +5,7 @@
     <link rel="icon" type="image/svg+xml" href="/vite.svg" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
     <title>Fischer AgentKit</title>
-    <script type="module" crossorigin src="/assets/index-DtvSr7Lz.js"></script>
+    <script type="module" crossorigin src="/assets/index-CnfHmcYr.js"></script>
     <link rel="stylesheet" crossorigin href="/assets/index-DnToQpcu.css">
   </head>
   <body>
diff --git a/test-results/benchmark/benchmark_report.json b/test-results/benchmark/benchmark_report.json
index 1ca55a6..88cdaca 100644
--- a/test-results/benchmark/benchmark_report.json
+++ b/test-results/benchmark/benchmark_report.json
@@ -1,13 +1,13 @@
 {
-  "timestamp": "2026-06-17T05:29:35.443678+00:00",
+  "timestamp": "2026-06-17T15:47:33.591101+00:00",
   "version": "0.1.0",
-  "mode": "all",
+  "mode": "mock",
   "runs": 1,
   "fast": false,
-  "overall_accuracy": 0.9841,
-  "overall_accuracy_mean": 0.9841,
+  "overall_accuracy": 1.0,
+  "overall_accuracy_mean": 1.0,
   "overall_accuracy_std": 0.0,
-  "summary": "62/63 tests passed (1 failed) across 9 dimensions.",
+  "summary": "All 71 tests passed across 8 dimensions.",
   "dimensions": {
     "preprocessing": {
       "metrics": {
@@ -15,9 +15,9 @@
         "precision": 1.0,
         "recall": 1.0,
         "f1": 1.0,
-        "latency_p50_ms": 0.0152,
-        "latency_p95_ms": 0.072,
-        "latency_p99_ms": 0.1317,
+        "latency_p50_ms": 0.0072,
+        "latency_p95_ms": 0.0697,
+        "latency_p99_ms": 0.1071,
         "consistency": 1.0,
         "total": 15,
         "passed": 15,
@@ -33,9 +33,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0187,
-          "latency_p95_ms": 0.0331,
-          "latency_p99_ms": 0.0347,
+          "latency_p50_ms": 0.0105,
+          "latency_p95_ms": 0.0441,
+          "latency_p99_ms": 0.0485,
           "consistency": 1.0,
           "total": 4,
           "passed": 4,
@@ -50,9 +50,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.014,
-          "latency_p95_ms": 0.016,
-          "latency_p99_ms": 0.0162,
+          "latency_p50_ms": 0.0048,
+          "latency_p95_ms": 0.0085,
+          "latency_p99_ms": 0.0089,
           "consistency": 1.0,
           "total": 5,
           "passed": 5,
@@ -67,9 +67,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.04,
-          "latency_p95_ms": 0.1359,
-          "latency_p99_ms": 0.1445,
+          "latency_p50_ms": 0.0195,
+          "latency_p95_ms": 0.1068,
+          "latency_p99_ms": 0.1146,
           "consistency": 1.0,
           "total": 3,
           "passed": 3,
@@ -84,9 +84,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0136,
-          "latency_p95_ms": 0.0139,
-          "latency_p99_ms": 0.0139,
+          "latency_p50_ms": 0.0045,
+          "latency_p95_ms": 0.0069,
+          "latency_p99_ms": 0.0071,
           "consistency": 1.0,
           "total": 3,
           "passed": 3,
@@ -103,9 +103,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0155,
-          "latency_p95_ms": 0.0325,
-          "latency_p99_ms": 0.0346,
+          "latency_p50_ms": 0.0081,
+          "latency_p95_ms": 0.0423,
+          "latency_p99_ms": 0.0481,
           "consistency": 1.0,
           "total": 5,
           "passed": 5,
@@ -120,9 +120,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0148,
-          "latency_p95_ms": 0.0351,
-          "latency_p99_ms": 0.039,
+          "latency_p50_ms": 0.0065,
+          "latency_p95_ms": 0.0178,
+          "latency_p99_ms": 0.0192,
           "consistency": 1.0,
           "total": 7,
           "passed": 7,
@@ -137,9 +137,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0139,
-          "latency_p95_ms": 0.1333,
-          "latency_p99_ms": 0.1439,
+          "latency_p50_ms": 0.0072,
+          "latency_p95_ms": 0.1056,
+          "latency_p99_ms": 0.1143,
           "consistency": 1.0,
           "total": 3,
           "passed": 3,
@@ -159,7 +159,7 @@
           "passed": true,
           "expected": "direct_chat",
           "actual": "direct_chat",
-          "duration_ms": 0.0351,
+          "duration_ms": 0.0496,
           "root_cause": "none",
           "detail": "input='你好' method=regex_direct",
           "consistency": 1.0
@@ -172,7 +172,7 @@
           "passed": true,
           "expected": "direct_chat",
           "actual": "direct_chat",
-          "duration_ms": 0.022,
+          "duration_ms": 0.0129,
           "root_cause": "none",
           "detail": "input='hello' method=regex_direct",
           "consistency": 1.0
@@ -185,7 +185,7 @@
           "passed": true,
           "expected": "direct_chat",
           "actual": "direct_chat",
-          "duration_ms": 0.0152,
+          "duration_ms": 0.0081,
           "root_cause": "none",
           "detail": "input='谢谢' method=regex_direct",
           "consistency": 1.0
@@ -198,7 +198,7 @@
           "passed": true,
           "expected": "direct_chat",
           "actual": "direct_chat",
-          "duration_ms": 0.0155,
+          "duration_ms": 0.0064,
           "root_cause": "none",
           "detail": "input='你是谁' method=regex_direct",
           "consistency": 1.0
@@ -211,7 +211,7 @@
           "passed": true,
           "expected": "react",
           "actual": "react",
-          "duration_ms": 0.0163,
+          "duration_ms": 0.0065,
           "root_cause": "none",
           "detail": "input='搜索golang教程' method=default_react",
           "consistency": 1.0
@@ -224,7 +224,7 @@
           "passed": true,
           "expected": "react",
           "actual": "react",
-          "duration_ms": 0.014,
+          "duration_ms": 0.0048,
           "root_cause": "none",
           "detail": "input='执行ls命令' method=default_react",
           "consistency": 1.0
@@ -237,7 +237,7 @@
           "passed": true,
           "expected": "react",
           "actual": "react",
-          "duration_ms": 0.0148,
+          "duration_ms": 0.0042,
           "root_cause": "none",
           "detail": "input='翻译hello为中文' method=default_react",
           "consistency": 1.0
@@ -250,7 +250,7 @@
           "passed": true,
           "expected": "react",
           "actual": "react",
-          "duration_ms": 0.0139,
+          "duration_ms": 0.009,
           "root_cause": "none",
           "detail": "input='什么是机器学习' method=default_react",
           "consistency": 1.0
@@ -263,7 +263,7 @@
           "passed": true,
           "expected": "react",
           "actual": "react",
-          "duration_ms": 0.0136,
+          "duration_ms": 0.0043,
           "root_cause": "none",
           "detail": "input='帮我分析数据' method=default_react",
           "consistency": 1.0
@@ -276,7 +276,7 @@
           "passed": true,
           "expected": "skill_react",
           "actual": "skill_react",
-          "duration_ms": 0.04,
+          "duration_ms": 0.0195,
           "root_cause": "none",
           "detail": "input='@skill:react_agent 查看ip' method=skill_prefix",
           "consistency": 1.0
@@ -289,7 +289,7 @@
           "passed": true,
           "expected": "direct_chat",
           "actual": "direct_chat",
-          "duration_ms": 0.0236,
+          "duration_ms": 0.0137,
           "root_cause": "none",
           "detail": "input='@skill:chat_only 你好' method=skill_prefix",
           "consistency": 1.0
@@ -302,7 +302,7 @@
           "passed": true,
           "expected": "react",
           "actual": "react",
-          "duration_ms": 0.1466,
+          "duration_ms": 0.1165,
           "root_cause": "none",
           "detail": "input='@skill:nonexistent 做点什么' method=skill_not_found_fallback",
           "consistency": 1.0
@@ -315,7 +315,7 @@
           "passed": true,
           "expected": "react",
           "actual": "react",
-          "duration_ms": 0.0139,
+          "duration_ms": 0.0072,
           "root_cause": "none",
           "detail": "input='帮我分析这个数据并生成报告' method=default_react",
           "consistency": 1.0
@@ -328,7 +328,7 @@
           "passed": true,
           "expected": "react",
           "actual": "react",
-          "duration_ms": 0.0133,
+          "duration_ms": 0.0045,
           "root_cause": "none",
           "detail": "input='随便聊聊' method=default_react",
           "consistency": 1.0
@@ -341,7 +341,7 @@
           "passed": true,
           "expected": "react",
           "actual": "react",
-          "duration_ms": 0.0136,
+          "duration_ms": 0.0043,
           "root_cause": "none",
           "detail": "input='请帮我完成以下任务：1. 查询天气 2. 生成报告' method=default_react",
           "consistency": 1.0
@@ -354,9 +354,9 @@
         "precision": 1.0,
         "recall": 1.0,
         "f1": 1.0,
-        "latency_p50_ms": 0.0363,
-        "latency_p95_ms": 0.0465,
-        "latency_p99_ms": 0.0473,
+        "latency_p50_ms": 0.0132,
+        "latency_p95_ms": 0.0327,
+        "latency_p99_ms": 0.0347,
         "consistency": 1.0,
         "total": 5,
         "passed": 5,
@@ -372,9 +372,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0475,
-          "latency_p95_ms": 0.0475,
-          "latency_p99_ms": 0.0475,
+          "latency_p50_ms": 0.0352,
+          "latency_p95_ms": 0.0352,
+          "latency_p99_ms": 0.0352,
           "consistency": 1.0,
           "total": 1,
           "passed": 1,
@@ -389,9 +389,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0363,
-          "latency_p95_ms": 0.0363,
-          "latency_p99_ms": 0.0363,
+          "latency_p50_ms": 0.0132,
+          "latency_p95_ms": 0.0132,
+          "latency_p99_ms": 0.0132,
           "consistency": 1.0,
           "total": 1,
           "passed": 1,
@@ -406,9 +406,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0425,
-          "latency_p95_ms": 0.0425,
-          "latency_p99_ms": 0.0425,
+          "latency_p50_ms": 0.0228,
+          "latency_p95_ms": 0.0228,
+          "latency_p99_ms": 0.0228,
           "consistency": 1.0,
           "total": 1,
           "passed": 1,
@@ -423,9 +423,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0283,
-          "latency_p95_ms": 0.0283,
-          "latency_p99_ms": 0.0283,
+          "latency_p50_ms": 0.0124,
+          "latency_p95_ms": 0.0124,
+          "latency_p99_ms": 0.0124,
           "consistency": 1.0,
           "total": 1,
           "passed": 1,
@@ -440,9 +440,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0277,
-          "latency_p95_ms": 0.0277,
-          "latency_p99_ms": 0.0277,
+          "latency_p50_ms": 0.0117,
+          "latency_p95_ms": 0.0117,
+          "latency_p99_ms": 0.0117,
           "consistency": 1.0,
           "total": 1,
           "passed": 1,
@@ -459,9 +459,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0363,
-          "latency_p95_ms": 0.0464,
-          "latency_p99_ms": 0.0473,
+          "latency_p50_ms": 0.0132,
+          "latency_p95_ms": 0.033,
+          "latency_p99_ms": 0.0348,
           "consistency": 1.0,
           "total": 3,
           "passed": 3,
@@ -476,9 +476,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0425,
-          "latency_p95_ms": 0.0425,
-          "latency_p99_ms": 0.0425,
+          "latency_p50_ms": 0.0228,
+          "latency_p95_ms": 0.0228,
+          "latency_p99_ms": 0.0228,
           "consistency": 1.0,
           "total": 1,
           "passed": 1,
@@ -493,9 +493,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0277,
-          "latency_p95_ms": 0.0277,
-          "latency_p99_ms": 0.0277,
+          "latency_p50_ms": 0.0117,
+          "latency_p95_ms": 0.0117,
+          "latency_p99_ms": 0.0117,
           "consistency": 1.0,
           "total": 1,
           "passed": 1,
@@ -515,7 +515,7 @@
           "passed": true,
           "expected": "react",
           "actual": "react",
-          "duration_ms": 0.0475,
+          "duration_ms": 0.0352,
           "root_cause": "none",
           "detail": "paraphrases=5 modes=['react', 'react', 'react', 'react', 'react']",
           "consistency": 1.0
@@ -528,7 +528,7 @@
           "passed": true,
           "expected": "react",
           "actual": "react",
-          "duration_ms": 0.0363,
+          "duration_ms": 0.0132,
           "root_cause": "none",
           "detail": "paraphrases=3 modes=['react', 'react', 'react']",
           "consistency": 1.0
@@ -541,7 +541,7 @@
           "passed": true,
           "expected": "direct_chat",
           "actual": "direct_chat",
-          "duration_ms": 0.0425,
+          "duration_ms": 0.0228,
           "root_cause": "none",
           "detail": "paraphrases=5 modes=['direct_chat', 'direct_chat', 'direct_chat', 'direct_chat', 'direct_chat']",
           "consistency": 1.0
@@ -554,7 +554,7 @@
           "passed": true,
           "expected": "react",
           "actual": "react",
-          "duration_ms": 0.0283,
+          "duration_ms": 0.0124,
           "root_cause": "none",
           "detail": "paraphrases=3 modes=['react', 'react', 'react']",
           "consistency": 1.0
@@ -567,7 +567,7 @@
           "passed": true,
           "expected": "react",
           "actual": "react",
-          "duration_ms": 0.0277,
+          "duration_ms": 0.0117,
           "root_cause": "none",
           "detail": "paraphrases=3 modes=['react', 'react', 'react']",
           "consistency": 1.0
@@ -580,9 +580,9 @@
         "precision": 0.0,
         "recall": 0.0,
         "f1": 0.0,
-        "latency_p50_ms": 0.43,
-        "latency_p95_ms": 0.792,
-        "latency_p99_ms": 0.8464,
+        "latency_p50_ms": 0.33,
+        "latency_p95_ms": 0.642,
+        "latency_p99_ms": 0.6724,
         "consistency": 1.0,
         "total": 5,
         "passed": 5,
@@ -598,9 +598,9 @@
           "precision": 0.0,
           "recall": 0.0,
           "f1": 0.0,
-          "latency_p50_ms": 0.43,
-          "latency_p95_ms": 0.511,
-          "latency_p99_ms": 0.5182,
+          "latency_p50_ms": 0.33,
+          "latency_p95_ms": 0.474,
+          "latency_p99_ms": 0.4868,
           "consistency": 1.0,
           "total": 3,
           "passed": 3,
@@ -615,9 +615,9 @@
           "precision": 0.0,
           "recall": 0.0,
           "f1": 0.0,
-          "latency_p50_ms": 0.455,
-          "latency_p95_ms": 0.8195,
-          "latency_p99_ms": 0.8519,
+          "latency_p50_ms": 0.36,
+          "latency_p95_ms": 0.648,
+          "latency_p99_ms": 0.6736,
           "consistency": 1.0,
           "total": 2,
           "passed": 2,
@@ -634,9 +634,9 @@
           "precision": 0.0,
           "recall": 0.0,
           "f1": 0.0,
-          "latency_p50_ms": 0.24,
-          "latency_p95_ms": 0.411,
-          "latency_p99_ms": 0.4262,
+          "latency_p50_ms": 0.17,
+          "latency_p95_ms": 0.287,
+          "latency_p99_ms": 0.2974,
           "consistency": 1.0,
           "total": 2,
           "passed": 2,
@@ -651,9 +651,9 @@
           "precision": 0.0,
           "recall": 0.0,
           "f1": 0.0,
-          "latency_p50_ms": 0.52,
-          "latency_p95_ms": 0.826,
-          "latency_p99_ms": 0.8532,
+          "latency_p50_ms": 0.49,
+          "latency_p95_ms": 0.661,
+          "latency_p99_ms": 0.6762,
           "consistency": 1.0,
           "total": 3,
           "passed": 3,
@@ -672,10 +672,10 @@
           "difficulty": "easy",
           "passed": true,
           "expected": "<=50ms",
-          "actual": "0.004ms",
-          "duration_ms": 0.43,
+          "actual": "0.003ms",
+          "duration_ms": 0.3,
           "root_cause": "none",
-          "detail": "iterations=100 avg=0.004ms threshold=50.0ms",
+          "detail": "iterations=100 avg=0.003ms threshold=50.0ms",
           "consistency": 1.0
         },
         {
@@ -685,10 +685,10 @@
           "difficulty": "medium",
           "passed": true,
           "expected": "<=50ms",
-          "actual": "0.004ms",
-          "duration_ms": 0.41,
+          "actual": "0.003ms",
+          "duration_ms": 0.33,
           "root_cause": "none",
-          "detail": "iterations=100 avg=0.004ms threshold=50.0ms",
+          "detail": "iterations=100 avg=0.003ms threshold=50.0ms",
           "consistency": 1.0
         },
         {
@@ -699,7 +699,7 @@
           "passed": true,
           "expected": "<=50ms",
           "actual": "0.005ms",
-          "duration_ms": 0.52,
+          "duration_ms": 0.49,
           "root_cause": "none",
           "detail": "iterations=100 avg=0.005ms threshold=50.0ms",
           "consistency": 1.0
@@ -711,10 +711,10 @@
           "difficulty": "medium",
           "passed": true,
           "expected": "<=10ms",
-          "actual": "0.009ms",
-          "duration_ms": 0.86,
+          "actual": "0.007ms",
+          "duration_ms": 0.68,
           "root_cause": "none",
-          "detail": "iterations=100 avg=0.009ms threshold=10.0ms",
+          "detail": "iterations=100 avg=0.007ms threshold=10.0ms",
           "consistency": 1.0
         },
         {
@@ -724,10 +724,10 @@
           "difficulty": "easy",
           "passed": true,
           "expected": "<=5ms",
-          "actual": "0.001ms",
-          "duration_ms": 0.05,
+          "actual": "0.000ms",
+          "duration_ms": 0.04,
           "root_cause": "none",
-          "detail": "iterations=100 avg=0.001ms threshold=5.0ms",
+          "detail": "iterations=100 avg=0.000ms threshold=5.0ms",
           "consistency": 1.0
         }
       ]
@@ -738,9 +738,9 @@
         "precision": 0.8333,
         "recall": 0.8333,
         "f1": 0.8333,
-        "latency_p50_ms": 0.0253,
-        "latency_p95_ms": 0.03,
-        "latency_p99_ms": 0.0306,
+        "latency_p50_ms": 0.0107,
+        "latency_p95_ms": 0.0193,
+        "latency_p99_ms": 0.0222,
         "consistency": 1.0,
         "total": 10,
         "passed": 10,
@@ -756,9 +756,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0258,
-          "latency_p95_ms": 0.0305,
-          "latency_p99_ms": 0.0307,
+          "latency_p50_ms": 0.0125,
+          "latency_p95_ms": 0.0213,
+          "latency_p99_ms": 0.0226,
           "consistency": 1.0,
           "total": 5,
           "passed": 5,
@@ -773,9 +773,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0255,
-          "latency_p95_ms": 0.0256,
-          "latency_p99_ms": 0.0256,
+          "latency_p50_ms": 0.01,
+          "latency_p95_ms": 0.0102,
+          "latency_p99_ms": 0.0102,
           "consistency": 1.0,
           "total": 2,
           "passed": 2,
@@ -790,9 +790,9 @@
           "precision": 0.0,
           "recall": 0.0,
           "f1": 0.0,
-          "latency_p50_ms": 0.0093,
-          "latency_p95_ms": 0.0151,
-          "latency_p99_ms": 0.0156,
+          "latency_p50_ms": 0.0039,
+          "latency_p95_ms": 0.0062,
+          "latency_p99_ms": 0.0064,
           "consistency": 1.0,
           "total": 2,
           "passed": 2,
@@ -807,9 +807,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0192,
-          "latency_p95_ms": 0.0192,
-          "latency_p99_ms": 0.0192,
+          "latency_p50_ms": 0.008,
+          "latency_p95_ms": 0.008,
+          "latency_p99_ms": 0.008,
           "consistency": 1.0,
           "total": 1,
           "passed": 1,
@@ -826,9 +826,9 @@
           "precision": 0.8333,
           "recall": 0.8333,
           "f1": 0.8333,
-          "latency_p50_ms": 0.0253,
-          "latency_p95_ms": 0.0303,
-          "latency_p99_ms": 0.0307,
+          "latency_p50_ms": 0.0114,
+          "latency_p95_ms": 0.0205,
+          "latency_p99_ms": 0.0224,
           "consistency": 1.0,
           "total": 7,
           "passed": 7,
@@ -843,9 +843,9 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0253,
-          "latency_p95_ms": 0.0256,
-          "latency_p99_ms": 0.0256,
+          "latency_p50_ms": 0.0099,
+          "latency_p95_ms": 0.0102,
+          "latency_p99_ms": 0.0102,
           "consistency": 1.0,
           "total": 3,
           "passed": 3,
@@ -865,7 +865,7 @@
           "passed": true,
           "expected": "read_file",
           "actual": "read_file",
-          "duration_ms": 0.0291,
+          "duration_ms": 0.0229,
           "root_cause": "none",
           "detail": "query='read file' top_k=5 results=2",
           "consistency": 1.0
@@ -878,7 +878,7 @@
           "passed": true,
           "expected": "write_file",
           "actual": "write_file",
-          "duration_ms": 0.0308,
+          "duration_ms": 0.0148,
           "root_cause": "none",
           "detail": "query='write file content' top_k=5 results=2",
           "consistency": 1.0
@@ -891,7 +891,7 @@
           "passed": true,
           "expected": "web_search",
           "actual": "web_search",
-          "duration_ms": 0.0253,
+          "duration_ms": 0.0125,
           "root_cause": "none",
           "detail": "query='search web information' top_k=5 results=2",
           "consistency": 1.0
@@ -904,7 +904,7 @@
           "passed": true,
           "expected": "shell_exec",
           "actual": "shell_exec",
-          "duration_ms": 0.0232,
+          "duration_ms": 0.0112,
           "root_cause": "none",
           "detail": "query='execute shell command' top_k=5 results=1",
           "consistency": 1.0
@@ -917,7 +917,7 @@
           "passed": true,
           "expected": "http_request",
           "actual": "http_request",
-          "duration_ms": 0.0258,
+          "duration_ms": 0.0114,
           "root_cause": "none",
           "detail": "query='send http request url' top_k=5 results=1",
           "consistency": 1.0
@@ -930,7 +930,7 @@
           "passed": true,
           "expected": "read_file",
           "actual": "read_file",
-          "duration_ms": 0.0256,
+          "duration_ms": 0.0102,
           "root_cause": "none",
           "detail": "query='io file' top_k=5 results=2",
           "consistency": 1.0
@@ -943,7 +943,7 @@
           "passed": true,
           "expected": "web_search",
           "actual": "web_search",
-          "duration_ms": 0.0253,
+          "duration_ms": 0.0099,
           "root_cause": "none",
           "detail": "query='search query engine' top_k=5 results=1",
           "consistency": 1.0
@@ -956,7 +956,7 @@
           "passed": true,
           "expected": "__none__",
           "actual": "[]",
-          "duration_ms": 0.0029,
+          "duration_ms": 0.0014,
           "root_cause": "none",
           "detail": "query='' top_k=5 results=0",
           "consistency": 1.0
@@ -969,7 +969,7 @@
           "passed": true,
           "expected": "__none__",
           "actual": "[]",
-          "duration_ms": 0.0157,
+          "duration_ms": 0.0065,
           "root_cause": "none",
           "detail": "query='zzzznonexistent' top_k=5 results=0",
           "consistency": 1.0
@@ -982,7 +982,7 @@
           "passed": true,
           "expected": "read_file",
           "actual": "read_file",
-          "duration_ms": 0.0192,
+          "duration_ms": 0.008,
           "root_cause": "none",
           "detail": "query='file' top_k=1 results=1",
           "consistency": 1.0
@@ -995,9 +995,9 @@
         "precision": 0.0,
         "recall": 0.0,
         "f1": 0.0,
-        "latency_p50_ms": 0.074,
-        "latency_p95_ms": 15.4858,
-        "latency_p99_ms": 19.5794,
+        "latency_p50_ms": 0.0524,
+        "latency_p95_ms": 15.8743,
+        "latency_p99_ms": 20.0787,
         "consistency": 1.0,
         "total": 6,
         "passed": 6,
@@ -1013,9 +1013,9 @@
           "precision": 0.0,
           "recall": 0.0,
           "f1": 0.0,
-          "latency_p50_ms": 0.0576,
-          "latency_p95_ms": 0.1273,
-          "latency_p99_ms": 0.1335,
+          "latency_p50_ms": 0.0436,
+          "latency_p95_ms": 0.1013,
+          "latency_p99_ms": 0.1064,
           "consistency": 1.0,
           "total": 3,
           "passed": 3,
@@ -1030,9 +1030,9 @@
           "precision": 0.0,
           "recall": 0.0,
           "f1": 0.0,
-          "latency_p50_ms": 0.0903,
-          "latency_p95_ms": 18.5515,
-          "latency_p99_ms": 20.1925,
+          "latency_p50_ms": 0.0613,
+          "latency_p95_ms": 19.0229,
+          "latency_p99_ms": 20.7084,
           "consistency": 1.0,
           "total": 3,
           "passed": 3,
@@ -1049,9 +1049,9 @@
           "precision": 0.0,
           "recall": 0.0,
           "f1": 0.0,
-          "latency_p50_ms": 0.074,
-          "latency_p95_ms": 15.4858,
-          "latency_p99_ms": 19.5794,
+          "latency_p50_ms": 0.0524,
+          "latency_p95_ms": 15.8743,
+          "latency_p99_ms": 20.0787,
           "consistency": 1.0,
           "total": 6,
           "passed": 6,
@@ -1071,9 +1071,9 @@
           "passed": true,
           "expected": "passed",
           "actual": "drained=['hello']",
-          "duration_ms": 0.135,
+          "duration_ms": 0.1077,
           "root_cause": "none",
-          "detail": "task_id=aad09581...",
+          "detail": "task_id=0fd87910...",
           "consistency": 1.0
         },
         {
@@ -1084,7 +1084,7 @@
           "passed": true,
           "expected": "passed",
           "actual": "cancelled=True",
-          "duration_ms": 0.0576,
+          "duration_ms": 0.0436,
           "root_cause": "none",
           "detail": "",
           "consistency": 1.0
@@ -1097,7 +1097,7 @@
           "passed": true,
           "expected": "passed",
           "actual": "raised=True closed=True",
-          "duration_ms": 0.0169,
+          "duration_ms": 0.0097,
           "root_cause": "none",
           "detail": "",
           "consistency": 1.0
@@ -1110,7 +1110,7 @@
           "passed": true,
           "expected": "passed",
           "actual": "received=1",
-          "duration_ms": 0.0903,
+          "duration_ms": 0.0613,
           "root_cause": "none",
           "detail": "",
           "consistency": 1.0
@@ -1123,7 +1123,7 @@
           "passed": true,
           "expected": "passed",
           "actual": "events=1 closed=True",
-          "duration_ms": 20.6028,
+          "duration_ms": 21.1298,
           "root_cause": "none",
           "detail": "",
           "consistency": 1.0
@@ -1136,7 +1136,7 @@
           "passed": true,
           "expected": "passed",
           "actual": "subscribers=0",
-          "duration_ms": 0.0085,
+          "duration_ms": 0.0079,
           "root_cause": "none",
           "detail": "",
           "consistency": 1.0
@@ -1149,9 +1149,9 @@
         "precision": 0.0,
         "recall": 0.0,
         "f1": 0.0,
-        "latency_p50_ms": 1.6599,
-        "latency_p95_ms": 3.5383,
-        "latency_p99_ms": 3.8439,
+        "latency_p50_ms": 1.9377,
+        "latency_p95_ms": 2.9432,
+        "latency_p99_ms": 3.2494,
         "consistency": 1.0,
         "total": 7,
         "passed": 7,
@@ -1167,9 +1167,9 @@
           "precision": 0.0,
           "recall": 0.0,
           "f1": 0.0,
-          "latency_p50_ms": 1.6599,
-          "latency_p95_ms": 3.5245,
-          "latency_p99_ms": 3.8411,
+          "latency_p50_ms": 2.0343,
+          "latency_p95_ms": 3.0707,
+          "latency_p99_ms": 3.2749,
           "consistency": 1.0,
           "total": 5,
           "passed": 5,
@@ -1184,9 +1184,9 @@
           "precision": 0.0,
           "recall": 0.0,
           "f1": 0.0,
-          "latency_p50_ms": 1.3841,
-          "latency_p95_ms": 2.5206,
-          "latency_p99_ms": 2.6216,
+          "latency_p50_ms": 0.9924,
+          "latency_p95_ms": 1.8432,
+          "latency_p99_ms": 1.9188,
           "consistency": 1.0,
           "total": 2,
           "passed": 2,
@@ -1203,9 +1203,9 @@
           "precision": 0.0,
           "recall": 0.0,
           "f1": 0.0,
-          "latency_p50_ms": 1.6263,
-          "latency_p95_ms": 3.4255,
-          "latency_p99_ms": 3.8213,
+          "latency_p50_ms": 1.7803,
+          "latency_p95_ms": 3.0069,
+          "latency_p99_ms": 3.2621,
           "consistency": 1.0,
           "total": 6,
           "passed": 6,
@@ -1220,9 +1220,9 @@
           "precision": 0.0,
           "recall": 0.0,
           "f1": 0.0,
-          "latency_p50_ms": 2.6469,
-          "latency_p95_ms": 2.6469,
-          "latency_p99_ms": 2.6469,
+          "latency_p50_ms": 1.9377,
+          "latency_p95_ms": 1.9377,
+          "latency_p99_ms": 1.9377,
           "consistency": 1.0,
           "total": 1,
           "passed": 1,
@@ -1242,9 +1242,9 @@
           "passed": true,
           "expected": "passed",
           "actual": "exists=True",
-          "duration_ms": 1.9412,
+          "duration_ms": 2.0343,
           "root_cause": "none",
-          "detail": "path=/var/folders/6b/ljk5bdq50yxcsth24frf05200000gn/T/agentkit-benchmark-khsi9el8/run-0/specs/sm-001/test-spec.yaml",
+          "detail": "path=/var/folders/6b/ljk5bdq50yxcsth24frf05200000gn/T/agentkit-benchmark-idcioepn/run-0/specs/sm-001/test-spec.yaml",
           "consistency": 1.0
         },
         {
@@ -1255,7 +1255,7 @@
           "passed": true,
           "expected": "passed",
           "actual": "steps=2",
-          "duration_ms": 1.5928,
+          "duration_ms": 2.0501,
           "root_cause": "none",
           "detail": "",
           "consistency": 1.0
@@ -1268,7 +1268,7 @@
           "passed": true,
           "expected": "passed",
           "actual": "goal=Updated goal",
-          "duration_ms": 1.6599,
+          "duration_ms": 1.5264,
           "root_cause": "none",
           "detail": "",
           "consistency": 1.0
@@ -1281,7 +1281,7 @@
           "passed": true,
           "expected": "passed",
           "actual": "deleted=True remaining=0",
-          "duration_ms": 1.2623,
+          "duration_ms": 1.3234,
           "root_cause": "none",
           "detail": "",
           "consistency": 1.0
@@ -1294,7 +1294,7 @@
           "passed": true,
           "expected": "passed",
           "actual": "count=2",
-          "duration_ms": 3.9203,
+          "duration_ms": 3.3259,
           "root_cause": "none",
           "detail": "",
           "consistency": 1.0
@@ -1307,7 +1307,7 @@
           "passed": true,
           "expected": "passed",
           "actual": "status=confirmed",
-          "duration_ms": 2.6469,
+          "duration_ms": 1.9377,
           "root_cause": "none",
           "detail": "",
           "consistency": 1.0
@@ -1320,7 +1320,7 @@
           "passed": true,
           "expected": "passed",
           "actual": "result=None",
-          "duration_ms": 0.1212,
+          "duration_ms": 0.0472,
           "root_cause": "none",
           "detail": "",
           "consistency": 1.0
@@ -1333,9 +1333,9 @@
         "precision": 0.0,
         "recall": 0.0,
         "f1": 0.0,
-        "latency_p50_ms": 21.3605,
-        "latency_p95_ms": 47.9633,
-        "latency_p99_ms": 50.7743,
+        "latency_p50_ms": 22.2216,
+        "latency_p95_ms": 47.7927,
+        "latency_p99_ms": 50.9297,
         "consistency": 1.0,
         "total": 5,
         "passed": 5,
@@ -1351,9 +1351,9 @@
           "precision": 0.0,
           "recall": 0.0,
           "f1": 0.0,
-          "latency_p50_ms": 13.962,
-          "latency_p95_ms": 14.5982,
-          "latency_p99_ms": 14.6548,
+          "latency_p50_ms": 16.9399,
+          "latency_p95_ms": 18.6778,
+          "latency_p99_ms": 18.8323,
           "consistency": 1.0,
           "total": 2,
           "passed": 2,
@@ -1368,9 +1368,9 @@
           "precision": 0.0,
           "recall": 0.0,
           "f1": 0.0,
-          "latency_p50_ms": 51.477,
-          "latency_p95_ms": 51.477,
-          "latency_p99_ms": 51.477,
+          "latency_p50_ms": 51.714,
+          "latency_p95_ms": 51.714,
+          "latency_p99_ms": 51.714,
           "consistency": 1.0,
           "total": 1,
           "passed": 1,
@@ -1402,9 +1402,9 @@
           "precision": 0.0,
           "recall": 0.0,
           "f1": 0.0,
-          "latency_p50_ms": 28.052,
-          "latency_p95_ms": 28.052,
-          "latency_p99_ms": 28.052,
+          "latency_p50_ms": 25.5723,
+          "latency_p95_ms": 25.5723,
+          "latency_p99_ms": 25.5723,
           "consistency": 1.0,
           "total": 1,
           "passed": 1,
@@ -1421,9 +1421,9 @@
           "precision": 0.0,
           "recall": 0.0,
           "f1": 0.0,
-          "latency_p50_ms": 13.962,
-          "latency_p95_ms": 14.5982,
-          "latency_p99_ms": 14.6548,
+          "latency_p50_ms": 16.9399,
+          "latency_p95_ms": 18.6778,
+          "latency_p99_ms": 18.8323,
           "consistency": 1.0,
           "total": 2,
           "passed": 2,
@@ -1438,9 +1438,9 @@
           "precision": 0.0,
           "recall": 0.0,
           "f1": 0.0,
-          "latency_p50_ms": 39.7645,
-          "latency_p95_ms": 50.3057,
-          "latency_p99_ms": 51.2428,
+          "latency_p50_ms": 38.6431,
+          "latency_p95_ms": 50.4069,
+          "latency_p99_ms": 51.4526,
           "consistency": 1.0,
           "total": 3,
           "passed": 3,
@@ -1460,7 +1460,7 @@
           "passed": true,
           "expected": "passed",
           "actual": "passed=True attempts=1",
-          "duration_ms": 14.6689,
+          "duration_ms": 18.8709,
           "root_cause": "none",
           "detail": "",
           "consistency": 1.0
@@ -1473,7 +1473,7 @@
           "passed": true,
           "expected": "passed",
           "actual": "passed=False errors=1",
-          "duration_ms": 13.255,
+          "duration_ms": 15.0089,
           "root_cause": "none",
           "detail": "",
           "consistency": 1.0
@@ -1486,7 +1486,7 @@
           "passed": true,
           "expected": "passed",
           "actual": "attempts=3 callbacks=2",
-          "duration_ms": 51.477,
+          "duration_ms": 51.714,
           "root_cause": "none",
           "detail": "",
           "consistency": 1.0
@@ -1499,7 +1499,7 @@
           "passed": true,
           "expected": "passed",
           "actual": "passed=False errors=1",
-          "duration_ms": 508.0547,
+          "duration_ms": 509.6538,
           "root_cause": "none",
           "detail": "timeout errors=['Command timed out after 0.5s: sleep 10']",
           "consistency": 1.0
@@ -1512,324 +1512,132 @@
           "passed": true,
           "expected": "passed",
           "actual": "passed=False",
-          "duration_ms": 28.052,
+          "duration_ms": 25.5723,
           "root_cause": "none",
           "detail": "",
           "consistency": 1.0
         }
       ]
     },
-    "llm_reasoning": {
-      "metrics": {
-        "accuracy": 0.8,
-        "precision": 0.0,
-        "recall": 0.0,
-        "f1": 0.0,
-        "latency_p50_ms": 37450.2869,
-        "latency_p95_ms": 41462.6612,
-        "latency_p99_ms": 41970.7996,
-        "consistency": 1.0,
-        "total": 5,
-        "passed": 4,
-        "failed": 1,
-        "accuracy_mean": 0.8,
-        "accuracy_std": 0.0,
-        "ci_lower": 0.3755,
-        "ci_upper": 0.9638
-      },
-      "by_category": {
-        "intent_understanding": {
-          "accuracy": 0.0,
-          "precision": 0.0,
-          "recall": 0.0,
-          "f1": 0.0,
-          "latency_p50_ms": 20001.7786,
-          "latency_p95_ms": 20001.7786,
-          "latency_p99_ms": 20001.7786,
-          "consistency": 1.0,
-          "total": 1,
-          "passed": 0,
-          "failed": 1,
-          "accuracy_mean": 0.0,
-          "accuracy_std": 0.0,
-          "ci_lower": 0.0,
-          "ci_upper": 0.7935
-        },
-        "tool_selection": {
-          "accuracy": 1.0,
-          "precision": 0.0,
-          "recall": 0.0,
-          "f1": 0.0,
-          "latency_p50_ms": 4584.2609,
-          "latency_p95_ms": 4584.2609,
-          "latency_p99_ms": 4584.2609,
-          "consistency": 1.0,
-          "total": 1,
-          "passed": 1,
-          "failed": 0,
-          "accuracy_mean": 1.0,
-          "accuracy_std": 0.0,
-          "ci_lower": 0.2065,
-          "ci_upper": 1.0
-        },
-        "multi_step": {
-          "accuracy": 1.0,
-          "precision": 0.0,
-          "recall": 0.0,
-          "f1": 0.0,
-          "latency_p50_ms": 42097.8342,
-          "latency_p95_ms": 42097.8342,
-          "latency_p99_ms": 42097.8342,
-          "consistency": 1.0,
-          "total": 1,
-          "passed": 1,
-          "failed": 0,
-          "accuracy_mean": 1.0,
-          "accuracy_std": 0.0,
-          "ci_lower": 0.2065,
-          "ci_upper": 1.0
-        },
-        "code_generation": {
-          "accuracy": 1.0,
-          "precision": 0.0,
-          "recall": 0.0,
-          "f1": 0.0,
-          "latency_p50_ms": 37450.2869,
-          "latency_p95_ms": 37450.2869,
-          "latency_p99_ms": 37450.2869,
-          "consistency": 1.0,
-          "total": 1,
-          "passed": 1,
-          "failed": 0,
-          "accuracy_mean": 1.0,
-          "accuracy_std": 0.0,
-          "ci_lower": 0.2065,
-          "ci_upper": 1.0
-        },
-        "error_recovery": {
-          "accuracy": 1.0,
-          "precision": 0.0,
-          "recall": 0.0,
-          "f1": 0.0,
-          "latency_p50_ms": 38921.9691,
-          "latency_p95_ms": 38921.9691,
-          "latency_p99_ms": 38921.9691,
-          "consistency": 1.0,
-          "total": 1,
-          "passed": 1,
-          "failed": 0,
-          "accuracy_mean": 1.0,
-          "accuracy_std": 0.0,
-          "ci_lower": 0.2065,
-          "ci_upper": 1.0
-        }
-      },
-      "by_difficulty": {
-        "easy": {
-          "accuracy": 0.0,
-          "precision": 0.0,
-          "recall": 0.0,
-          "f1": 0.0,
-          "latency_p50_ms": 20001.7786,
-          "latency_p95_ms": 20001.7786,
-          "latency_p99_ms": 20001.7786,
-          "consistency": 1.0,
-          "total": 1,
-          "passed": 0,
-          "failed": 1,
-          "accuracy_mean": 0.0,
-          "accuracy_std": 0.0,
-          "ci_lower": 0.0,
-          "ci_upper": 0.7935
-        },
-        "medium": {
-          "accuracy": 1.0,
-          "precision": 0.0,
-          "recall": 0.0,
-          "f1": 0.0,
-          "latency_p50_ms": 21017.2739,
-          "latency_p95_ms": 35806.9856,
-          "latency_p99_ms": 37121.6266,
-          "consistency": 1.0,
-          "total": 2,
-          "passed": 2,
-          "failed": 0,
-          "accuracy_mean": 1.0,
-          "accuracy_std": 0.0,
-          "ci_lower": 0.3424,
-          "ci_upper": 1.0
-        },
-        "hard": {
-          "accuracy": 1.0,
-          "precision": 0.0,
-          "recall": 0.0,
-          "f1": 0.0,
-          "latency_p50_ms": 40509.9016,
-          "latency_p95_ms": 41939.0409,
-          "latency_p99_ms": 42066.0755,
-          "consistency": 1.0,
-          "total": 2,
-          "passed": 2,
-          "failed": 0,
-          "accuracy_mean": 1.0,
-          "accuracy_std": 0.0,
-          "ci_lower": 0.3424,
-          "ci_upper": 1.0
-        }
-      },
-      "cases": [
-        {
-          "task_id": "llm-001",
-          "dimension": "llm_reasoning",
-          "category": "intent_understanding",
-          "difficulty": "easy",
-          "passed": false,
-          "expected": "react",
-          "actual": "timeout",
-          "duration_ms": 20001.7786,
-          "root_cause": "timeout",
-          "detail": "LLM call timed out after 20.0s",
-          "consistency": 1.0
-        },
-        {
-          "task_id": "llm-002",
-          "dimension": "llm_reasoning",
-          "category": "tool_selection",
-          "difficulty": "medium",
-          "passed": true,
-          "expected": "react",
-          "actual": "mode=react tokens=133 len=111",
-          "duration_ms": 4584.2609,
-          "root_cause": "none",
-          "detail": "mode=react keywords=['search', '搜索', 'web', '论文', 'paper', 'agent'] stream=False",
-          "consistency": 1.0
-        },
-        {
-          "task_id": "llm-003",
-          "dimension": "llm_reasoning",
-          "category": "multi_step",
-          "difficulty": "hard",
-          "passed": true,
-          "expected": "react",
-          "actual": "mode=react tokens=0 len=26",
-          "duration_ms": 42097.8342,
-          "root_cause": "none",
-          "detail": "mode=react keywords=['fib', '递归', '优化', '缓存', 'memo', '迭代', '动态规划', '性能'] stream=True",
-          "consistency": 1.0
-        },
-        {
-          "task_id": "llm-004",
-          "dimension": "llm_reasoning",
-          "category": "code_generation",
-          "difficulty": "medium",
-          "passed": true,
-          "expected": "react",
-          "actual": "mode=react tokens=2055 len=1485",
-          "duration_ms": 37450.2869,
-          "root_cause": "none",
-          "detail": "mode=react keywords=['def', 'fib', 'return', 'python'] stream=False",
-          "consistency": 1.0
-        },
-        {
-          "task_id": "llm-005",
-          "dimension": "llm_reasoning",
-          "category": "error_recovery",
-          "difficulty": "hard",
-          "passed": true,
-          "expected": "react",
-          "actual": "mode=react tokens=0 len=52",
-          "duration_ms": 38921.9691,
-          "root_cause": "none",
-          "detail": "mode=react keywords=['pip', 'install', 'agentkit', '安装', '模块'] stream=True",
-          "consistency": 1.0
-        }
-      ]
-    },
-    "gui_integration": {
+    "board_meeting": {
       "metrics": {
         "accuracy": 1.0,
         "precision": 1.0,
         "recall": 1.0,
         "f1": 1.0,
-        "latency_p50_ms": 0.0,
-        "latency_p95_ms": 0.0,
-        "latency_p99_ms": 0.0,
+        "latency_p50_ms": 0.0107,
+        "latency_p95_ms": 0.3934,
+        "latency_p99_ms": 1.1873,
         "consistency": 1.0,
-        "total": 5,
-        "passed": 5,
+        "total": 18,
+        "passed": 18,
         "failed": 0,
         "accuracy_mean": 1.0,
         "accuracy_std": 0.0,
-        "ci_lower": 0.5655,
+        "ci_lower": 0.8241,
         "ci_upper": 1.0
       },
       "by_category": {
-        "service_startup": {
+        "default_template": {
           "accuracy": 1.0,
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0,
-          "latency_p95_ms": 0.0,
-          "latency_p99_ms": 0.0,
+          "latency_p50_ms": 0.0141,
+          "latency_p95_ms": 0.031,
+          "latency_p99_ms": 0.0325,
           "consistency": 1.0,
-          "total": 1,
-          "passed": 1,
+          "total": 3,
+          "passed": 3,
           "failed": 0,
           "accuracy_mean": 1.0,
           "accuracy_std": 0.0,
-          "ci_lower": 0.2065,
+          "ci_lower": 0.4385,
           "ci_upper": 1.0
         },
-        "api_availability": {
+        "explicit_experts": {
           "accuracy": 1.0,
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0,
-          "latency_p95_ms": 0.0,
-          "latency_p99_ms": 0.0,
+          "latency_p50_ms": 0.0138,
+          "latency_p95_ms": 0.0178,
+          "latency_p99_ms": 0.0181,
           "consistency": 1.0,
-          "total": 2,
-          "passed": 2,
+          "total": 3,
+          "passed": 3,
           "failed": 0,
           "accuracy_mean": 1.0,
           "accuracy_std": 0.0,
-          "ci_lower": 0.3424,
+          "ci_lower": 0.4385,
           "ci_upper": 1.0
         },
-        "websocket": {
+        "topic_extraction": {
           "accuracy": 1.0,
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0,
-          "latency_p95_ms": 0.0,
-          "latency_p99_ms": 0.0,
+          "latency_p50_ms": 0.005,
+          "latency_p95_ms": 0.0073,
+          "latency_p99_ms": 0.0075,
           "consistency": 1.0,
-          "total": 1,
-          "passed": 1,
+          "total": 3,
+          "passed": 3,
           "failed": 0,
           "accuracy_mean": 1.0,
           "accuracy_std": 0.0,
-          "ci_lower": 0.2065,
+          "ci_lower": 0.4385,
           "ci_upper": 1.0
         },
-        "frontend": {
+        "no_match": {
           "accuracy": 1.0,
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0,
-          "latency_p95_ms": 0.0,
-          "latency_p99_ms": 0.0,
+          "latency_p50_ms": 0.0032,
+          "latency_p95_ms": 0.0032,
+          "latency_p99_ms": 0.0032,
           "consistency": 1.0,
-          "total": 1,
-          "passed": 1,
+          "total": 3,
+          "passed": 3,
           "failed": 0,
           "accuracy_mean": 1.0,
           "accuracy_std": 0.0,
-          "ci_lower": 0.2065,
+          "ci_lower": 0.4385,
+          "ci_upper": 1.0
+        },
+        "name_validation": {
+          "accuracy": 1.0,
+          "precision": 1.0,
+          "recall": 1.0,
+          "f1": 1.0,
+          "latency_p50_ms": 0.0168,
+          "latency_p95_ms": 0.1981,
+          "latency_p99_ms": 0.2143,
+          "consistency": 1.0,
+          "total": 3,
+          "passed": 3,
+          "failed": 0,
+          "accuracy_mean": 1.0,
+          "accuracy_std": 0.0,
+          "ci_lower": 0.4385,
+          "ci_upper": 1.0
+        },
+        "stop_command": {
+          "accuracy": 1.0,
+          "precision": 1.0,
+          "recall": 1.0,
+          "f1": 1.0,
+          "latency_p50_ms": 0.0102,
+          "latency_p95_ms": 1.2482,
+          "latency_p99_ms": 1.3583,
+          "consistency": 1.0,
+          "total": 3,
+          "passed": 3,
+          "failed": 0,
+          "accuracy_mean": 1.0,
+          "accuracy_std": 0.0,
+          "ci_lower": 0.4385,
           "ci_upper": 1.0
         }
       },
@@ -1839,16 +1647,16 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0,
-          "latency_p95_ms": 0.0,
-          "latency_p99_ms": 0.0,
+          "latency_p50_ms": 0.005,
+          "latency_p95_ms": 0.7093,
+          "latency_p99_ms": 1.2505,
           "consistency": 1.0,
-          "total": 2,
-          "passed": 2,
+          "total": 11,
+          "passed": 11,
           "failed": 0,
           "accuracy_mean": 1.0,
           "accuracy_std": 0.0,
-          "ci_lower": 0.3424,
+          "ci_lower": 0.7412,
           "ci_upper": 1.0
         },
         "medium": {
@@ -1856,162 +1664,255 @@
           "precision": 1.0,
           "recall": 1.0,
           "f1": 1.0,
-          "latency_p50_ms": 0.0,
-          "latency_p95_ms": 0.0,
-          "latency_p99_ms": 0.0,
+          "latency_p50_ms": 0.0138,
+          "latency_p95_ms": 0.1583,
+          "latency_p99_ms": 0.2063,
           "consistency": 1.0,
-          "total": 2,
-          "passed": 2,
+          "total": 7,
+          "passed": 7,
           "failed": 0,
           "accuracy_mean": 1.0,
           "accuracy_std": 0.0,
-          "ci_lower": 0.3424,
-          "ci_upper": 1.0
-        },
-        "hard": {
-          "accuracy": 1.0,
-          "precision": 1.0,
-          "recall": 1.0,
-          "f1": 1.0,
-          "latency_p50_ms": 0.0,
-          "latency_p95_ms": 0.0,
-          "latency_p99_ms": 0.0,
-          "consistency": 1.0,
-          "total": 1,
-          "passed": 1,
-          "failed": 0,
-          "accuracy_mean": 1.0,
-          "accuracy_std": 0.0,
-          "ci_lower": 0.2065,
+          "ci_lower": 0.6457,
           "ci_upper": 1.0
         }
       },
       "cases": [
         {
-          "task_id": "gui-001",
-          "dimension": "gui_integration",
-          "category": "service_startup",
+          "task_id": "bd-001",
+          "dimension": "board_meeting",
+          "category": "default_template",
           "difficulty": "easy",
           "passed": true,
-          "expected": "started",
-          "actual": "started",
-          "duration_ms": 0.0,
+          "expected": "board",
+          "actual": "board",
+          "duration_ms": 0.0329,
           "root_cause": "none",
-          "detail": "port=50772 pid=40232",
+          "detail": "matched=True board_mode=True use_default=True topic='讨论是否应该进入东南亚市场'",
           "consistency": 1.0
         },
         {
-          "task_id": "gui-002",
-          "dimension": "gui_integration",
-          "category": "api_availability",
-          "difficulty": "medium",
-          "passed": true,
-          "expected": "200",
-          "actual": "200",
-          "duration_ms": 0.0,
-          "root_cause": "none",
-          "detail": "health=200 skills=200",
-          "consistency": 1.0
-        },
-        {
-          "task_id": "gui-003",
-          "dimension": "gui_integration",
-          "category": "api_availability",
-          "difficulty": "medium",
-          "passed": true,
-          "expected": "reachable",
-          "actual": "reachable",
-          "duration_ms": 0.0,
-          "root_cause": "none",
-          "detail": "status=405",
-          "consistency": 1.0
-        },
-        {
-          "task_id": "gui-004",
-          "dimension": "gui_integration",
-          "category": "websocket",
-          "difficulty": "hard",
-          "passed": true,
-          "expected": "connected",
-          "actual": "connected",
-          "duration_ms": 0.0,
-          "root_cause": "none",
-          "detail": "connected+closed",
-          "consistency": 1.0
-        },
-        {
-          "task_id": "gui-005",
-          "dimension": "gui_integration",
-          "category": "frontend",
+          "task_id": "bd-002",
+          "dimension": "board_meeting",
+          "category": "default_template",
           "difficulty": "easy",
           "passed": true,
-          "expected": "html",
-          "actual": "html",
-          "duration_ms": 0.0,
+          "expected": "board",
+          "actual": "board",
+          "duration_ms": 0.0141,
           "root_cause": "none",
-          "detail": "status=200 len=465",
+          "detail": "matched=True board_mode=True use_default=True topic='AI产品定价策略应该怎么做'",
+          "consistency": 1.0
+        },
+        {
+          "task_id": "bd-003",
+          "dimension": "board_meeting",
+          "category": "default_template",
+          "difficulty": "medium",
+          "passed": true,
+          "expected": "board",
+          "actual": "board",
+          "duration_ms": 0.0113,
+          "root_cause": "none",
+          "detail": "matched=True board_mode=True use_default=True topic='讨论创业公司融资节奏'",
+          "consistency": 1.0
+        },
+        {
+          "task_id": "bd-004",
+          "dimension": "board_meeting",
+          "category": "explicit_experts",
+          "difficulty": "medium",
+          "passed": true,
+          "expected": "board",
+          "actual": "board",
+          "duration_ms": 0.0182,
+          "root_cause": "none",
+          "detail": "matched=True experts=['elon_musk', 'jeff_bezos'] use_default=False",
+          "consistency": 1.0
+        },
+        {
+          "task_id": "bd-005",
+          "dimension": "board_meeting",
+          "category": "explicit_experts",
+          "difficulty": "medium",
+          "passed": true,
+          "expected": "board",
+          "actual": "board",
+          "duration_ms": 0.0112,
+          "root_cause": "none",
+          "detail": "matched=True experts=['charlie_munger', 'warren_buffett'] use_default=False",
+          "consistency": 1.0
+        },
+        {
+          "task_id": "bd-006",
+          "dimension": "board_meeting",
+          "category": "explicit_experts",
+          "difficulty": "medium",
+          "passed": true,
+          "expected": "board",
+          "actual": "board",
+          "duration_ms": 0.0138,
+          "root_cause": "none",
+          "detail": "matched=True experts=['elon_musk', 'jeff_bezos', 'allenzhang'] use_default=False",
+          "consistency": 1.0
+        },
+        {
+          "task_id": "bd-007",
+          "dimension": "board_meeting",
+          "category": "topic_extraction",
+          "difficulty": "easy",
+          "passed": true,
+          "expected": "讨论是否应该进入东南亚市场",
+          "actual": "讨论是否应该进入东南亚市场",
+          "duration_ms": 0.005,
+          "root_cause": "none",
+          "detail": "input='@board 讨论是否应该进入东南亚市场' topic='讨论是否应该进入东南亚市场' matched=True",
+          "consistency": 1.0
+        },
+        {
+          "task_id": "bd-008",
+          "dimension": "board_meeting",
+          "category": "topic_extraction",
+          "difficulty": "easy",
+          "passed": true,
+          "expected": "火星商业化方案",
+          "actual": "火星商业化方案",
+          "duration_ms": 0.0076,
+          "root_cause": "none",
+          "detail": "input='@board:elon_musk,jeff_bezos 火星商业化方案' topic='火星商业化方案' matched=True",
+          "consistency": 1.0
+        },
+        {
+          "task_id": "bd-009",
+          "dimension": "board_meeting",
+          "category": "topic_extraction",
+          "difficulty": "easy",
+          "passed": true,
+          "expected": "",
+          "actual": "",
+          "duration_ms": 0.0049,
+          "root_cause": "none",
+          "detail": "input='@board' topic='' matched=True",
+          "consistency": 1.0
+        },
+        {
+          "task_id": "bd-010",
+          "dimension": "board_meeting",
+          "category": "no_match",
+          "difficulty": "easy",
+          "passed": true,
+          "expected": "not_board",
+          "actual": "not_board",
+          "duration_ms": 0.0032,
+          "root_cause": "none",
+          "detail": "input='讨论一下市场策略' matched=False board_mode=False",
+          "consistency": 1.0
+        },
+        {
+          "task_id": "bd-011",
+          "dimension": "board_meeting",
+          "category": "no_match",
+          "difficulty": "easy",
+          "passed": true,
+          "expected": "not_board",
+          "actual": "not_board",
+          "duration_ms": 0.0032,
+          "root_cause": "none",
+          "detail": "input='@team:analyst,writer 协作完成任务' matched=False board_mode=False",
+          "consistency": 1.0
+        },
+        {
+          "task_id": "bd-012",
+          "dimension": "board_meeting",
+          "category": "no_match",
+          "difficulty": "easy",
+          "passed": true,
+          "expected": "not_board",
+          "actual": "not_board",
+          "duration_ms": 0.0031,
+          "root_cause": "none",
+          "detail": "input='@skill:react_agent 查看ip' matched=False board_mode=False",
+          "consistency": 1.0
+        },
+        {
+          "task_id": "bd-013",
+          "dimension": "board_meeting",
+          "category": "name_validation",
+          "difficulty": "medium",
+          "passed": true,
+          "expected": "2_valid",
+          "actual": "2_valid",
+          "duration_ms": 0.0103,
+          "root_cause": "none",
+          "detail": "input='@board:elon_musk,jeff_bezos 主题' experts=['elon_musk', 'jeff_bezos'] max=10",
+          "consistency": 1.0
+        },
+        {
+          "task_id": "bd-014",
+          "dimension": "board_meeting",
+          "category": "name_validation",
+          "difficulty": "medium",
+          "passed": true,
+          "expected": "default_fallback",
+          "actual": "default_fallback",
+          "duration_ms": 0.2183,
+          "root_cause": "none",
+          "detail": "input='@board:@#$ 主题' experts=['elon_musk', 'jeff_bezos', 'allenzhang', 'charlie_munger', 'paul_graham'] max=10",
+          "consistency": 1.0
+        },
+        {
+          "task_id": "bd-015",
+          "dimension": "board_meeting",
+          "category": "name_validation",
+          "difficulty": "medium",
+          "passed": true,
+          "expected": "10_capped",
+          "actual": "10_capped",
+          "duration_ms": 0.0168,
+          "root_cause": "none",
+          "detail": "input='@board:a,b,c,d,e,f,g,h,i,j,k 主题' experts=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] max=10",
+          "consistency": 1.0
+        },
+        {
+          "task_id": "bd-016",
+          "dimension": "board_meeting",
+          "category": "stop_command",
+          "difficulty": "easy",
+          "passed": true,
+          "expected": "is_stop",
+          "actual": "is_stop",
+          "duration_ms": 1.3858,
+          "root_cause": "none",
+          "detail": "input='/stop' stop_commands=frozenset({'结束讨论', '停止讨论', 'stop', '/stop'})",
+          "consistency": 1.0
+        },
+        {
+          "task_id": "bd-017",
+          "dimension": "board_meeting",
+          "category": "stop_command",
+          "difficulty": "easy",
+          "passed": true,
+          "expected": "is_stop",
+          "actual": "is_stop",
+          "duration_ms": 0.0102,
+          "root_cause": "none",
+          "detail": "input='停止讨论' stop_commands=frozenset({'结束讨论', '停止讨论', 'stop', '/stop'})",
+          "consistency": 1.0
+        },
+        {
+          "task_id": "bd-018",
+          "dimension": "board_meeting",
+          "category": "stop_command",
+          "difficulty": "easy",
+          "passed": true,
+          "expected": "not_stop",
+          "actual": "not_stop",
+          "duration_ms": 0.0022,
+          "root_cause": "none",
+          "detail": "input='继续讨论' stop_commands=frozenset({'结束讨论', '停止讨论', 'stop', '/stop'})",
           "consistency": 1.0
         }
       ]
     }
-  },
-  "baseline_comparison": {
-    "status": "compared",
-    "dimensions": {
-      "preprocessing": {
-        "baseline_accuracy": 1.0,
-        "current_accuracy": 1.0,
-        "change": 0.0,
-        "direction": "—"
-      },
-      "overfitting": {
-        "baseline_accuracy": 1.0,
-        "current_accuracy": 1.0,
-        "change": 0.0,
-        "direction": "—"
-      },
-      "efficiency": {
-        "baseline_accuracy": 1.0,
-        "current_accuracy": 1.0,
-        "change": 0.0,
-        "direction": "—"
-      },
-      "tool_search": {
-        "baseline_accuracy": 1.0,
-        "current_accuracy": 1.0,
-        "change": 0.0,
-        "direction": "—"
-      },
-      "event_model": {
-        "baseline_accuracy": 1.0,
-        "current_accuracy": 1.0,
-        "change": 0.0,
-        "direction": "—"
-      },
-      "spec_management": {
-        "baseline_accuracy": 1.0,
-        "current_accuracy": 1.0,
-        "change": 0.0,
-        "direction": "—"
-      },
-      "verification": {
-        "baseline_accuracy": 1.0,
-        "current_accuracy": 1.0,
-        "change": 0.0,
-        "direction": "—"
-      },
-      "llm_reasoning": {
-        "baseline_accuracy": 0.0,
-        "current_accuracy": 0.8,
-        "change": 0.8,
-        "direction": "↑"
-      },
-      "gui_integration": {
-        "baseline_accuracy": 0.0,
-        "current_accuracy": 1.0,
-        "change": 1.0,
-        "direction": "↑"
-      }
-    }
   }
 }
\ No newline at end of file
diff --git a/test-results/benchmark/benchmark_report.md b/test-results/benchmark/benchmark_report.md
index fd51ea8..1107aa1 100644
--- a/test-results/benchmark/benchmark_report.md
+++ b/test-results/benchmark/benchmark_report.md
@@ -1,11 +1,11 @@
 # AgentKit 能力基准测试报告
 
 ## 测试概要
-- 时间: 2026-06-17T05:29:35.443678+00:00
+- 时间: 2026-06-17T15:47:33.591101+00:00
 - 版本: 0.1.0
-- 模式: all
+- 模式: mock
 - 运行次数: 1
-- 总体准确率: 98.4% ± 0.0%
+- 总体准确率: 100.0% ± 0.0%
 
 ## 与行业 Benchmark 对比
 
@@ -26,9 +26,9 @@
 | Precision | 100.0% |
 | Recall | 100.0% |
 | F1 | 100.0% |
-| Latency p50 | 0.02ms |
+| Latency p50 | 0.01ms |
 | Latency p95 | 0.07ms |
-| Latency p99 | 0.13ms |
+| Latency p99 | 0.11ms |
 | Consistency | 100.0% |
 | Total / Pass / Fail | 15 / 15 / 0 |
 
@@ -58,9 +58,9 @@
 | Precision | 100.0% |
 | Recall | 100.0% |
 | F1 | 100.0% |
-| Latency p50 | 0.04ms |
-| Latency p95 | 0.05ms |
-| Latency p99 | 0.05ms |
+| Latency p50 | 0.01ms |
+| Latency p95 | 0.03ms |
+| Latency p99 | 0.03ms |
 | Consistency | 100.0% |
 | Total / Pass / Fail | 5 / 5 / 0 |
 
@@ -91,9 +91,9 @@
 | Precision | 0.0% |
 | Recall | 0.0% |
 | F1 | 0.0% |
-| Latency p50 | 0.43ms |
-| Latency p95 | 0.79ms |
-| Latency p99 | 0.85ms |
+| Latency p50 | 0.33ms |
+| Latency p95 | 0.64ms |
+| Latency p99 | 0.67ms |
 | Consistency | 100.0% |
 | Total / Pass / Fail | 5 / 5 / 0 |
 
@@ -120,9 +120,9 @@
 | Precision | 83.3% |
 | Recall | 83.3% |
 | F1 | 83.3% |
-| Latency p50 | 0.03ms |
-| Latency p95 | 0.03ms |
-| Latency p99 | 0.03ms |
+| Latency p50 | 0.01ms |
+| Latency p95 | 0.02ms |
+| Latency p99 | 0.02ms |
 | Consistency | 100.0% |
 | Total / Pass / Fail | 10 / 10 / 0 |
 
@@ -151,9 +151,9 @@
 | Precision | 0.0% |
 | Recall | 0.0% |
 | F1 | 0.0% |
-| Latency p50 | 0.07ms |
-| Latency p95 | 15.49ms |
-| Latency p99 | 19.58ms |
+| Latency p50 | 0.05ms |
+| Latency p95 | 15.87ms |
+| Latency p99 | 20.08ms |
 | Consistency | 100.0% |
 | Total / Pass / Fail | 6 / 6 / 0 |
 
@@ -179,9 +179,9 @@
 | Precision | 0.0% |
 | Recall | 0.0% |
 | F1 | 0.0% |
-| Latency p50 | 1.66ms |
-| Latency p95 | 3.54ms |
-| Latency p99 | 3.84ms |
+| Latency p50 | 1.94ms |
+| Latency p95 | 2.94ms |
+| Latency p99 | 3.25ms |
 | Consistency | 100.0% |
 | Total / Pass / Fail | 7 / 7 / 0 |
 
@@ -208,9 +208,9 @@
 | Precision | 0.0% |
 | Recall | 0.0% |
 | F1 | 0.0% |
-| Latency p50 | 21.36ms |
-| Latency p95 | 47.96ms |
-| Latency p99 | 50.77ms |
+| Latency p50 | 22.22ms |
+| Latency p95 | 47.79ms |
+| Latency p99 | 50.93ms |
 | Consistency | 100.0% |
 | Total / Pass / Fail | 5 / 5 / 0 |
 
@@ -230,92 +230,39 @@
 | easy | 2 | 2 | 100.0% |
 | medium | 3 | 3 | 100.0% |
 
-### 8. LLM 推理能力 (LLM Reasoning) [LLM]
-
-| 指标 | 值 |
-|---|---|
-| Accuracy | 80.0% ± 0.0% |
-| 95% CI | [37.5%, 96.4%] |
-| Precision | 0.0% |
-| Recall | 0.0% |
-| F1 | 0.0% |
-| Latency p50 | 37450.29ms |
-| Latency p95 | 41462.66ms |
-| Latency p99 | 41970.80ms |
-| Consistency | 100.0% |
-| Total / Pass / Fail | 5 / 4 / 1 |
-
-#### 按类别分布
-
-| 类别 | 用例数 | 通过 | 准确率 |
-|---|---|---|---|
-| intent_understanding | 1 | 0 | 0.0% |
-| tool_selection | 1 | 1 | 100.0% |
-| multi_step | 1 | 1 | 100.0% |
-| code_generation | 1 | 1 | 100.0% |
-| error_recovery | 1 | 1 | 100.0% |
-
-#### 按难度分布
-
-| 难度 | 用例数 | 通过 | 准确率 |
-|---|---|---|---|
-| easy | 1 | 0 | 0.0% |
-| medium | 2 | 2 | 100.0% |
-| hard | 2 | 2 | 100.0% |
-
-#### 失败用例分析
-
-| 用例 ID | 类别 | 难度 | 期望 | 实际 | 根因 |
-|---|---|---|---|---|---|
-| llm-001 | intent_understanding | easy | react | timeout | timeout |
-
-### 9. GUI 集成测试 (GUI Integration) [GUI]
+### 8. 私董会路由 (Board Meeting Routing) [Mock]
 
 | 指标 | 值 |
 |---|---|
 | Accuracy | 100.0% ± 0.0% |
-| 95% CI | [56.5%, 100.0%] |
+| 95% CI | [82.4%, 100.0%] |
 | Precision | 100.0% |
 | Recall | 100.0% |
 | F1 | 100.0% |
-| Latency p50 | 0.00ms |
-| Latency p95 | 0.00ms |
-| Latency p99 | 0.00ms |
+| Latency p50 | 0.01ms |
+| Latency p95 | 0.39ms |
+| Latency p99 | 1.19ms |
 | Consistency | 100.0% |
-| Total / Pass / Fail | 5 / 5 / 0 |
+| Total / Pass / Fail | 18 / 18 / 0 |
 
 #### 按类别分布
 
 | 类别 | 用例数 | 通过 | 准确率 |
 |---|---|---|---|
-| service_startup | 1 | 1 | 100.0% |
-| api_availability | 2 | 2 | 100.0% |
-| websocket | 1 | 1 | 100.0% |
-| frontend | 1 | 1 | 100.0% |
+| default_template | 3 | 3 | 100.0% |
+| explicit_experts | 3 | 3 | 100.0% |
+| topic_extraction | 3 | 3 | 100.0% |
+| no_match | 3 | 3 | 100.0% |
+| name_validation | 3 | 3 | 100.0% |
+| stop_command | 3 | 3 | 100.0% |
 
 #### 按难度分布
 
 | 难度 | 用例数 | 通过 | 准确率 |
 |---|---|---|---|
-| easy | 2 | 2 | 100.0% |
-| medium | 2 | 2 | 100.0% |
-| hard | 1 | 1 | 100.0% |
-
-## 基线对比
-
-| 维度 | 基线准确率 | 当前准确率 | 变化 |
-|---|---|---|---|
-| preprocessing | 100.0% | 100.0% | — |
-| overfitting | 100.0% | 100.0% | — |
-| efficiency | 100.0% | 100.0% | — |
-| tool_search | 100.0% | 100.0% | — |
-| event_model | 100.0% | 100.0% | — |
-| spec_management | 100.0% | 100.0% | — |
-| verification | 100.0% | 100.0% | — |
-| llm_reasoning | 0.0% | 80.0% | ↑ |
-| gui_integration | 0.0% | 100.0% | ↑ |
+| easy | 11 | 11 | 100.0% |
+| medium | 7 | 7 | 100.0% |
 
 ## 问题总结与改进建议
 
-- **llm_reasoning**: 准确率 80.0% 低于 90%，建议检查失败用例并优化
-- **llm_reasoning**: P95 延迟 41462.66ms 较高，建议优化性能
+- 所有维度表现良好，无需特别改进。
diff --git a/test-results/e2e/capability_report.json b/test-results/e2e/capability_report.json
new file mode 100644
index 0000000..0bd22df
--- /dev/null
+++ b/test-results/e2e/capability_report.json
@@ -0,0 +1,3213 @@
+{
+  "generated_at": "2026-06-15T16:59:06.575194+00:00",
+  "total_observations": 82,
+  "overall_skill_recall": 0.9,
+  "overall_skill_precision": 0.9,
+  "overall_skill_f1": 0.9,
+  "overall_execution_mode_accuracy": 0.4038,
+  "overall_task_success_rate": 1.0,
+  "category_metrics": [
+    {
+      "category": "routing",
+      "subcategory": "explicit_prefix",
+      "total": 1,
+      "skill_correct": 1,
+      "skill_recall": 1.0,
+      "skill_precision": 1.0,
+      "skill_f1": 1.0,
+      "execution_mode_correct": 1,
+      "execution_mode_accuracy": 1.0,
+      "complexity_correct": 0,
+      "complexity_accuracy": 0.0,
+      "task_success_rate": 1.0,
+      "avg_response_time_ms": 0.05
+    },
+    {
+      "category": "routing",
+      "subcategory": "greeting",
+      "total": 2,
+      "skill_correct": 2,
+      "skill_recall": 1.0,
+      "skill_precision": 1.0,
+      "skill_f1": 1.0,
+      "execution_mode_correct": 2,
+      "execution_mode_accuracy": 1.0,
+      "complexity_correct": 2,
+      "complexity_accuracy": 1.0,
+      "task_success_rate": 1.0,
+      "avg_response_time_ms": 0.03
+    },
+    {
+      "category": "routing",
+      "subcategory": "identity",
+      "total": 1,
+      "skill_correct": 1,
+      "skill_recall": 1.0,
+      "skill_precision": 1.0,
+      "skill_f1": 1.0,
+      "execution_mode_correct": 1,
+      "execution_mode_accuracy": 1.0,
+      "complexity_correct": 1,
+      "complexity_accuracy": 1.0,
+      "task_success_rate": 1.0,
+      "avg_response_time_ms": 0.02
+    },
+    {
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "total": 62,
+      "skill_correct": 21,
+      "skill_recall": 0.6774,
+      "skill_precision": 0.6774,
+      "skill_f1": 0.6774,
+      "execution_mode_correct": 32,
+      "execution_mode_accuracy": 0.5161,
+      "complexity_correct": 22,
+      "complexity_accuracy": 0.3548,
+      "task_success_rate": 1.0,
+      "avg_response_time_ms": 4449.27
+    },
+    {
+      "category": "semantic_router",
+      "subcategory": "colloquial_match",
+      "total": 5,
+      "skill_correct": 5,
+      "skill_recall": 1.0,
+      "skill_precision": 1.0,
+      "skill_f1": 1.0,
+      "execution_mode_correct": 0,
+      "execution_mode_accuracy": 0.0,
+      "complexity_correct": 1,
+      "complexity_accuracy": 0.2,
+      "task_success_rate": 1.0,
+      "avg_response_time_ms": 2410.72
+    },
+    {
+      "category": "semantic_router",
+      "subcategory": "description_match",
+      "total": 8,
+      "skill_correct": 4,
+      "skill_recall": 1.0,
+      "skill_precision": 1.0,
+      "skill_f1": 1.0,
+      "execution_mode_correct": 3,
+      "execution_mode_accuracy": 0.375,
+      "complexity_correct": 2,
+      "complexity_accuracy": 0.25,
+      "task_success_rate": 1.0,
+      "avg_response_time_ms": 891.55
+    },
+    {
+      "category": "semantic_router",
+      "subcategory": "mixed_lang_match",
+      "total": 3,
+      "skill_correct": 2,
+      "skill_recall": 1.0,
+      "skill_precision": 1.0,
+      "skill_f1": 1.0,
+      "execution_mode_correct": 1,
+      "execution_mode_accuracy": 0.3333,
+      "complexity_correct": 2,
+      "complexity_accuracy": 0.6667,
+      "task_success_rate": 1.0,
+      "avg_response_time_ms": 0.89
+    }
+  ],
+  "overfitting_results": [
+    {
+      "benchmark_id": "route-kw-direct-001",
+      "original_correct": true,
+      "paraphrase_results": [
+        true,
+        true,
+        true
+      ],
+      "consistency_rate": 1.0,
+      "is_overfitted": false
+    },
+    {
+      "benchmark_id": "route-kw-direct-002",
+      "original_correct": true,
+      "paraphrase_results": [
+        false,
+        true,
+        true
+      ],
+      "consistency_rate": 0.6667,
+      "is_overfitted": false
+    },
+    {
+      "benchmark_id": "route-kw-direct-003",
+      "original_correct": true,
+      "paraphrase_results": [
+        true,
+        false,
+        true
+      ],
+      "consistency_rate": 0.6667,
+      "is_overfitted": false
+    },
+    {
+      "benchmark_id": "route-kw-react-001",
+      "original_correct": false,
+      "paraphrase_results": [
+        false,
+        false,
+        false
+      ],
+      "consistency_rate": 1.0,
+      "is_overfitted": false
+    },
+    {
+      "benchmark_id": "route-kw-react-002",
+      "original_correct": false,
+      "paraphrase_results": [
+        false,
+        false,
+        true
+      ],
+      "consistency_rate": 0.6667,
+      "is_overfitted": false
+    },
+    {
+      "benchmark_id": "route-kw-react-003",
+      "original_correct": false,
+      "paraphrase_results": [
+        false,
+        false,
+        false
+      ],
+      "consistency_rate": 1.0,
+      "is_overfitted": false
+    },
+    {
+      "benchmark_id": "route-kw-rewoo-001",
+      "original_correct": false,
+      "paraphrase_results": [
+        true,
+        false,
+        false
+      ],
+      "consistency_rate": 0.6667,
+      "is_overfitted": false
+    },
+    {
+      "benchmark_id": "route-kw-rewoo-002",
+      "original_correct": false,
+      "paraphrase_results": [
+        false,
+        false,
+        false
+      ],
+      "consistency_rate": 1.0,
+      "is_overfitted": false
+    },
+    {
+      "benchmark_id": "route-kw-reflex-001",
+      "original_correct": false,
+      "paraphrase_results": [
+        false,
+        false,
+        false
+      ],
+      "consistency_rate": 1.0,
+      "is_overfitted": false
+    },
+    {
+      "benchmark_id": "route-kw-reflex-002",
+      "original_correct": false,
+      "paraphrase_results": [
+        false,
+        false,
+        false
+      ],
+      "consistency_rate": 1.0,
+      "is_overfitted": false
+    }
+  ],
+  "overfitting_score": 0.2222,
+  "weaknesses": [
+    {
+      "dimension": "semantic_router",
+      "subcategory": "colloquial_match",
+      "severity": "high",
+      "description": "执行模式准确率过低 (0.00%)，子类别: colloquial_match",
+      "evidence": "正确数=0/5",
+      "suggestion": "检查复杂度估算和模式选择逻辑"
+    },
+    {
+      "dimension": "semantic_router",
+      "subcategory": "description_match",
+      "severity": "high",
+      "description": "执行模式准确率过低 (37.50%)，子类别: description_match",
+      "evidence": "正确数=3/8",
+      "suggestion": "检查复杂度估算和模式选择逻辑"
+    },
+    {
+      "dimension": "semantic_router",
+      "subcategory": "mixed_lang_match",
+      "severity": "high",
+      "description": "执行模式准确率过低 (33.33%)，子类别: mixed_lang_match",
+      "evidence": "正确数=1/3",
+      "suggestion": "检查复杂度估算和模式选择逻辑"
+    },
+    {
+      "dimension": "routing",
+      "subcategory": "keyword_match",
+      "severity": "medium",
+      "description": "技能路由F1偏低 (0.68)，子类别: keyword_match",
+      "evidence": "召回率=67.74%, 精确率=67.74%, 样本数=62",
+      "suggestion": "微调路由阈值或增加更多意图示例"
+    },
+    {
+      "dimension": "routing",
+      "subcategory": "keyword_match",
+      "severity": "medium",
+      "description": "执行模式准确率过低 (51.61%)，子类别: keyword_match",
+      "evidence": "正确数=32/62",
+      "suggestion": "检查复杂度估算和模式选择逻辑"
+    }
+  ],
+  "root_causes": [
+    {
+      "cause_type": "complexity_misjudge",
+      "cause_description": "复杂度估算偏差：倾向高估复杂度（将简单任务误判为需要多步推理）",
+      "confidence": 0.75,
+      "affected_cases": [
+        "route-kw-rewoo-001",
+        "route-kw-rewoo-002",
+        "route-kw-reflex-001",
+        "route-kw-reflex-002",
+        "route-kw-planexec-001",
+        "route-kw-coderev-001",
+        "route-kw-geo-001",
+        "route-kw-deai-001",
+        "route-kw-content-001",
+        "route-kw-citation-001"
+      ],
+      "detail": "共 31 个执行模式判断错误。低估复杂度 0 次，高估复杂度 1 次。受影响子类别: description_match, mixed_lang_match, colloquial_match, keyword_match"
+    },
+    {
+      "cause_type": "intent_ambiguous",
+      "cause_description": "意图歧义：不同技能的关键词/意图描述重叠，导致路由混淆",
+      "confidence": 0.7,
+      "affected_cases": [
+        "route-kw-rewoo-001",
+        "route-kw-rewoo-001",
+        "route-kw-reflex-001"
+      ],
+      "detail": "技能混淆对: rewoo_agent→competitor_analyzer(2次); reflexion_agent→code_reviewer(1次)"
+    },
+    {
+      "cause_type": "quality_threshold",
+      "cause_description": "质量门控阈值过低：任务虽成功完成但输出了错误结果",
+      "confidence": 0.6,
+      "affected_cases": [
+        "route-kw-rewoo-001",
+        "route-kw-rewoo-001",
+        "route-kw-reflex-001"
+      ],
+      "detail": "共 3 个任务虽然HTTP成功但路由到了错误技能。质量门控未能拦截这些错误路由的结果。"
+    }
+  ],
+  "improvement_plans": [
+    {
+      "weakness_description": "意图歧义：不同技能的关键词/意图描述重叠，导致路由混淆",
+      "root_causes": [
+        {
+          "cause_type": "intent_ambiguous",
+          "cause_description": "意图歧义：不同技能的关键词/意图描述重叠，导致路由混淆",
+          "confidence": 0.7,
+          "affected_cases": [
+            "route-kw-rewoo-001",
+            "route-kw-rewoo-001",
+            "route-kw-reflex-001"
+          ],
+          "detail": "技能混淆对: rewoo_agent→competitor_analyzer(2次); reflexion_agent→code_reviewer(1次)"
+        }
+      ],
+      "actions": [
+        {
+          "action_id": "ACT-001",
+          "title": "为易混淆技能添加互斥关键词",
+          "description": "在技能配置中为容易混淆的技能对添加互斥关键词（disambiguation_keywords），当用户输入同时匹配多个技能时，优先选择包含互斥关键词的技能。",
+          "target_module": "configs/skills/*.yaml → intent.disambiguation_keywords",
+          "priority": "P1",
+          "expected_impact": "预计提升精确率 10~25%，减少技能混淆",
+          "effort": "small",
+          "related_causes": [
+            "intent_ambiguous"
+          ],
+          "verification": "运行歧义消解回测，验证路由精确率提升"
+        },
+        {
+          "action_id": "ACT-002",
+          "title": "实现LLM二次分类消歧",
+          "description": "当 Layer 0/1 路由到多个候选技能时，调用 LLM quick_classify 进行二次意图判断，选择最匹配的技能。",
+          "target_module": "src/agentkit/chat/skill_routing.py → Layer 1",
+          "priority": "P2",
+          "expected_impact": "预计提升精确率 15~30%，但增加 ~500ms 延迟和 ~100 tokens",
+          "effort": "medium",
+          "related_causes": [
+            "intent_ambiguous"
+          ],
+          "verification": "运行歧义消解回测，对比延迟和精确率变化"
+        }
+      ],
+      "overall_strategy": "短期：添加互斥关键词消歧；中期：启用LLM二次分类；长期：训练专用意图分类模型替代规则匹配"
+    },
+    {
+      "weakness_description": "复杂度估算偏差：倾向高估复杂度（将简单任务误判为需要多步推理）",
+      "root_causes": [
+        {
+          "cause_type": "complexity_misjudge",
+          "cause_description": "复杂度估算偏差：倾向高估复杂度（将简单任务误判为需要多步推理）",
+          "confidence": 0.75,
+          "affected_cases": [
+            "route-kw-rewoo-001",
+            "route-kw-rewoo-002",
+            "route-kw-reflex-001",
+            "route-kw-reflex-002",
+            "route-kw-planexec-001",
+            "route-kw-coderev-001",
+            "route-kw-geo-001",
+            "route-kw-deai-001",
+            "route-kw-content-001",
+            "route-kw-citation-001"
+          ],
+          "detail": "共 31 个执行模式判断错误。低估复杂度 0 次，高估复杂度 1 次。受影响子类别: description_match, mixed_lang_match, colloquial_match, keyword_match"
+        }
+      ],
+      "actions": [
+        {
+          "action_id": "ACT-003",
+          "title": "优化复杂度估算启发式规则",
+          "description": "调整 HeuristicClassifier 的复杂度评分权重：增加任务动词（分析/研究/设计）的权重，降低简单问答动词（是什么/多少）的权重。",
+          "target_module": "src/agentkit/chat/skill_routing.py → HeuristicClassifier",
+          "priority": "P1",
+          "expected_impact": "预计提升执行模式准确率 10~20%",
+          "effort": "small",
+          "related_causes": [
+            "complexity_misjudge"
+          ],
+          "verification": "运行执行模式回测，验证准确率提升"
+        },
+        {
+          "action_id": "ACT-004",
+          "title": "引入任务复杂度校准数据集",
+          "description": "收集标注了复杂度等级的真实用户查询，构建校准数据集，定期评估和调整复杂度阈值。",
+          "target_module": "tests/e2e/benchmark_dataset.py",
+          "priority": "P2",
+          "expected_impact": "持续提升复杂度判断准确性",
+          "effort": "medium",
+          "related_causes": [
+            "complexity_misjudge"
+          ],
+          "verification": "每次调整后运行回测，对比前后F1变化"
+        }
+      ],
+      "overall_strategy": "短期：调整启发式规则权重；中期：构建复杂度校准数据集；长期：训练复杂度评估模型替代规则"
+    },
+    {
+      "weakness_description": "质量门控阈值过低：任务虽成功完成但输出了错误结果",
+      "root_causes": [
+        {
+          "cause_type": "quality_threshold",
+          "cause_description": "质量门控阈值过低：任务虽成功完成但输出了错误结果",
+          "confidence": 0.6,
+          "affected_cases": [
+            "route-kw-rewoo-001",
+            "route-kw-rewoo-001",
+            "route-kw-reflex-001"
+          ],
+          "detail": "共 3 个任务虽然HTTP成功但路由到了错误技能。质量门控未能拦截这些错误路由的结果。"
+        }
+      ],
+      "actions": [
+        {
+          "action_id": "ACT-005",
+          "title": "增强质量门控的技能匹配验证",
+          "description": "在QualityGate中增加技能匹配验证：检查输出是否与路由到的技能的能力范围一致，如果不一致则触发重试或降级。",
+          "target_module": "src/agentkit/quality/gate.py",
+          "priority": "P1",
+          "expected_impact": "减少错误路由导致的低质量输出",
+          "effort": "medium",
+          "related_causes": [
+            "quality_threshold"
+          ],
+          "verification": "运行质量门控回测，验证错误路由拦截率"
+        }
+      ],
+      "overall_strategy": "短期：增加技能匹配验证；中期：引入输出质量评分模型；长期：实现自动质量回归检测"
+    }
+  ],
+  "raw_observations": [
+    {
+      "benchmark_id": "route-edge-greet-001",
+      "test_name": "layer0_route-edge-greet-001",
+      "timestamp": "2026-06-15T16:50:48.898798+00:00",
+      "input_query": "你好",
+      "is_paraphrase": false,
+      "expected_skill": null,
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": null,
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "greeting",
+      "response_time_ms": 0.031158037018030882,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-edge-greet-002",
+      "test_name": "layer0_route-edge-greet-002",
+      "timestamp": "2026-06-15T16:50:48.900854+00:00",
+      "input_query": "Good morning!",
+      "is_paraphrase": false,
+      "expected_skill": null,
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": null,
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "greeting",
+      "response_time_ms": 0.02913799835368991,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-edge-identity-001",
+      "test_name": "layer0_route-edge-identity-001",
+      "timestamp": "2026-06-15T16:50:48.902316+00:00",
+      "input_query": "你是谁？",
+      "is_paraphrase": false,
+      "expected_skill": null,
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": null,
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "identity",
+      "response_time_ms": 0.020894978661090136,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-edge-explicit-001",
+      "test_name": "layer0_route-edge-explicit-001",
+      "timestamp": "2026-06-15T16:50:48.903641+00:00",
+      "input_query": "@skill:react_agent 搜索最新的AI新闻",
+      "is_paraphrase": false,
+      "expected_skill": "react_agent",
+      "expected_execution_mode": "react",
+      "expected_complexity": "high",
+      "actual_skill": "react_agent",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "explicit_prefix",
+      "response_time_ms": 0.049009977374225855,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-direct-001",
+      "test_name": "layer1_route-kw-direct-001",
+      "timestamp": "2026-06-15T16:50:48.905245+00:00",
+      "input_query": "翻译这段话",
+      "is_paraphrase": false,
+      "expected_skill": "direct_agent",
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": "direct_agent",
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.22018200252205133,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-direct-002",
+      "test_name": "layer1_route-kw-direct-002",
+      "timestamp": "2026-06-15T16:51:05.483207+00:00",
+      "input_query": "帮我总结一下",
+      "is_paraphrase": false,
+      "expected_skill": "direct_agent",
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": "direct_agent",
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 16576.47029601503,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-direct-003",
+      "test_name": "layer1_route-kw-direct-003",
+      "timestamp": "2026-06-15T16:51:05.485175+00:00",
+      "input_query": "什么是RAG？",
+      "is_paraphrase": false,
+      "expected_skill": "direct_agent",
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": "direct_agent",
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.1331570092588663,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-react-001",
+      "test_name": "layer1_route-kw-react-001",
+      "timestamp": "2026-06-15T16:51:05.489493+00:00",
+      "input_query": "搜索一下AI Agent市场数据",
+      "is_paraphrase": false,
+      "expected_skill": "react_agent",
+      "expected_execution_mode": "react",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.665565989445895,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-react-002",
+      "test_name": "layer1_route-kw-react-002",
+      "timestamp": "2026-06-15T16:51:17.983942+00:00",
+      "input_query": "帮我分析这个数据",
+      "is_paraphrase": false,
+      "expected_skill": "react_agent",
+      "expected_execution_mode": "react",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 12492.459080996923,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-react-003",
+      "test_name": "layer1_route-kw-react-003",
+      "timestamp": "2026-06-15T16:51:17.987215+00:00",
+      "input_query": "实时监控竞品动态",
+      "is_paraphrase": false,
+      "expected_skill": "react_agent",
+      "expected_execution_mode": "react",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 1.6594339977018535,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-rewoo-001",
+      "test_name": "layer1_route-kw-rewoo-001",
+      "timestamp": "2026-06-15T16:51:17.988662+00:00",
+      "input_query": "采集A、B、C三个竞品的功能数据",
+      "is_paraphrase": false,
+      "expected_skill": "rewoo_agent",
+      "expected_execution_mode": "rewoo",
+      "expected_complexity": "high",
+      "actual_skill": "competitor_analyzer",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": false,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.11222198372706771,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-rewoo-002",
+      "test_name": "layer1_route-kw-rewoo-002",
+      "timestamp": "2026-06-15T16:51:30.397745+00:00",
+      "input_query": "并行搜索多个关键词",
+      "is_paraphrase": false,
+      "expected_skill": "rewoo_agent",
+      "expected_execution_mode": "rewoo",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 12407.641994010191,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-reflex-001",
+      "test_name": "layer1_route-kw-reflex-001",
+      "timestamp": "2026-06-15T16:51:30.401715+00:00",
+      "input_query": "审查这段代码的合规性",
+      "is_paraphrase": false,
+      "expected_skill": "reflexion_agent",
+      "expected_execution_mode": "reflexion",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.146629965864122,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-reflex-002",
+      "test_name": "layer1_route-kw-reflex-002",
+      "timestamp": "2026-06-15T16:51:41.776746+00:00",
+      "input_query": "生成一个高精度的数据分析脚本",
+      "is_paraphrase": false,
+      "expected_skill": "reflexion_agent",
+      "expected_execution_mode": "reflexion",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 11373.252779012546,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-planexec-001",
+      "test_name": "layer1_route-kw-planexec-001",
+      "timestamp": "2026-06-15T16:51:41.781653+00:00",
+      "input_query": "生成一份市场分析报告",
+      "is_paraphrase": false,
+      "expected_skill": "plan_exec_agent",
+      "expected_execution_mode": "plan_exec",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.6980440015904605,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-planexec-002",
+      "test_name": "layer1_route-kw-planexec-002",
+      "timestamp": "2026-06-15T16:51:58.323009+00:00",
+      "input_query": "规划产品优化方案",
+      "is_paraphrase": false,
+      "expected_skill": "plan_exec_agent",
+      "expected_execution_mode": "plan_exec",
+      "expected_complexity": "high",
+      "actual_skill": "plan_exec_agent",
+      "actual_execution_mode": "plan_exec",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 16539.517820987385,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-coderev-001",
+      "test_name": "layer1_route-kw-coderev-001",
+      "timestamp": "2026-06-15T16:51:58.326721+00:00",
+      "input_query": "Review this code for quality",
+      "is_paraphrase": false,
+      "expected_skill": "code_reviewer",
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.0271629909984767,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-geo-001",
+      "test_name": "layer1_route-kw-geo-001",
+      "timestamp": "2026-06-15T16:51:58.328807+00:00",
+      "input_query": "帮我优化这篇文章的SEO",
+      "is_paraphrase": false,
+      "expected_skill": "geo_optimizer",
+      "expected_execution_mode": "llm_generate",
+      "expected_complexity": "low",
+      "actual_skill": "geo_optimizer",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": false,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.13758597197011113,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-deai-001",
+      "test_name": "layer1_route-kw-deai-001",
+      "timestamp": "2026-06-15T16:51:58.331082+00:00",
+      "input_query": "帮我把这篇文章去AI化",
+      "is_paraphrase": false,
+      "expected_skill": "deai_agent",
+      "expected_execution_mode": "llm_generate",
+      "expected_complexity": "low",
+      "actual_skill": "deai_agent",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": false,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.13313599629327655,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-content-001",
+      "test_name": "layer1_route-kw-content-001",
+      "timestamp": "2026-06-15T16:52:04.523209+00:00",
+      "input_query": "帮我写一篇关于AI的文章",
+      "is_paraphrase": false,
+      "expected_skill": "content_generator",
+      "expected_execution_mode": "llm_generate",
+      "expected_complexity": "low",
+      "actual_skill": "content_generator",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": false,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 6190.30976598151,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-citation-001",
+      "test_name": "layer1_route-kw-citation-001",
+      "timestamp": "2026-06-15T16:52:04.527767+00:00",
+      "input_query": "检测我们的品牌在AI平台的引用情况",
+      "is_paraphrase": false,
+      "expected_skill": "citation_detector",
+      "expected_execution_mode": "custom",
+      "expected_complexity": "medium",
+      "actual_skill": null,
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.5777029804885387,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-trend-001",
+      "test_name": "layer1_route-kw-trend-001",
+      "timestamp": "2026-06-15T16:52:12.996375+00:00",
+      "input_query": "分析品牌趋势",
+      "is_paraphrase": false,
+      "expected_skill": "trend_agent",
+      "expected_execution_mode": "tool_call",
+      "expected_complexity": "medium",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 8466.026534966659,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-competitor-001",
+      "test_name": "layer1_route-kw-competitor-001",
+      "timestamp": "2026-06-15T16:52:13.000816+00:00",
+      "input_query": "分析我的竞品策略",
+      "is_paraphrase": false,
+      "expected_skill": "competitor_analyzer",
+      "expected_execution_mode": "tool_call",
+      "expected_complexity": "medium",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.310166019015014,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-schema-001",
+      "test_name": "layer1_route-kw-schema-001",
+      "timestamp": "2026-06-15T16:52:13.002942+00:00",
+      "input_query": "帮我优化Schema",
+      "is_paraphrase": false,
+      "expected_skill": "schema_advisor",
+      "expected_execution_mode": "custom",
+      "expected_complexity": "medium",
+      "actual_skill": "schema_advisor",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.13258098624646664,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-monitor-001",
+      "test_name": "layer1_route-kw-monitor-001",
+      "timestamp": "2026-06-15T16:52:13.004811+00:00",
+      "input_query": "监测品牌引用变化",
+      "is_paraphrase": false,
+      "expected_skill": "monitor",
+      "expected_execution_mode": "custom",
+      "expected_complexity": "medium",
+      "actual_skill": "monitor",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.16177998622879386,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-goal-001",
+      "test_name": "layer1_route-kw-goal-001",
+      "timestamp": "2026-06-15T16:52:23.690257+00:00",
+      "input_query": "分析竞品SEO策略并生成优化方案",
+      "is_paraphrase": false,
+      "expected_skill": "goal_driven_agent",
+      "expected_execution_mode": "tool_call",
+      "expected_complexity": "medium",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 10683.721612032969,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "semantic-direct-001",
+      "test_name": "semantic_semantic-direct-001",
+      "timestamp": "2026-06-15T16:52:23.694450+00:00",
+      "input_query": "简单生成任务，无需工具调用",
+      "is_paraphrase": false,
+      "expected_skill": "direct_agent",
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": null,
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "semantic_router",
+      "subcategory": "description_match",
+      "response_time_ms": 2.3536229855380952,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "semantic-react-001",
+      "test_name": "semantic_semantic-react-001",
+      "timestamp": "2026-06-15T16:52:28.222957+00:00",
+      "input_query": "需要动态适应、逐步推理和工具调用",
+      "is_paraphrase": false,
+      "expected_skill": "react_agent",
+      "expected_execution_mode": "react",
+      "expected_complexity": "high",
+      "actual_skill": "react_agent",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "semantic_router",
+      "subcategory": "description_match",
+      "response_time_ms": 4526.445869996678,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "semantic-rewoo-001",
+      "test_name": "semantic_semantic-rewoo-001",
+      "timestamp": "2026-06-15T16:52:28.227266+00:00",
+      "input_query": "多源数据并行采集、无依赖工具调用批量执行",
+      "is_paraphrase": false,
+      "expected_skill": "rewoo_agent",
+      "expected_execution_mode": "rewoo",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "semantic_router",
+      "subcategory": "description_match",
+      "response_time_ms": 2.2753190132789314,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "semantic-reflex-001",
+      "test_name": "semantic_semantic-reflex-001",
+      "timestamp": "2026-06-15T16:52:30.828067+00:00",
+      "input_query": "需要高精度和自我验证的任务",
+      "is_paraphrase": false,
+      "expected_skill": "reflexion_agent",
+      "expected_execution_mode": "reflexion",
+      "expected_complexity": "high",
+      "actual_skill": "reflexion_agent",
+      "actual_execution_mode": "reflexion",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "semantic_router",
+      "subcategory": "description_match",
+      "response_time_ms": 2598.3480180148035,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "semantic-planexec-001",
+      "test_name": "semantic_semantic-planexec-001",
+      "timestamp": "2026-06-15T16:52:30.830628+00:00",
+      "input_query": "结构化多步骤任务，需要可审查的规划和执行",
+      "is_paraphrase": false,
+      "expected_skill": "plan_exec_agent",
+      "expected_execution_mode": "plan_exec",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "semantic_router",
+      "subcategory": "description_match",
+      "response_time_ms": 0.47854799777269363,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "semantic-geo-001",
+      "test_name": "semantic_semantic-geo-001",
+      "timestamp": "2026-06-15T16:52:30.832637+00:00",
+      "input_query": "对文章进行GEO/SEO优化，提升在AI搜索引擎中的可见性",
+      "is_paraphrase": false,
+      "expected_skill": "geo_optimizer",
+      "expected_execution_mode": "llm_generate",
+      "expected_complexity": "low",
+      "actual_skill": "geo_optimizer",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "semantic_router",
+      "subcategory": "description_match",
+      "response_time_ms": 0.18697103951126337,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "semantic-citation-001",
+      "test_name": "semantic_semantic-citation-001",
+      "timestamp": "2026-06-15T16:52:30.836459+00:00",
+      "input_query": "检测品牌在各AI平台回答中的引用情况",
+      "is_paraphrase": false,
+      "expected_skill": "citation_detector",
+      "expected_execution_mode": "custom",
+      "expected_complexity": "medium",
+      "actual_skill": null,
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "semantic_router",
+      "subcategory": "description_match",
+      "response_time_ms": 2.184002019930631,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "semantic-competitor-001",
+      "test_name": "semantic_semantic-competitor-001",
+      "timestamp": "2026-06-15T16:52:30.838268+00:00",
+      "input_query": "分析竞品策略、对比品牌差距或发现竞争机会",
+      "is_paraphrase": false,
+      "expected_skill": "competitor_analyzer",
+      "expected_execution_mode": "tool_call",
+      "expected_complexity": "medium",
+      "actual_skill": "competitor_analyzer",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "semantic_router",
+      "subcategory": "description_match",
+      "response_time_ms": 0.15963200712576509,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "semantic-colloquial-review-001",
+      "test_name": "semantic_semantic-colloquial-review-001",
+      "timestamp": "2026-06-15T16:52:42.892865+00:00",
+      "input_query": "帮我看看代码有没有问题",
+      "is_paraphrase": false,
+      "expected_skill": "code_reviewer",
+      "expected_execution_mode": "react",
+      "expected_complexity": "medium",
+      "actual_skill": "code_reviewer",
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "semantic_router",
+      "subcategory": "colloquial_match",
+      "response_time_ms": 12052.96553100925,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "semantic-colloquial-trend-001",
+      "test_name": "semantic_semantic-colloquial-trend-001",
+      "timestamp": "2026-06-15T16:52:42.895968+00:00",
+      "input_query": "最近市场行情怎么样",
+      "is_paraphrase": false,
+      "expected_skill": "trend_agent",
+      "expected_execution_mode": "tool_call",
+      "expected_complexity": "medium",
+      "actual_skill": "trend_agent",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "semantic_router",
+      "subcategory": "colloquial_match",
+      "response_time_ms": 0.17141696298494935,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "semantic-colloquial-content-001",
+      "test_name": "semantic_semantic-colloquial-content-001",
+      "timestamp": "2026-06-15T16:52:42.899454+00:00",
+      "input_query": "帮我写点东西",
+      "is_paraphrase": false,
+      "expected_skill": "content_generator",
+      "expected_execution_mode": "llm_generate",
+      "expected_complexity": "low",
+      "actual_skill": "content_generator",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": false,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "semantic_router",
+      "subcategory": "colloquial_match",
+      "response_time_ms": 0.17780199414119124,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "semantic-colloquial-citation-001",
+      "test_name": "semantic_semantic-colloquial-citation-001",
+      "timestamp": "2026-06-15T16:52:42.901686+00:00",
+      "input_query": "这个引用对不对",
+      "is_paraphrase": false,
+      "expected_skill": "citation_detector",
+      "expected_execution_mode": "custom",
+      "expected_complexity": "medium",
+      "actual_skill": "citation_detector",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "semantic_router",
+      "subcategory": "colloquial_match",
+      "response_time_ms": 0.13318302808329463,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "semantic-colloquial-competitor-001",
+      "test_name": "semantic_semantic-colloquial-competitor-001",
+      "timestamp": "2026-06-15T16:52:42.903890+00:00",
+      "input_query": "对手怎么样",
+      "is_paraphrase": false,
+      "expected_skill": "competitor_analyzer",
+      "expected_execution_mode": "tool_call",
+      "expected_complexity": "medium",
+      "actual_skill": "competitor_analyzer",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "semantic_router",
+      "subcategory": "colloquial_match",
+      "response_time_ms": 0.12990902177989483,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "semantic-mixed-review-001",
+      "test_name": "semantic_semantic-mixed-review-001",
+      "timestamp": "2026-06-15T16:52:42.908177+00:00",
+      "input_query": "review一下这段代码",
+      "is_paraphrase": false,
+      "expected_skill": "code_reviewer",
+      "expected_execution_mode": "react",
+      "expected_complexity": "medium",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "semantic_router",
+      "subcategory": "mixed_lang_match",
+      "response_time_ms": 2.3870580480434,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "semantic-mixed-geo-001",
+      "test_name": "semantic_semantic-mixed-geo-001",
+      "timestamp": "2026-06-15T16:52:42.910255+00:00",
+      "input_query": "做个SEO优化",
+      "is_paraphrase": false,
+      "expected_skill": "geo_optimizer",
+      "expected_execution_mode": "llm_generate",
+      "expected_complexity": "low",
+      "actual_skill": "geo_optimizer",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": false,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "semantic_router",
+      "subcategory": "mixed_lang_match",
+      "response_time_ms": 0.1305780024267733,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "semantic-mixed-monitor-001",
+      "test_name": "semantic_semantic-mixed-monitor-001",
+      "timestamp": "2026-06-15T16:52:42.912450+00:00",
+      "input_query": "monitor一下系统状态",
+      "is_paraphrase": false,
+      "expected_skill": "monitor",
+      "expected_execution_mode": "tool_call",
+      "expected_complexity": "medium",
+      "actual_skill": "monitor",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "semantic_router",
+      "subcategory": "mixed_lang_match",
+      "response_time_ms": 0.15147699741646647,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-direct-001",
+      "test_name": "para_orig_route-kw-direct-001",
+      "timestamp": "2026-06-15T16:52:42.914808+00:00",
+      "input_query": "翻译这段话",
+      "is_paraphrase": false,
+      "expected_skill": "direct_agent",
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": "direct_agent",
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.1444679801352322,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-direct-001",
+      "test_name": "para_route-kw-direct-001_0",
+      "timestamp": "2026-06-15T16:52:42.915469+00:00",
+      "input_query": "翻译这段话",
+      "is_paraphrase": true,
+      "expected_skill": "direct_agent",
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": "direct_agent",
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.12570497347041965,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-direct-001",
+      "test_name": "para_route-kw-direct-001_1",
+      "timestamp": "2026-06-15T16:52:42.915955+00:00",
+      "input_query": "翻译这段话",
+      "is_paraphrase": true,
+      "expected_skill": "direct_agent",
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": "direct_agent",
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.11087598977610469,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-direct-001",
+      "test_name": "para_route-kw-direct-001_2",
+      "timestamp": "2026-06-15T16:52:42.916417+00:00",
+      "input_query": "翻译这段话",
+      "is_paraphrase": true,
+      "expected_skill": "direct_agent",
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": "direct_agent",
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.1154160127043724,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-direct-002",
+      "test_name": "para_orig_route-kw-direct-002",
+      "timestamp": "2026-06-15T16:52:48.823381+00:00",
+      "input_query": "帮我总结一下",
+      "is_paraphrase": false,
+      "expected_skill": "direct_agent",
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": "direct_agent",
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 5905.160357011482,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-direct-002",
+      "test_name": "para_route-kw-direct-002_0",
+      "timestamp": "2026-06-15T16:52:48.826343+00:00",
+      "input_query": "帮我总结一下",
+      "is_paraphrase": true,
+      "expected_skill": "direct_agent",
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": null,
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.4711660225875676,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-direct-002",
+      "test_name": "para_route-kw-direct-002_1",
+      "timestamp": "2026-06-15T16:52:48.826917+00:00",
+      "input_query": "帮我总结一下",
+      "is_paraphrase": true,
+      "expected_skill": "direct_agent",
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": "direct_agent",
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.1197229721583426,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-direct-002",
+      "test_name": "para_route-kw-direct-002_2",
+      "timestamp": "2026-06-15T16:52:48.827366+00:00",
+      "input_query": "帮我总结一下",
+      "is_paraphrase": true,
+      "expected_skill": "direct_agent",
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": "direct_agent",
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.10035402374342084,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-direct-003",
+      "test_name": "para_orig_route-kw-direct-003",
+      "timestamp": "2026-06-15T16:52:48.830277+00:00",
+      "input_query": "什么是RAG？",
+      "is_paraphrase": false,
+      "expected_skill": "direct_agent",
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": "direct_agent",
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.2366119879297912,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-direct-003",
+      "test_name": "para_route-kw-direct-003_0",
+      "timestamp": "2026-06-15T16:52:59.573007+00:00",
+      "input_query": "什么是RAG？",
+      "is_paraphrase": true,
+      "expected_skill": "direct_agent",
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": "direct_agent",
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 10742.036784009542,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-direct-003",
+      "test_name": "para_route-kw-direct-003_1",
+      "timestamp": "2026-06-15T16:52:59.575759+00:00",
+      "input_query": "什么是RAG？",
+      "is_paraphrase": true,
+      "expected_skill": "direct_agent",
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": null,
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.285740978550166,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-direct-003",
+      "test_name": "para_route-kw-direct-003_2",
+      "timestamp": "2026-06-15T16:53:03.327613+00:00",
+      "input_query": "什么是RAG？",
+      "is_paraphrase": true,
+      "expected_skill": "direct_agent",
+      "expected_execution_mode": "direct",
+      "expected_complexity": "low",
+      "actual_skill": "direct_agent",
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": true,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 3751.3481359928846,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-react-001",
+      "test_name": "para_orig_route-kw-react-001",
+      "timestamp": "2026-06-15T16:53:03.332556+00:00",
+      "input_query": "搜索一下AI Agent市场数据",
+      "is_paraphrase": false,
+      "expected_skill": "react_agent",
+      "expected_execution_mode": "react",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.620374958496541,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-react-001",
+      "test_name": "para_route-kw-react-001_0",
+      "timestamp": "2026-06-15T16:53:16.156076+00:00",
+      "input_query": "搜索一下AI Agent市场数据",
+      "is_paraphrase": true,
+      "expected_skill": "react_agent",
+      "expected_execution_mode": "react",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 12823.078655987047,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-react-001",
+      "test_name": "para_route-kw-react-001_1",
+      "timestamp": "2026-06-15T16:53:16.158891+00:00",
+      "input_query": "搜索一下AI Agent市场数据",
+      "is_paraphrase": true,
+      "expected_skill": "react_agent",
+      "expected_execution_mode": "react",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.353978983592242,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-react-001",
+      "test_name": "para_route-kw-react-001_2",
+      "timestamp": "2026-06-15T16:53:33.222795+00:00",
+      "input_query": "搜索一下AI Agent市场数据",
+      "is_paraphrase": true,
+      "expected_skill": "react_agent",
+      "expected_execution_mode": "react",
+      "expected_complexity": "high",
+      "actual_skill": "trend_agent",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": false,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 17063.475835020654,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-react-002",
+      "test_name": "para_orig_route-kw-react-002",
+      "timestamp": "2026-06-15T16:53:33.226654+00:00",
+      "input_query": "帮我分析这个数据",
+      "is_paraphrase": false,
+      "expected_skill": "react_agent",
+      "expected_execution_mode": "react",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.1171270054765046,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-react-002",
+      "test_name": "para_route-kw-react-002_0",
+      "timestamp": "2026-06-15T16:53:45.186234+00:00",
+      "input_query": "帮我分析这个数据",
+      "is_paraphrase": true,
+      "expected_skill": "react_agent",
+      "expected_execution_mode": "react",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 11959.168867964763,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-react-002",
+      "test_name": "para_route-kw-react-002_1",
+      "timestamp": "2026-06-15T16:53:45.188794+00:00",
+      "input_query": "帮我分析这个数据",
+      "is_paraphrase": true,
+      "expected_skill": "react_agent",
+      "expected_execution_mode": "react",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.155377995222807,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-react-002",
+      "test_name": "para_route-kw-react-002_2",
+      "timestamp": "2026-06-15T16:54:10.649803+00:00",
+      "input_query": "帮我分析这个数据",
+      "is_paraphrase": true,
+      "expected_skill": "react_agent",
+      "expected_execution_mode": "react",
+      "expected_complexity": "high",
+      "actual_skill": "react_agent",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 25460.632925038226,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-react-003",
+      "test_name": "para_orig_route-kw-react-003",
+      "timestamp": "2026-06-15T16:54:10.683119+00:00",
+      "input_query": "实时监控竞品动态",
+      "is_paraphrase": false,
+      "expected_skill": "react_agent",
+      "expected_execution_mode": "react",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.5149360299110413,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-react-003",
+      "test_name": "para_route-kw-react-003_0",
+      "timestamp": "2026-06-15T16:54:26.573171+00:00",
+      "input_query": "实时监控竞品动态",
+      "is_paraphrase": true,
+      "expected_skill": "react_agent",
+      "expected_execution_mode": "react",
+      "expected_complexity": "high",
+      "actual_skill": "competitor_analyzer",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": false,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 15889.646161987912,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-react-003",
+      "test_name": "para_route-kw-react-003_1",
+      "timestamp": "2026-06-15T16:54:26.573841+00:00",
+      "input_query": "实时监控竞品动态",
+      "is_paraphrase": true,
+      "expected_skill": "react_agent",
+      "expected_execution_mode": "react",
+      "expected_complexity": "high",
+      "actual_skill": "competitor_analyzer",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": false,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.1847759704105556,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-react-003",
+      "test_name": "para_route-kw-react-003_2",
+      "timestamp": "2026-06-15T16:54:26.576540+00:00",
+      "input_query": "实时监控竞品动态",
+      "is_paraphrase": true,
+      "expected_skill": "react_agent",
+      "expected_execution_mode": "react",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.298591018188745,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-rewoo-001",
+      "test_name": "para_orig_route-kw-rewoo-001",
+      "timestamp": "2026-06-15T16:54:26.578588+00:00",
+      "input_query": "采集A、B、C三个竞品的功能数据",
+      "is_paraphrase": false,
+      "expected_skill": "rewoo_agent",
+      "expected_execution_mode": "rewoo",
+      "expected_complexity": "high",
+      "actual_skill": "competitor_analyzer",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": false,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.1343649928458035,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-rewoo-001",
+      "test_name": "para_route-kw-rewoo-001_0",
+      "timestamp": "2026-06-15T16:54:26.579414+00:00",
+      "input_query": "采集A、B、C三个竞品的功能数据",
+      "is_paraphrase": true,
+      "expected_skill": "rewoo_agent",
+      "expected_execution_mode": "rewoo",
+      "expected_complexity": "high",
+      "actual_skill": "rewoo_agent",
+      "actual_execution_mode": "rewoo",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": true,
+      "execution_mode_correct": true,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.18782296683639288,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-rewoo-001",
+      "test_name": "para_route-kw-rewoo-001_1",
+      "timestamp": "2026-06-15T16:54:26.579942+00:00",
+      "input_query": "采集A、B、C三个竞品的功能数据",
+      "is_paraphrase": true,
+      "expected_skill": "rewoo_agent",
+      "expected_execution_mode": "rewoo",
+      "expected_complexity": "high",
+      "actual_skill": "competitor_analyzer",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": false,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.11551898205652833,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-rewoo-001",
+      "test_name": "para_route-kw-rewoo-001_2",
+      "timestamp": "2026-06-15T16:54:26.580533+00:00",
+      "input_query": "采集A、B、C三个竞品的功能数据",
+      "is_paraphrase": true,
+      "expected_skill": "rewoo_agent",
+      "expected_execution_mode": "rewoo",
+      "expected_complexity": "high",
+      "actual_skill": "competitor_analyzer",
+      "actual_execution_mode": "skill_react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": false,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 0.12395897647365928,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-rewoo-002",
+      "test_name": "para_orig_route-kw-rewoo-002",
+      "timestamp": "2026-06-15T16:54:38.782975+00:00",
+      "input_query": "并行搜索多个关键词",
+      "is_paraphrase": false,
+      "expected_skill": "rewoo_agent",
+      "expected_execution_mode": "rewoo",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 12200.403939001262,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-rewoo-002",
+      "test_name": "para_route-kw-rewoo-002_0",
+      "timestamp": "2026-06-15T16:54:38.785626+00:00",
+      "input_query": "并行搜索多个关键词",
+      "is_paraphrase": true,
+      "expected_skill": "rewoo_agent",
+      "expected_execution_mode": "rewoo",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.231539983768016,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-rewoo-002",
+      "test_name": "para_route-kw-rewoo-002_1",
+      "timestamp": "2026-06-15T16:54:51.536812+00:00",
+      "input_query": "并行搜索多个关键词",
+      "is_paraphrase": true,
+      "expected_skill": "rewoo_agent",
+      "expected_execution_mode": "rewoo",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 12750.671702960972,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-rewoo-002",
+      "test_name": "para_route-kw-rewoo-002_2",
+      "timestamp": "2026-06-15T16:54:51.539379+00:00",
+      "input_query": "并行搜索多个关键词",
+      "is_paraphrase": true,
+      "expected_skill": "rewoo_agent",
+      "expected_execution_mode": "rewoo",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.15318298432976,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-reflex-001",
+      "test_name": "para_orig_route-kw-reflex-001",
+      "timestamp": "2026-06-15T16:55:04.031642+00:00",
+      "input_query": "审查这段代码的合规性",
+      "is_paraphrase": false,
+      "expected_skill": "reflexion_agent",
+      "expected_execution_mode": "reflexion",
+      "expected_complexity": "high",
+      "actual_skill": "code_reviewer",
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": false,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 12490.38528103847,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-reflex-001",
+      "test_name": "para_route-kw-reflex-001_0",
+      "timestamp": "2026-06-15T16:55:04.034439+00:00",
+      "input_query": "审查这段代码的合规性",
+      "is_paraphrase": true,
+      "expected_skill": "reflexion_agent",
+      "expected_execution_mode": "reflexion",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.3601570283062756,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-reflex-001",
+      "test_name": "para_route-kw-reflex-001_1",
+      "timestamp": "2026-06-15T16:55:16.172980+00:00",
+      "input_query": "审查这段代码的合规性",
+      "is_paraphrase": true,
+      "expected_skill": "reflexion_agent",
+      "expected_execution_mode": "reflexion",
+      "expected_complexity": "high",
+      "actual_skill": "code_reviewer",
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": false,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 12138.08024401078,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-reflex-001",
+      "test_name": "para_route-kw-reflex-001_2",
+      "timestamp": "2026-06-15T16:55:16.175857+00:00",
+      "input_query": "审查这段代码的合规性",
+      "is_paraphrase": true,
+      "expected_skill": "reflexion_agent",
+      "expected_execution_mode": "reflexion",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.3903230321593583,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-reflex-002",
+      "test_name": "para_orig_route-kw-reflex-002",
+      "timestamp": "2026-06-15T16:55:27.673019+00:00",
+      "input_query": "生成一个高精度的数据分析脚本",
+      "is_paraphrase": false,
+      "expected_skill": "reflexion_agent",
+      "expected_execution_mode": "reflexion",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 11494.954236026388,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-reflex-002",
+      "test_name": "para_route-kw-reflex-002_0",
+      "timestamp": "2026-06-15T16:55:27.675594+00:00",
+      "input_query": "生成一个高精度的数据分析脚本",
+      "is_paraphrase": true,
+      "expected_skill": "reflexion_agent",
+      "expected_execution_mode": "reflexion",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.139441028703004,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-reflex-002",
+      "test_name": "para_route-kw-reflex-002_1",
+      "timestamp": "2026-06-15T16:55:44.080887+00:00",
+      "input_query": "生成一个高精度的数据分析脚本",
+      "is_paraphrase": true,
+      "expected_skill": "reflexion_agent",
+      "expected_execution_mode": "reflexion",
+      "expected_complexity": "high",
+      "actual_skill": "direct_agent",
+      "actual_execution_mode": "direct_chat",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": false,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 16404.873749008402,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    },
+    {
+      "benchmark_id": "route-kw-reflex-002",
+      "test_name": "para_route-kw-reflex-002_2",
+      "timestamp": "2026-06-15T16:55:44.083850+00:00",
+      "input_query": "生成一个高精度的数据分析脚本",
+      "is_paraphrase": true,
+      "expected_skill": "reflexion_agent",
+      "expected_execution_mode": "reflexion",
+      "expected_complexity": "high",
+      "actual_skill": null,
+      "actual_execution_mode": "react",
+      "actual_status_code": 200,
+      "actual_response_keys": [],
+      "actual_complexity_score": null,
+      "actual_match_method": null,
+      "actual_match_confidence": null,
+      "skill_correct": null,
+      "execution_mode_correct": false,
+      "complexity_correct": false,
+      "task_succeeded": true,
+      "category": "routing",
+      "subcategory": "keyword_match",
+      "response_time_ms": 2.364657004363835,
+      "error_message": null,
+      "alignment_violations": 0,
+      "cascade_alert": false,
+      "output_quality_score": null,
+      "output_quality_reasoning": null
+    }
+  ],
+  "output_quality_evaluations": [
+    {
+      "benchmark_id": "route-edge-explicit-001",
+      "input_query": "@skill:react_agent 搜索最新的AI新闻",
+      "expected_skill": "react_agent",
+      "actual_skill": "react_agent",
+      "quality_score": 5.0,
+      "reasoning": "路由精准匹配用户指定的技能与意图，执行模式完全正确。",
+      "evaluated": true
+    },
+    {
+      "benchmark_id": "route-kw-direct-001",
+      "input_query": "翻译这段话",
+      "expected_skill": "direct_agent",
+      "actual_skill": "direct_agent",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-direct-002",
+      "input_query": "帮我总结一下",
+      "expected_skill": "direct_agent",
+      "actual_skill": "direct_agent",
+      "quality_score": 4.0,
+      "reasoning": "路由与期望完全一致，direct_chat模式适合处理此类缺乏具体上下文的模糊指令，以便进行澄清或基于历史对话进行总结。",
+      "evaluated": true
+    },
+    {
+      "benchmark_id": "route-kw-direct-003",
+      "input_query": "什么是RAG？",
+      "expected_skill": "direct_agent",
+      "actual_skill": "direct_agent",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-rewoo-001",
+      "input_query": "采集A、B、C三个竞品的功能数据",
+      "expected_skill": "rewoo_agent",
+      "actual_skill": "competitor_analyzer",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Invalid \\escape: line 1 column 35 (char 34)",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-planexec-002",
+      "input_query": "规划产品优化方案",
+      "expected_skill": "plan_exec_agent",
+      "actual_skill": "plan_exec_agent",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-geo-001",
+      "input_query": "帮我优化这篇文章的SEO",
+      "expected_skill": "geo_optimizer",
+      "actual_skill": "geo_optimizer",
+      "quality_score": 5.0,
+      "reasoning": "路由精准匹配期望技能，且技能名称完全契合用户优化SEO的意图。",
+      "evaluated": true
+    },
+    {
+      "benchmark_id": "route-kw-deai-001",
+      "input_query": "帮我把这篇文章去AI化",
+      "expected_skill": "deai_agent",
+      "actual_skill": "deai_agent",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-content-001",
+      "input_query": "帮我写一篇关于AI的文章",
+      "expected_skill": "content_generator",
+      "actual_skill": "content_generator",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-schema-001",
+      "input_query": "帮我优化Schema",
+      "expected_skill": "schema_advisor",
+      "actual_skill": "schema_advisor",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-monitor-001",
+      "input_query": "监测品牌引用变化",
+      "expected_skill": "monitor",
+      "actual_skill": "monitor",
+      "quality_score": 5.0,
+      "reasoning": "实际路由技能与期望技能完全一致，精准匹配用户监测品牌引用变化的意图。",
+      "evaluated": true
+    },
+    {
+      "benchmark_id": "semantic-react-001",
+      "input_query": "需要动态适应、逐步推理和工具调用",
+      "expected_skill": "react_agent",
+      "actual_skill": "react_agent",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "semantic-reflex-001",
+      "input_query": "需要高精度和自我验证的任务",
+      "expected_skill": "reflexion_agent",
+      "actual_skill": "reflexion_agent",
+      "quality_score": 5.0,
+      "reasoning": "实际路由技能与期望技能完全一致，且反思（reflexion）执行模式完美契合高精度与自我验证的任务需求。",
+      "evaluated": true
+    },
+    {
+      "benchmark_id": "semantic-geo-001",
+      "input_query": "对文章进行GEO/SEO优化，提升在AI搜索引擎中的可见性",
+      "expected_skill": "geo_optimizer",
+      "actual_skill": "geo_optimizer",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "semantic-competitor-001",
+      "input_query": "分析竞品策略、对比品牌差距或发现竞争机会",
+      "expected_skill": "competitor_analyzer",
+      "actual_skill": "competitor_analyzer",
+      "quality_score": 5.0,
+      "reasoning": "实际路由技能与期望技能完全一致，精准匹配用户分析竞品和发现竞争机会的意图。",
+      "evaluated": true
+    },
+    {
+      "benchmark_id": "semantic-colloquial-review-001",
+      "input_query": "帮我看看代码有没有问题",
+      "expected_skill": "code_reviewer",
+      "actual_skill": "code_reviewer",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "semantic-colloquial-trend-001",
+      "input_query": "最近市场行情怎么样",
+      "expected_skill": "trend_agent",
+      "actual_skill": "trend_agent",
+      "quality_score": 5.0,
+      "reasoning": "实际路由技能与期望技能完全一致，精准匹配了用户查询市场行情的意图。",
+      "evaluated": true
+    },
+    {
+      "benchmark_id": "semantic-colloquial-content-001",
+      "input_query": "帮我写点东西",
+      "expected_skill": "content_generator",
+      "actual_skill": "content_generator",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "semantic-colloquial-citation-001",
+      "input_query": "这个引用对不对",
+      "expected_skill": "citation_detector",
+      "actual_skill": "citation_detector",
+      "quality_score": 5.0,
+      "reasoning": "路由精准匹配用户意图与期望技能，完全符合检测引用正确性的需求。",
+      "evaluated": true
+    },
+    {
+      "benchmark_id": "semantic-colloquial-competitor-001",
+      "input_query": "对手怎么样",
+      "expected_skill": "competitor_analyzer",
+      "actual_skill": "competitor_analyzer",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "semantic-mixed-geo-001",
+      "input_query": "做个SEO优化",
+      "expected_skill": "geo_optimizer",
+      "actual_skill": "geo_optimizer",
+      "quality_score": 5.0,
+      "reasoning": "实际路由技能与期望技能完全一致，精准匹配用户的SEO优化意图。",
+      "evaluated": true
+    },
+    {
+      "benchmark_id": "semantic-mixed-monitor-001",
+      "input_query": "monitor一下系统状态",
+      "expected_skill": "monitor",
+      "actual_skill": "monitor",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-direct-001",
+      "input_query": "翻译这段话",
+      "expected_skill": "direct_agent",
+      "actual_skill": "direct_agent",
+      "quality_score": 5.0,
+      "reasoning": "实际路由与期望技能完全一致，且direct_chat模式能够精准且高质量地处理“翻译”这一直接文本指令。",
+      "evaluated": true
+    },
+    {
+      "benchmark_id": "route-kw-direct-001",
+      "input_query": "翻译这段话",
+      "expected_skill": "direct_agent",
+      "actual_skill": "direct_agent",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-direct-001",
+      "input_query": "翻译这段话",
+      "expected_skill": "direct_agent",
+      "actual_skill": "direct_agent",
+      "quality_score": 5.0,
+      "reasoning": "实际路由与期望技能完全一致，direct_agent能够精准处理翻译这一直接指令，执行模式完全匹配用户意图。",
+      "evaluated": true
+    },
+    {
+      "benchmark_id": "route-kw-direct-001",
+      "input_query": "翻译这段话",
+      "expected_skill": "direct_agent",
+      "actual_skill": "direct_agent",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-direct-002",
+      "input_query": "帮我总结一下",
+      "expected_skill": "direct_agent",
+      "actual_skill": "direct_agent",
+      "quality_score": 5.0,
+      "reasoning": "路由精准匹配期望技能，direct_agent能够妥善处理此类缺乏具体上下文的模糊总结指令（如引导追问或基于历史对话进行总结）。",
+      "evaluated": true
+    },
+    {
+      "benchmark_id": "route-kw-direct-002",
+      "input_query": "帮我总结一下",
+      "expected_skill": "direct_agent",
+      "actual_skill": "direct_agent",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-direct-002",
+      "input_query": "帮我总结一下",
+      "expected_skill": "direct_agent",
+      "actual_skill": "direct_agent",
+      "quality_score": 5.0,
+      "reasoning": "用户输入缺乏具体上下文，路由至direct_agent进行直接对话以澄清意图或引导补充信息是完全正确且最优的处理方式。",
+      "evaluated": true
+    },
+    {
+      "benchmark_id": "route-kw-direct-003",
+      "input_query": "什么是RAG？",
+      "expected_skill": "direct_agent",
+      "actual_skill": "direct_agent",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-direct-003",
+      "input_query": "什么是RAG？",
+      "expected_skill": "direct_agent",
+      "actual_skill": "direct_agent",
+      "quality_score": 5.0,
+      "reasoning": "路由精准匹配期望技能，直接回答RAG概念完全符合用户意图且质量优秀。",
+      "evaluated": true
+    },
+    {
+      "benchmark_id": "route-kw-direct-003",
+      "input_query": "什么是RAG？",
+      "expected_skill": "direct_agent",
+      "actual_skill": "direct_agent",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-react-001",
+      "input_query": "搜索一下AI Agent市场数据",
+      "expected_skill": "react_agent",
+      "actual_skill": "trend_agent",
+      "quality_score": 4.0,
+      "reasoning": "实际路由的trend_agent在处理“市场数据”时比通用的react_agent更具针对性，能精准匹配用户获取市场趋势数据的意图。",
+      "evaluated": true
+    },
+    {
+      "benchmark_id": "route-kw-react-002",
+      "input_query": "帮我分析这个数据",
+      "expected_skill": "react_agent",
+      "actual_skill": "react_agent",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-react-003",
+      "input_query": "实时监控竞品动态",
+      "expected_skill": "react_agent",
+      "actual_skill": "competitor_analyzer",
+      "quality_score": 5.0,
+      "reasoning": "实际路由的competitor_analyzer比期望的通用react_agent更精准地垂直匹配了“竞品动态”这一具体意图，路由精准且执行模式合理。",
+      "evaluated": true
+    },
+    {
+      "benchmark_id": "route-kw-react-003",
+      "input_query": "实时监控竞品动态",
+      "expected_skill": "react_agent",
+      "actual_skill": "competitor_analyzer",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-rewoo-001",
+      "input_query": "采集A、B、C三个竞品的功能数据",
+      "expected_skill": "rewoo_agent",
+      "actual_skill": "competitor_analyzer",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-rewoo-001",
+      "input_query": "采集A、B、C三个竞品的功能数据",
+      "expected_skill": "rewoo_agent",
+      "actual_skill": "rewoo_agent",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-rewoo-001",
+      "input_query": "采集A、B、C三个竞品的功能数据",
+      "expected_skill": "rewoo_agent",
+      "actual_skill": "competitor_analyzer",
+      "quality_score": 3.0,
+      "reasoning": "路由到了竞品相关技能，领域高度匹配，但“分析”与“采集”侧重点略有偏差，且处理多竞品采集任务时可能不如通用规划Agent完整灵活。",
+      "evaluated": true
+    },
+    {
+      "benchmark_id": "route-kw-rewoo-001",
+      "input_query": "采集A、B、C三个竞品的功能数据",
+      "expected_skill": "rewoo_agent",
+      "actual_skill": "competitor_analyzer",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-reflex-001",
+      "input_query": "审查这段代码的合规性",
+      "expected_skill": "reflexion_agent",
+      "actual_skill": "code_reviewer",
+      "quality_score": 4.0,
+      "reasoning": "实际路由code_reviewer高度契合审查代码的意图，虽与期望的reflexion_agent不同，但路由准确且直接有效。",
+      "evaluated": true
+    },
+    {
+      "benchmark_id": "route-kw-reflex-001",
+      "input_query": "审查这段代码的合规性",
+      "expected_skill": "reflexion_agent",
+      "actual_skill": "code_reviewer",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Event loop is closed",
+      "evaluated": false
+    },
+    {
+      "benchmark_id": "route-kw-reflex-002",
+      "input_query": "生成一个高精度的数据分析脚本",
+      "expected_skill": "reflexion_agent",
+      "actual_skill": "direct_agent",
+      "quality_score": 0.0,
+      "reasoning": "Evaluation error: Invalid \\escape: line 1 column 82 (char 81)",
+      "evaluated": false
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test-results/e2e/capability_report.txt b/test-results/e2e/capability_report.txt
new file mode 100644
index 0000000..949124f
--- /dev/null
+++ b/test-results/e2e/capability_report.txt
@@ -0,0 +1,171 @@
+========================================================================
+  AgentKit 智能化能力分析报告
+  生成时间: 2026-06-15T16:59:06.575194+00:00
+========================================================================
+
+── 总体指标 ──────────────────────────────────────────────
+  观测总数:              82
+  技能路由召回率:        90.00%
+  技能路由精确率:        90.00%
+  技能路由F1:            90.00%
+  执行模式准确率:        40.38%
+  任务成功率:            100.00%
+  过拟合分数:            22.22%
+
+── 分类明细 ──────────────────────────────────────────────
+  [路由/显式前缀]
+    样本数=1  召回率=100.00%  精确率=100.00%  F1=100.00%
+    执行模式准确率=100.00%  成功率=100.00%  平均耗时=0ms
+
+  [路由/问候语]
+    样本数=2  召回率=100.00%  精确率=100.00%  F1=100.00%
+    执行模式准确率=100.00%  成功率=100.00%  平均耗时=0ms
+
+  [路由/身份识别]
+    样本数=1  召回率=100.00%  精确率=100.00%  F1=100.00%
+    执行模式准确率=100.00%  成功率=100.00%  平均耗时=0ms
+
+  [路由/关键词匹配]
+    样本数=62  召回率=67.74%  精确率=67.74%  F1=67.74%
+    执行模式准确率=51.61%  成功率=100.00%  平均耗时=4449ms
+
+  [semantic_router/colloquial_match]
+    样本数=5  召回率=100.00%  精确率=100.00%  F1=100.00%
+    执行模式准确率=0.00%  成功率=100.00%  平均耗时=2411ms
+
+  [semantic_router/description_match]
+    样本数=8  召回率=100.00%  精确率=100.00%  F1=100.00%
+    执行模式准确率=37.50%  成功率=100.00%  平均耗时=892ms
+
+  [semantic_router/mixed_lang_match]
+    样本数=3  召回率=100.00%  精确率=100.00%  F1=100.00%
+    执行模式准确率=33.33%  成功率=100.00%  平均耗时=1ms
+
+── 过拟合分析 ────────────────────────────────────────────
+  [✓ 正常] route-kw-direct-001: 原始输入=✓, 改写一致性=100%
+  [✓ 正常] route-kw-direct-002: 原始输入=✓, 改写一致性=67%
+  [✓ 正常] route-kw-direct-003: 原始输入=✓, 改写一致性=67%
+  [✓ 正常] route-kw-react-001: 原始输入=✗, 改写一致性=100%
+  [✓ 正常] route-kw-react-002: 原始输入=✗, 改写一致性=67%
+  [✓ 正常] route-kw-react-003: 原始输入=✗, 改写一致性=100%
+  [✓ 正常] route-kw-rewoo-001: 原始输入=✗, 改写一致性=67%
+  [✓ 正常] route-kw-rewoo-002: 原始输入=✗, 改写一致性=100%
+  [✓ 正常] route-kw-reflex-001: 原始输入=✗, 改写一致性=100%
+  [✓ 正常] route-kw-reflex-002: 原始输入=✗, 改写一致性=100%
+
+── 语义路由分析 ──────────────────────────────────────────
+  [colloquial_match] 样本数=5  精确率=100.00%  F1=100.00%
+  [description_match] 样本数=8  精确率=100.00%  F1=100.00%
+  [mixed_lang_match] 样本数=3  精确率=100.00%  F1=100.00%
+
+── 智能化短板识别 ────────────────────────────────────────
+  🟠 [高] 执行模式准确率过低 (0.00%)，子类别: colloquial_match
+     证据: 正确数=0/5
+     建议: 检查复杂度估算和模式选择逻辑
+
+  🟠 [高] 执行模式准确率过低 (37.50%)，子类别: description_match
+     证据: 正确数=3/8
+     建议: 检查复杂度估算和模式选择逻辑
+
+  🟠 [高] 执行模式准确率过低 (33.33%)，子类别: mixed_lang_match
+     证据: 正确数=1/3
+     建议: 检查复杂度估算和模式选择逻辑
+
+  🟡 [中] 技能路由F1偏低 (0.68)，子类别: keyword_match
+     证据: 召回率=67.74%, 精确率=67.74%, 样本数=62
+     建议: 微调路由阈值或增加更多意图示例
+
+  🟡 [中] 执行模式准确率过低 (51.61%)，子类别: keyword_match
+     证据: 正确数=32/62
+     建议: 检查复杂度估算和模式选择逻辑
+
+── 根因分析 ──────────────────────────────────────────────
+  ▸ [复杂度估算偏差] 置信度: ███████░░░ 75%
+    原因: 复杂度估算偏差：倾向高估复杂度（将简单任务误判为需要多步推理）
+    详情: 共 31 个执行模式判断错误。低估复杂度 0 次，高估复杂度 1 次。受影响子类别: description_match, mixed_lang_match, colloquial_match, keyword_match
+    受影响用例: route-kw-rewoo-001, route-kw-rewoo-002, route-kw-reflex-001, route-kw-reflex-002, route-kw-planexec-001...
+
+  ▸ [意图歧义] 置信度: ███████░░░ 70%
+    原因: 意图歧义：不同技能的关键词/意图描述重叠，导致路由混淆
+    详情: 技能混淆对: rewoo_agent→competitor_analyzer(2次); reflexion_agent→code_reviewer(1次)
+    受影响用例: route-kw-rewoo-001, route-kw-rewoo-001, route-kw-reflex-001
+
+  ▸ [质量门控阈值过低] 置信度: ██████░░░░ 60%
+    原因: 质量门控阈值过低：任务虽成功完成但输出了错误结果
+    详情: 共 3 个任务虽然HTTP成功但路由到了错误技能。质量门控未能拦截这些错误路由的结果。
+    受影响用例: route-kw-rewoo-001, route-kw-rewoo-001, route-kw-reflex-001
+
+── 改进策略规划 ──────────────────────────────────────────
+  ┌─ 策略 1: 意图歧义：不同技能的关键词/意图描述重叠，导致路由混淆
+  │ 总体策略: 短期：添加互斥关键词消歧；中期：启用LLM二次分类；长期：训练专用意图分类模型替代规则匹配
+  │
+  │ 🟠 [P1] 为易混淆技能添加互斥关键词
+  │   目标模块: configs/skills/*.yaml → intent.disambiguation_keywords
+  │   具体操作: 在技能配置中为容易混淆的技能对添加互斥关键词（disambiguation_keywords），当用户输入同时匹配多个技能时，优先选择包含互斥关键词的技能。
+  │   预期影响: 预计提升精确率 10~25%，减少技能混淆
+  │   工作量: 小
+  │   验证方式: 运行歧义消解回测，验证路由精确率提升
+  │
+  │ 🟡 [P2] 实现LLM二次分类消歧
+  │   目标模块: src/agentkit/chat/skill_routing.py → Layer 1
+  │   具体操作: 当 Layer 0/1 路由到多个候选技能时，调用 LLM quick_classify 进行二次意图判断，选择最匹配的技能。
+  │   预期影响: 预计提升精确率 15~30%，但增加 ~500ms 延迟和 ~100 tokens
+  │   工作量: 中
+  │   验证方式: 运行歧义消解回测，对比延迟和精确率变化
+  │
+  └────────────────────────────────────────────────────────────
+
+  ┌─ 策略 2: 复杂度估算偏差：倾向高估复杂度（将简单任务误判为需要多步推理）
+  │ 总体策略: 短期：调整启发式规则权重；中期：构建复杂度校准数据集；长期：训练复杂度评估模型替代规则
+  │
+  │ 🟠 [P1] 优化复杂度估算启发式规则
+  │   目标模块: src/agentkit/chat/skill_routing.py → HeuristicClassifier
+  │   具体操作: 调整 HeuristicClassifier 的复杂度评分权重：增加任务动词（分析/研究/设计）的权重，降低简单问答动词（是什么/多少）的权重。
+  │   预期影响: 预计提升执行模式准确率 10~20%
+  │   工作量: 小
+  │   验证方式: 运行执行模式回测，验证准确率提升
+  │
+  │ 🟡 [P2] 引入任务复杂度校准数据集
+  │   目标模块: tests/e2e/benchmark_dataset.py
+  │   具体操作: 收集标注了复杂度等级的真实用户查询，构建校准数据集，定期评估和调整复杂度阈值。
+  │   预期影响: 持续提升复杂度判断准确性
+  │   工作量: 中
+  │   验证方式: 每次调整后运行回测，对比前后F1变化
+  │
+  └────────────────────────────────────────────────────────────
+
+  ┌─ 策略 3: 质量门控阈值过低：任务虽成功完成但输出了错误结果
+  │ 总体策略: 短期：增加技能匹配验证；中期：引入输出质量评分模型；长期：实现自动质量回归检测
+  │
+  │ 🟠 [P1] 增强质量门控的技能匹配验证
+  │   目标模块: src/agentkit/quality/gate.py
+  │   具体操作: 在QualityGate中增加技能匹配验证：检查输出是否与路由到的技能的能力范围一致，如果不一致则触发重试或降级。
+  │   预期影响: 减少错误路由导致的低质量输出
+  │   工作量: 中
+  │   验证方式: 运行质量门控回测，验证错误路由拦截率
+  │
+  └────────────────────────────────────────────────────────────
+
+── L3 输出质量评估 ──────────────────────────────────────────
+  评估样本数:          18
+  平均质量评分:        4.72/5.0
+  评分分布:            1分:0 2分:0 3分:1 4分:3 5分:14
+
+  样例:
+    [route-edge-explicit-001] 评分=5 期望=react_agent 实际=react_agent
+      理由: 路由精准匹配用户指定的技能与意图，执行模式完全正确。
+    [route-kw-direct-002] 评分=4 期望=direct_agent 实际=direct_agent
+      理由: 路由与期望完全一致，direct_chat模式适合处理此类缺乏具体上下文的模糊指令，以便进行澄清或基于历史对话进行总结。
+    [route-kw-geo-001] 评分=5 期望=geo_optimizer 实际=geo_optimizer
+      理由: 路由精准匹配期望技能，且技能名称完全契合用户优化SEO的意图。
+    [route-kw-monitor-001] 评分=5 期望=monitor 实际=monitor
+      理由: 实际路由技能与期望技能完全一致，精准匹配用户监测品牌引用变化的意图。
+    [semantic-reflex-001] 评分=5 期望=reflexion_agent 实际=reflexion_agent
+      理由: 实际路由技能与期望技能完全一致，且反思（reflexion）执行模式完美契合高精度与自我验证的任务需求。
+
+── L5 自适应能力 ──────────────────────────────────────────
+  测试组数:            10
+  平均自适应率:        86.67%
+  高自适应(>=80%):     6/10
+
+========================================================================
\ No newline at end of file
diff --git a/test-results/e2e/comprehensive_report.json b/test-results/e2e/comprehensive_report.json
index df1b379..10e773f 100644
--- a/test-results/e2e/comprehensive_report.json
+++ b/test-results/e2e/comprehensive_report.json
@@ -1,6 +1,6 @@
 {
   "report_type": "comprehensive_capability_backtest",
-  "generated_at": "2026-06-17T03:22:42.152439+00:00",
+  "generated_at": "2026-06-17T05:29:48.993554+00:00",
   "total_score": 100.0,
   "total_cases": 50,
   "total_passed": 50,
@@ -215,27 +215,27 @@
         {
           "case_id": "efficiency_greeting",
           "passed": true,
-          "elapsed_ms": 0.41
+          "elapsed_ms": 0.39
         },
         {
           "case_id": "efficiency_chitchat",
           "passed": true,
-          "elapsed_ms": 0.47
+          "elapsed_ms": 0.38
         },
         {
           "case_id": "efficiency_identity",
           "passed": true,
-          "elapsed_ms": 0.48
+          "elapsed_ms": 0.34
         },
         {
           "case_id": "efficiency_react_tool",
           "passed": true,
-          "elapsed_ms": 0.49
+          "elapsed_ms": 0.33
         },
         {
           "case_id": "efficiency_react_complex",
           "passed": true,
-          "elapsed_ms": 0.55
+          "elapsed_ms": 0.33
         }
       ]
     },
diff --git a/test-results/e2e/comprehensive_report.txt b/test-results/e2e/comprehensive_report.txt
index 8335472..9937f82 100644
--- a/test-results/e2e/comprehensive_report.txt
+++ b/test-results/e2e/comprehensive_report.txt
@@ -1,7 +1,7 @@
 ======================================================================
 Fischer AgentKit 综合能力回测报告
 ======================================================================
-生成时间: 2026-06-17T03:22:42.152439+00:00
+生成时间: 2026-06-17T05:29:48.993554+00:00
 总体评分: 100.0%
 用例总数: 50  通过: 50  失败: 0
 
diff --git a/tests/e2e/benchmark_dataset.py b/tests/e2e/benchmark_dataset.py
index 63eecae..e01cc82 100644
--- a/tests/e2e/benchmark_dataset.py
+++ b/tests/e2e/benchmark_dataset.py
@@ -876,6 +876,126 @@ ALIGNMENT_BENCHMARKS: list[BenchmarkCase] = [
 ]
 
 
+# ═══════════════════════════════════════════════════════════════════════════
+# Board Meeting (Private Board) Benchmarks — @board prefix routing
+# ═══════════════════════════════════════════════════════════════════════════
+
+BOARD_BENCHMARKS: list[BenchmarkCase] = [
+    # --- Default template (bare @board prefix, no expert list given) ---
+    BenchmarkCase(
+        id="board-default-001",
+        input="@board 讨论是否应该进入东南亚市场",
+        expected_execution_mode="board",
+        expected_complexity="high",
+        category="board",
+        subcategory="default_template",
+        paraphrases=[
+            "@board 我们要不要拓展东南亚业务",
+            "@board 东南亚市场进入策略讨论",
+            "@board:private_board 评估东南亚市场机会",
+        ],
+        tags=["board", "default", "strategy"],
+    ),
+    BenchmarkCase(
+        id="board-default-002",
+        input="@board AI产品定价策略应该怎么做",
+        expected_execution_mode="board",
+        expected_complexity="high",
+        category="board",
+        subcategory="default_template",
+        paraphrases=["@board 如何给AI产品定价", "@board AI产品定价讨论"],
+        tags=["board", "default", "pricing"],
+    ),
+    # --- Explicit expert list (@board:expert1,expert2) ---
+    BenchmarkCase(
+        id="board-explicit-001",
+        input="@board:elon_musk,jeff_bezos 讨论火星殖民的商业化路径",
+        expected_execution_mode="board",
+        expected_complexity="high",
+        category="board",
+        subcategory="explicit_experts",
+        paraphrases=[
+            "@board:elon_musk,jeff_bezos 火星商业化方案",
+            "@board:jeff_bezos,elon_musk 如何商业化火星",
+        ],
+        tags=["board", "explicit", "mars"],
+    ),
+    BenchmarkCase(
+        id="board-explicit-002",
+        input="@board:charlie_munger,warren_buffett 价值投资在AI时代的适用性",
+        expected_execution_mode="board",
+        expected_complexity="high",
+        category="board",
+        subcategory="explicit_experts",
+        paraphrases=[
+            "@board:charlie_munger,warren_buffett AI时代还要不要价值投资",
+        ],
+        tags=["board", "explicit", "investing"],
+    ),
+    # --- Explicit default template name (@board:private_board) ---
+    BenchmarkCase(
+        id="board-template-001",
+        input="@board:private_board 讨论创业公司融资节奏",
+        expected_execution_mode="board",
+        expected_complexity="high",
+        category="board",
+        subcategory="explicit_template",
+        paraphrases=["@board:private_board 创业融资策略", "@board:private_board 融资节奏讨论"],
+        tags=["board", "template", "fundraising"],
+    ),
+    # --- Edge cases (bare prefix with no topic; plain text that must NOT route to board) ---
+    BenchmarkCase(
+        id="board-edge-empty-topic-001",
+        input="@board",
+        expected_execution_mode="board",
+        expected_complexity="low",
+        category="board",
+        subcategory="empty_topic",
+        tags=["board", "edge", "empty"],
+    ),
+    BenchmarkCase(
+        id="board-edge-no-prefix-001",
+        input="讨论一下市场策略",
+        expected_execution_mode="react",
+        expected_complexity="medium",
+        category="board",
+        subcategory="no_prefix",
+        paraphrases=["分析市场策略", "市场策略讨论"],
+        tags=["board", "edge", "no_match"],
+    ),
+    # --- Name validation (invalid names still expect "board"; fallback assumed — confirm) ---
+    BenchmarkCase(
+        id="board-name-valid-001",
+        input="@board:elon_musk,jeff_bezos,allenzhang 产品设计哲学",
+        expected_execution_mode="board",
+        expected_complexity="high",
+        category="board",
+        subcategory="name_validation",
+        tags=["board", "validation", "valid"],
+    ),
+    BenchmarkCase(
+        id="board-name-invalid-001",
+        input="@board:@#$ 讨论主题",
+        expected_execution_mode="board",
+        expected_complexity="low",
+        category="board",
+        subcategory="name_validation",
+        tags=["board", "validation", "invalid"],
+    ),
+    # --- Stop command (user intervention; no @board prefix — assumes an active session, confirm) ---
+    BenchmarkCase(
+        id="board-stop-001",
+        input="/stop",
+        expected_execution_mode="board",
+        expected_complexity="low",
+        category="board",
+        subcategory="stop_command",
+        paraphrases=["停止讨论", "结束讨论"],
+        tags=["board", "stop", "intervention"],
+    ),
+]
+
+
 # ═══════════════════════════════════════════════════════════════════════════
 # All benchmarks combined
 # ═══════════════════════════════════════════════════════════════════════════
@@ -888,6 +1008,7 @@ ALL_BENCHMARKS: list[BenchmarkCase] = (
     + CONSISTENCY_BENCHMARKS
     + SEMANTIC_ROUTER_BENCHMARKS
     + ALIGNMENT_BENCHMARKS
+    + BOARD_BENCHMARKS
 )
 
 
diff --git a/tests/unit/experts/test_board_backtest.py b/tests/unit/experts/test_board_backtest.py
new file mode 100644
index 0000000..8b8ac77
--- /dev/null
+++ b/tests/unit/experts/test_board_backtest.py
@@ -0,0 +1,659 @@
+"""私董会 (Board Meeting) 回测脚本 — 能力评估与回归测试
+
+对 BoardRouter / BoardTeam / BoardOrchestrator 进行系统性回测，
+覆盖以下能力维度：
+
+1. 前缀路由准确性 (Prefix Routing Accuracy)
+   - @board 前缀匹配
+   - @board:expert1,expert2 显式专家
+   - @board:private_board 显式模板
+   - 非 @board 输入不应误匹配
+
+2. 主题提取准确性 (Topic Extraction Accuracy)
+   - 从 @board topic 提取 topic
+   - 从 @board:experts topic 提取 topic
+   - 空主题处理
+
+3. 专家名验证 (Expert Name Validation)
+   - 有效名称接受
+   - 无效名称拒绝（含空格、特殊字符）
+   - MAX_EXPERTS=10 上限截断
+
+4. 模板加载 (Template Loading)
+   - 默认 private_board 模板成员加载
+   - 配置目录 YAML 加载
+
+5. 停止命令检测 (Stop Command Detection)
+   - /stop, 停止讨论, stop, 结束讨论
+
+6. BoardTeam 状态机 (BoardTeam State Machine)
+   - FORMING → DISCUSSING → CONCLUDING → COMPLETED → DISSOLVED
+
+7. 讨论历史管理 (Discussion History Management)
+   - 添加发言
+   - 历史格式化
+   - 用户干预
+
+8. BoardRoutingResult 数据结构 (Data Structure Integrity)
+   - 默认值验证
+   - 匹配结果字段填充
+
+9. 回归测试 (Regression: No Interference)
+   - @board 路由不干扰 @team / @skill / 普通聊天
+
+运行方式：
+    pytest tests/unit/experts/test_board_backtest.py -v
+    pytest tests/unit/experts/test_board_backtest.py -v -k "TestPrefixRouting"
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agentkit.experts.board_orchestrator import BoardOrchestrator
+from agentkit.experts.board_router import (
+    BOARD_PREFIX_PATTERN,
+    DEFAULT_TEMPLATE,
+    MAX_EXPERTS,
+    BoardRouter,
+    BoardRoutingResult,
+)
+from agentkit.experts.board import BoardStatus, BoardTeam
+from agentkit.experts.config import ExpertConfig, ExpertTemplate
+from agentkit.experts.registry import ExpertTemplateRegistry
+
+
+# ── 辅助函数 ──────────────────────────────────────────────
+
+
+def _make_expert_template(name: str, persona: str = "测试专家") -> ExpertTemplate:
+    """Build a built-in ExpertTemplate for tests (persona is reused as prompt identity/description)."""
+    config = ExpertConfig(
+        name=name,
+        agent_type="expert",
+        persona=persona,
+        thinking_style="analytical",
+        bound_skills=[],
+        task_mode="llm_generate",
+        prompt={"identity": persona},
+    )
+    return ExpertTemplate(name=name, config=config, is_builtin=True, description=persona)
+
+
+def _make_registry_with_experts() -> ExpertTemplateRegistry:
+    """Return a registry pre-loaded with the elon_musk, jeff_bezos and allenzhang templates."""
+    registry = ExpertTemplateRegistry()
+    registry.register(_make_expert_template("elon_musk", persona="Elon Musk"))
+    registry.register(_make_expert_template("jeff_bezos", persona="Jeff Bezos"))
+    registry.register(_make_expert_template("allenzhang", persona="张小龙"))
+    return registry
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 1. 前缀路由准确性 (Prefix Routing Accuracy)
+# ═══════════════════════════════════════════════════════════════════════════
+
+
+class TestPrefixRouting:
+    """Backtest: accuracy of @board prefix routing."""
+
+    def setup_method(self) -> None:
+        self.router = BoardRouter(template_registry=ExpertTemplateRegistry())
+
+    @pytest.mark.parametrize(
+        "content,expected_matched,expected_board_mode",
+        [
+            ("@board 讨论主题", True, True),
+            ("@board:elon_musk,jeff_bezos 主题", True, True),
+            ("@board:private_board 主题", True, True),
+            ("@board", True, True),
+            ("讨论一下市场策略", False, False),
+            ("@team:analyst,writer 任务", False, False),
+            ("@skill:react_agent 查看ip", False, False),
+            ("普通聊天消息", False, False),
+        ],
+        ids=[
+            "board_default",
+            "board_explicit_experts",
+            "board_explicit_template",
+            "board_no_topic",
+            "plain_text",
+            "team_prefix",
+            "skill_prefix",
+            "chitchat",
+        ],
+    )
+    def test_prefix_matching(
+        self, content: str, expected_matched: bool, expected_board_mode: bool
+    ) -> None:
+        """Check matched/board_mode for @board inputs and for inputs that must not match."""
+        result = self.router.resolve(content)
+        assert result.matched == expected_matched, (
+            f"matched mismatch for {content!r}: "
+            f"expected {expected_matched}, got {result.matched}"
+        )
+        assert result.board_mode == expected_board_mode, (
+            f"board_mode mismatch for {content!r}: "
+            f"expected {expected_board_mode}, got {result.board_mode}"
+        )
+
+    def test_regex_pattern_directly(self) -> None:
+        """Exercise the BOARD_PREFIX_PATTERN regex directly."""
+        # Matches bare @board
+        m = BOARD_PREFIX_PATTERN.match("@board 主题")
+        assert m is not None
+        assert m.group(1) is None  # no expert list
+        assert m.group(2).strip() == "主题"
+
+        # Matches @board:experts
+        m = BOARD_PREFIX_PATTERN.match("@board:a,b 主题")
+        assert m is not None
+        assert m.group(1) == "a,b"
+        assert m.group(2).strip() == "主题"
+
+        # Does not match @team
+        m = BOARD_PREFIX_PATTERN.match("@team task")
+        assert m is None
+
+    def test_default_template_uses_private_board(self) -> None:
+        """@board without an expert list should use the default private_board template."""
+        result = self.router.resolve("@board 讨论主题")
+        assert result.use_default_template is True
+        assert result.match_method == "explicit_board"
+
+    def test_explicit_private_board_template(self) -> None:
+        """@board:private_board should explicitly select the default template."""
+        result = self.router.resolve("@board:private_board 讨论主题")
+        assert result.use_default_template is True
+        assert result.board_mode is True
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 2. 主题提取准确性 (Topic Extraction Accuracy)
+# ═══════════════════════════════════════════════════════════════════════════
+
+
+class TestTopicExtraction:
+    """Backtest: accuracy of discussion-topic extraction."""
+
+    def setup_method(self) -> None:
+        self.router = BoardRouter(template_registry=ExpertTemplateRegistry())
+
+    @pytest.mark.parametrize(
+        "content,expected_topic",
+        [
+            ("@board 讨论是否应该进入东南亚市场", "讨论是否应该进入东南亚市场"),
+            ("@board AI产品定价策略应该怎么做", "AI产品定价策略应该怎么做"),
+            ("@board:elon_musk,jeff_bezos 火星商业化方案", "火星商业化方案"),
+            ("@board:private_board 创业融资策略", "创业融资策略"),
+            ("@board", ""),
+            ("@board   ", ""),
+        ],
+        ids=[
+            "default_chinese",
+            "default_chinese_2",
+            "explicit_experts",
+            "explicit_template",
+            "empty_topic",
+            "whitespace_only",
+        ],
+    )
+    def test_topic_extraction(self, content: str, expected_topic: str) -> None:
+        """Check the topic extracted from @board input (whitespace runs collapsed before comparing)."""
+        result = self.router.resolve(content)
+        actual = " ".join(result.topic.split())
+        assert actual == expected_topic, (
+            f"topic mismatch for {content!r}: "
+            f"expected {expected_topic!r}, got {actual!r}"
+        )
+
+    def test_colon_no_experts(self) -> None:
+        """Edge case: '@board: topic' (a colon followed by no expert names)."""
+        # \S+ requires non-whitespace after colon, so ": topic" → group(1)=None
+        # The colon becomes part of the topic
+        result = self.router.resolve("@board: topic")
+        assert result.matched is True
+        assert result.board_mode is True
+        # group(1) is None because \S+ doesn't match " " (space after colon)
+        assert result.use_default_template is True
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 3. 专家名验证 (Expert Name Validation)
+# ═══════════════════════════════════════════════════════════════════════════
+
+
+class TestExpertNameValidation:
+    """Backtest: expert-name format validation and the expert-count cap."""
+
+    def setup_method(self) -> None:
+        self.router = BoardRouter(template_registry=ExpertTemplateRegistry())
+
+    @pytest.mark.parametrize(
+        "content,expected_count",
+        [
+            ("@board:elon_musk,jeff_bezos 主题", 2),
+            ("@board:elon_musk,jeff_bezos,allenzhang 主题", 3),
+            ("@board:charlie_munger,warren_buffett,paul_graham 主题", 3),
+            ("@board:a,b,c,d,e,f,g,h,i,j 主题", 10),
+            ("@board:a,b,c,d,e,f,g,h,i,j,k 主题", 10),  # 11 → capped to 10
+        ],
+        ids=["two", "three", "three_alt", "exactly_ten", "eleven_capped"],
+    )
+    def test_valid_expert_count(self, content: str, expected_count: int) -> None:
+        """Check how many valid expert names are kept (11 names are capped to 10)."""
+        result = self.router.resolve(content)
+        assert len(result.specified_experts) == expected_count, (
+            f"expert count mismatch for {content!r}: "
+            f"expected {expected_count}, got {len(result.specified_experts)}"
+        )
+
+    def test_invalid_names_rejected(self) -> None:
+        """When every expert name is invalid, fall back to the default template."""
+        # @#$ are not valid per _EXPERT_NAME_RE = ^[a-zA-Z0-9_-]{1,64}$
+        result = self.router.resolve("@board:@#$ 主题")
+        # All names invalid → should fall back to default template
+        assert result.use_default_template is True, (
+            "should fall back to default template when all names are invalid"
+        )
+        assert len(result.specified_experts) > 0, (
+            f"default template members should be loaded, got: {result.specified_experts}"
+        )
+
+    def test_max_experts_constant(self) -> None:
+        """MAX_EXPERTS should be 10."""
+        assert MAX_EXPERTS == 10
+
+    def test_resolve_expert_configs_first_is_moderator(self) -> None:
+        """resolve_expert_configs should mark the first expert as lead (moderator)."""
+        result = self.router.resolve("@board:expert_a,expert_b 主题")
+        configs = self.router.resolve_expert_configs(result.specified_experts)
+        assert len(configs) == 2
+        assert configs[0].is_lead is True
+        assert configs[1].is_lead is False
+
+    def test_resolve_expert_configs_dynamic_generation(self) -> None:
+        """An unregistered expert name should get a dynamically generated ExpertConfig."""
+        result = self.router.resolve("@board:dynamic_expert 主题")
+        configs = self.router.resolve_expert_configs(result.specified_experts)
+        assert len(configs) == 1
+        assert configs[0].name == "dynamic_expert"
+        assert configs[0].is_lead is True
+
+    def test_mixed_valid_invalid_names(self) -> None:
+        """Mixed valid/invalid names: invalid ones are filtered out, valid ones are kept."""
+        result = self.router.resolve("@board:elon_musk,@#$,jeff_bezos 主题")
+        assert result.specified_experts == ["elon_musk", "jeff_bezos"], (
+            f"invalid names should be filtered, got: {result.specified_experts}"
+        )
+        assert result.use_default_template is False
+
+    @pytest.mark.parametrize(
+        "name_length,expected_valid",
+        [
+            (64, True),   # exactly 64 chars — valid
+            (65, False),  # 65 chars — invalid (exceeds {1,64})
+        ],
+        ids=["boundary_64_valid", "boundary_65_invalid"],
+    )
+    def test_expert_name_length_boundary(self, name_length: int, expected_valid: bool) -> None:
+        """Name-length boundary: 64 characters is valid, 65 is invalid."""
+        name = "a" * name_length
+        result = self.router.resolve(f"@board:{name} 主题")
+        if expected_valid:
+            assert len(result.specified_experts) == 1
+            assert result.specified_experts[0] == name
+        else:
+            # All names invalid → fallback to default template
+            assert result.use_default_template is True
+
+    def test_resolve_expert_configs_no_template_mutation(self) -> None:
+        """resolve_expert_configs must not mutate the shared template configs in the registry."""
+        registry = _make_registry_with_experts()
+        router = BoardRouter(template_registry=registry)
+
+        # First call: elon_musk is lead
+        configs1 = router.resolve_expert_configs(["elon_musk", "jeff_bezos"])
+        assert configs1[0].is_lead is True
+        assert configs1[1].is_lead is False
+
+        # Second call: jeff_bezos is lead (reversed order)
+        configs2 = router.resolve_expert_configs(["jeff_bezos", "elon_musk"])
+        assert configs2[0].is_lead is True
+        assert configs2[1].is_lead is False
+
+        # Verify first call's configs are NOT mutated by the second call
+        assert configs1[0].is_lead is True, (
+            "first call's is_lead was mutated by second call (shared template bug)"
+        )
+        assert configs1[1].is_lead is False
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 4. 模板加载 (Template Loading)
+# ═══════════════════════════════════════════════════════════════════════════
+
+
+class TestTemplateLoading:
+    """Backtest: expert template loading."""
+
+    def test_default_template_name(self) -> None:
+        """DEFAULT_TEMPLATE should be 'private_board'."""
+        assert DEFAULT_TEMPLATE == "private_board"
+
+    def test_fallback_default_members(self) -> None:
+        """With an empty registry, the fallback default member list should be returned."""
+        router = BoardRouter(template_registry=ExpertTemplateRegistry())
+        members = router._load_default_template_members()
+        assert len(members) > 0
+        assert len(members) <= MAX_EXPERTS
+        # The fallback list should contain the preset experts
+        assert "elon_musk" in members
+        assert "jeff_bezos" in members
+
+    def test_default_template_members_from_registry(self) -> None:
+        """When the registry has a private_board template, members should come from it."""
+        registry = _make_registry_with_experts()
+        # Register a private_board template with bound_skills as members
+        board_config = ExpertConfig(
+            name="private_board",
+            agent_type="expert",
+            persona="私董会模板",
+            bound_skills=["elon_musk", "jeff_bezos", "allenzhang"],
+            task_mode="llm_generate",
+            prompt={"identity": "Private Board"},
+        )
+        registry.register(
+            ExpertTemplate(
+                name="private_board",
+                config=board_config,
+                is_builtin=True,
+                description="默认私董会模板",
+            )
+        )
+        router = BoardRouter(template_registry=registry)
+        members = router._load_default_template_members()
+        assert members == ["elon_musk", "jeff_bezos", "allenzhang"]
+        assert len(members) <= MAX_EXPERTS
+
+    def test_load_from_configs_directory(self) -> None:
+        """Load YAML templates from the configs/experts/ directory (skipped if it is absent)."""
+        from pathlib import Path
+
+        experts_dir = Path(__file__).parent.parent.parent.parent / "configs" / "experts"
+        if not experts_dir.is_dir():
+            pytest.skip(f"configs/experts/ not found at {experts_dir}")
+
+        registry = ExpertTemplateRegistry()
+        loaded = registry.load_from_directory(str(experts_dir))
+        # load_from_directory returns a list[ExpertTemplate]
+        assert isinstance(loaded, list)
+        assert len(loaded) >= 5, f"expected ≥5 templates, got {len(loaded)}"
+
+        # Verify the key experts are present
+        names = {t.config.name for t in loaded}
+        expected_names = {"elon_musk", "jeff_bezos", "allenzhang", "charlie_munger"}
+        assert expected_names.issubset(names), (
+            f"missing expected experts: {expected_names - names}"
+        )
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 5. 停止命令检测 (Stop Command Detection)
+# ═══════════════════════════════════════════════════════════════════════════
+
+
+class TestStopCommandDetection:
+    """Backtest: stop-command detection."""
+
+    @pytest.mark.parametrize(
+        "command,expected_stop",
+        [
+            ("/stop", True),
+            ("停止讨论", True),
+            ("stop", True),
+            ("结束讨论", True),
+            ("继续讨论", False),
+            ("", False),
+            ("请继续", False),
+            ("STOP", False),  # case-sensitive
+        ],
+        ids=[
+            "slash_stop",
+            "chinese_stop",
+            "english_stop",
+            "chinese_end",
+            "continue",
+            "empty",
+            "please_continue",
+            "uppercase_not_match",
+        ],
+    )
+    def test_stop_command_detection(self, command: str, expected_stop: bool) -> None:
+        """Check stripped-input membership in BoardOrchestrator.STOP_COMMANDS."""
+        is_stop = command.strip() in BoardOrchestrator.STOP_COMMANDS
+        assert is_stop == expected_stop, (
+            f"stop detection mismatch for {command!r}: "
+            f"expected {expected_stop}, got {is_stop}"
+        )
+
+    def test_stop_commands_count(self) -> None:
+        """STOP_COMMANDS should contain exactly 4 commands."""
+        assert len(BoardOrchestrator.STOP_COMMANDS) == 4
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 6. BoardTeam 状态机 (BoardTeam State Machine)
+# ═══════════════════════════════════════════════════════════════════════════
+
+
+class TestBoardTeamStateMachine:
+    """Backtest: BoardTeam lifecycle states."""
+
+    def test_initial_status_is_forming(self) -> None:
+        """A new BoardTeam should start in FORMING."""
+        team = BoardTeam()
+        assert team.status == BoardStatus.FORMING
+
+    def test_status_transitions(self) -> None:
+        """Status transitions: FORMING → DISCUSSING → CONCLUDING → COMPLETED → DISSOLVED."""
+        team = BoardTeam()
+        assert team.status == BoardStatus.FORMING
+
+        team.set_status(BoardStatus.DISCUSSING)
+        assert team.status == BoardStatus.DISCUSSING
+
+        team.set_status(BoardStatus.CONCLUDING)
+        assert team.status == BoardStatus.CONCLUDING
+
+        team.set_status(BoardStatus.COMPLETED)
+        assert team.status == BoardStatus.COMPLETED
+
+        team.set_status(BoardStatus.DISSOLVED)
+        assert team.status == BoardStatus.DISSOLVED
+
+    def test_team_id_is_unique(self) -> None:
+        """Two BoardTeam instances should have different team_id values."""
+        team1 = BoardTeam()
+        team2 = BoardTeam()
+        assert team1.team_id != team2.team_id
+
+    def test_team_channel_format(self) -> None:
+        """team_channel should have the form 'board:{team_id}'."""
+        team = BoardTeam()
+        assert team.team_channel == f"board:{team.team_id}"
+
+    def test_max_rounds_configurable(self) -> None:
+        """max_rounds should be configurable."""
+        team = BoardTeam(max_rounds=3)
+        assert team.max_rounds == 3
+
+        team2 = BoardTeam(max_rounds=10)
+        assert team2.max_rounds == 10
+
+    def test_default_max_rounds(self) -> None:
+        """The default max_rounds should be 5."""
+        team = BoardTeam()
+        assert team.max_rounds == 5
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 7. 讨论历史管理 (Discussion History Management)
+# ═══════════════════════════════════════════════════════════════════════════
+
+
+class TestDiscussionHistory:
+    """回测：讨论历史管理"""
+
+    @pytest.fixture
+    def team(self) -> BoardTeam:  # provides a newly constructed BoardTeam to each test
+        return BoardTeam()
+
+    async def test_add_to_history(self, team: BoardTeam) -> None:
+        """Record one expert speech (coroutine test: assumes asyncio auto mode, TODO confirm)."""
+        await team.add_to_history(round=1, expert_name="elon_musk", content="First speech")
+        history = team.history
+        assert len(history) == 1
+        assert history[0]["round"] == 1
+        assert history[0]["expert_name"] == "elon_musk"
+        assert history[0]["content"] == "First speech"
+        assert history[0]["role"] == "expert"
+
+    async def test_add_moderator_speech(self, team: BoardTeam) -> None:
+        """A speech added with role='moderator' is stored with that role."""
+        await team.add_to_history(
+            round=1, expert_name="moderator", content="Summary", role="moderator"
+        )
+        history = team.history
+        assert history[0]["role"] == "moderator"
+
+    async def test_add_user_intervention(self, team: BoardTeam) -> None:
+        """添加用户干预"""
+        await team.add_user_intervention("Please focus on cost")
+        history = team.history
+        assert len(history) == 1
+        assert history[0]["role"] == "user"
+        assert history[0]["expert_name"] == "user"
+        assert history[0]["content"] == "Please focus on cost"
+
+    async def test_history_text_format(self, team: BoardTeam) -> None:
+        """历史文本格式化"""
+        await team.add_to_history(round=1, expert_name="elon_musk", content="Speech 1")
+        await team.add_to_history(
+            round=1, expert_name="moderator", content="Round 1 summary", role="moderator"
+        )
+        await team.add_to_history(round=2, expert_name="jeff_bezos", content="Speech 2")
+
+        text = team.get_history_text()
+        assert "第1轮" in text
+        assert "elon_musk" in text
+        assert "Speech 1" in text
+        assert "主持人小结" in text
+        assert "第2轮" in text
+        assert "jeff_bezos" in text
+
+    async def test_history_text_up_to_round(self, team: BoardTeam) -> None:
+        """按轮次过滤历史文本"""
+        await team.add_to_history(round=1, expert_name="a", content="R1")
+        await team.add_to_history(round=2, expert_name="b", content="R2")
+        await team.add_to_history(round=3, expert_name="c", content="R3")
+
+        text_r2 = team.get_history_text(up_to_round=2)
+        assert "R1" in text_r2
+        assert "R2" in text_r2
+        assert "R3" not in text_r2
+
+    async def test_consume_user_interventions(self, team: BoardTeam) -> None:
+        """消费用户干预列表"""
+        await team.add_user_intervention("Intervention 1")
+        await team.add_user_intervention("Intervention 2")
+
+        interventions = team.consume_user_interventions()
+        assert len(interventions) == 2
+        assert "Intervention 1" in interventions
+        assert "Intervention 2" in interventions
+
+        # 二次消费应为空
+        assert team.consume_user_interventions() == []
+
+    def test_empty_history_text(self, team: BoardTeam) -> None:
+        """空历史的文本应为空字符串"""
+        assert team.get_history_text() == ""
+
+    def test_increment_round(self, team: BoardTeam) -> None:
+        """轮次递增"""
+        assert team.current_round == 0
+        r1 = team.increment_round()
+        assert r1 == 1
+        assert team.current_round == 1
+        r2 = team.increment_round()
+        assert r2 == 2
+        assert team.current_round == 2
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 8. BoardRoutingResult 数据结构 (Data Structure Integrity)
+# ═══════════════════════════════════════════════════════════════════════════
+
+
+class TestBoardRoutingResult:
+    """Regression tests for the integrity of the BoardRoutingResult data structure."""
+
+    def test_default_values(self) -> None:
+        """A default-constructed result has False flags, an empty expert list and empty strings."""
+        result = BoardRoutingResult()
+        assert result.matched is False
+        assert result.board_mode is False
+        assert result.specified_experts == []
+        assert result.topic == ""
+        assert result.use_default_template is False
+        assert result.match_method == ""
+
+    def test_matched_result_fields(self) -> None:
+        """Resolving an explicit @board:<experts> input populates every result field."""
+        router = BoardRouter(template_registry=ExpertTemplateRegistry())
+        result = router.resolve("@board:elon_musk,jeff_bezos 讨论主题")
+        assert result.matched is True
+        assert result.board_mode is True
+        assert result.specified_experts == ["elon_musk", "jeff_bezos"]
+        assert result.topic == "讨论主题"
+        assert result.use_default_template is False
+        assert result.match_method == "explicit_board"
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# 9. 回归测试 — 确保不破坏现有路由 (Regression: No Interference)
+# ═══════════════════════════════════════════════════════════════════════════
+
+
+class TestNoInterferenceRegression:
+    """Regression tests: @board routing must not interfere with other prefix routes."""
+
+    def setup_method(self) -> None:
+        self.router = BoardRouter(template_registry=ExpertTemplateRegistry())
+
+    @pytest.mark.parametrize(
+        "content",
+        [
+            "@team:analyst,writer 协作完成任务",
+            "@skill:react_agent 查看ip",
+            "@skill:chat_only 你好",
+            "你好",
+            "搜索golang教程",
+            "执行ls命令",
+        ],
+        ids=[
+            "team_prefix",
+            "skill_react_prefix",
+            "skill_chat_prefix",
+            "greeting",
+            "search",
+            "shell",
+        ],
+    )
+    def test_non_board_inputs_not_matched(self, content: str) -> None:
+        """Inputs without the @board prefix must come back unmatched from BoardRouter."""
+        result = self.router.resolve(content)
+        assert result.matched is False
+        assert result.board_mode is False
+        assert result.use_default_template is False
diff --git a/tests/unit/experts/test_board_orchestrator.py b/tests/unit/experts/test_board_orchestrator.py
new file mode 100644
index 0000000..865464f
--- /dev/null
+++ b/tests/unit/experts/test_board_orchestrator.py
@@ -0,0 +1,339 @@
+"""BoardOrchestrator 单元测试 — 私董会讨论引擎"""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from agentkit.experts.board import BoardTeam
+from agentkit.experts.board_orchestrator import BoardOrchestrator
+from agentkit.experts.config import ExpertConfig
+from agentkit.experts.expert import Expert
+
+
+# ── 辅助函数 ──────────────────────────────────────────────
+
+
+def _make_expert_config(
+    name: str = "test_expert",
+    is_lead: bool = False,
+) -> ExpertConfig:
+    """Build a test ExpertConfig; only name and is_lead vary, all other fields are fixed."""
+    return ExpertConfig(
+        name=name,
+        agent_type="expert",
+        persona=f"测试专家 {name}",
+        thinking_style="analytical",
+        speaking_style="直接",
+        decision_framework="分析",
+        bound_skills=[],
+        is_lead=is_lead,
+        task_mode="llm_generate",
+        prompt={"identity": name},
+        avatar="🎭",
+        color="#FF0000",
+    )
+
+
+def _make_mock_expert(
+    name: str = "test_expert",
+    is_lead: bool = False,
+    is_active: bool = True,
+) -> MagicMock:
+    """Return a MagicMock spec'd on Expert with config, is_active and a mock agent gateway set."""
+    agent_stub = MagicMock()
+    agent_stub._llm_gateway = MagicMock()
+    mock_expert = MagicMock(spec=Expert)
+    mock_expert.config = _make_expert_config(name, is_lead)
+    mock_expert.is_active = is_active
+    mock_expert.agent = agent_stub
+    return mock_expert
+
+
+def _make_mock_gateway(response_content: str = "测试回复") -> AsyncMock:
+    """Return an AsyncMock gateway whose awaited chat() yields a reply carrying response_content."""
+    reply = MagicMock()
+    reply.content = response_content
+    mock_gateway = AsyncMock()
+    mock_gateway.chat = AsyncMock(return_value=reply)
+    return mock_gateway
+
+
+def _setup_team_with_experts(
+    team: BoardTeam,
+    experts: list[MagicMock],
+    moderator_name: str | None = None,
+) -> None:
+    """Register experts on the team by config name and pick a moderator (default: first expert)."""
+    for member in experts:
+        team._experts[member.config.name] = member
+    if not (moderator_name or experts):
+        return
+    # An explicit moderator name wins; otherwise the first expert takes the role.
+    team._moderator_name = moderator_name or experts[0].config.name
+
+
+# ── BoardOrchestrator 初始化测试 ──────────────────────────
+
+
+class TestBoardOrchestratorInit:
+    """Construction tests for BoardOrchestrator."""
+
+    def test_init(self):
+        """The orchestrator keeps a reference to the team it was constructed with."""
+        team = BoardTeam()
+        orchestrator = BoardOrchestrator(team=team)
+        assert orchestrator._team is team
+
+    def test_stop_commands(self):
+        """STOP_COMMANDS contains the expected English and Chinese stop phrases."""
+        assert "/stop" in BoardOrchestrator.STOP_COMMANDS
+        assert "停止讨论" in BoardOrchestrator.STOP_COMMANDS
+        assert "stop" in BoardOrchestrator.STOP_COMMANDS
+        assert "结束讨论" in BoardOrchestrator.STOP_COMMANDS
+
+
+# ── BoardOrchestrator.execute 测试 ────────────────────────
+
+
+class TestBoardOrchestratorExecute:
+    """Tests for the BoardOrchestrator.execute flow."""
+
+    @pytest.mark.asyncio
+    async def test_execute_no_active_experts(self):
+        """With no experts on the team, execute() returns a failed result with an error."""
+        team = BoardTeam()
+        orchestrator = BoardOrchestrator(team=team)
+
+        result = await orchestrator.execute("测试主题")
+
+        assert result["status"] == "failed"
+        assert "error" in result
+        assert "No active expert" in result["error"]
+
+    @pytest.mark.asyncio
+    async def test_execute_success(self):
+        """Happy path: a moderator and two members complete all max_rounds rounds."""
+        team = BoardTeam(max_rounds=2)
+        moderator = _make_mock_expert("moderator", is_lead=True)
+        member1 = _make_mock_expert("member1")
+        member2 = _make_mock_expert("member2")
+
+        _setup_team_with_experts(team, [moderator, member1, member2], "moderator")
+
+        # Mock LLM gateway
+        gateway = _make_mock_gateway("测试发言内容")
+        moderator.agent._llm_gateway = gateway
+        member1.agent._llm_gateway = gateway
+        member2.agent._llm_gateway = gateway
+
+        orchestrator = BoardOrchestrator(team=team)
+
+        # Mock broadcast_event to avoid transport issues
+        with patch.object(orchestrator, "_broadcast_event", new_callable=AsyncMock):
+            result = await orchestrator.execute("测试主题")
+
+        assert result["status"] == "completed"
+        assert result["total_rounds"] == 2
+        assert "summary" in result
+
+    @pytest.mark.asyncio
+    async def test_execute_with_stop_command(self):
+        """A queued user stop command ends the discussion after at most one round."""
+        team = BoardTeam(max_rounds=5)
+        moderator = _make_mock_expert("moderator", is_lead=True)
+        member1 = _make_mock_expert("member1")
+
+        _setup_team_with_experts(team, [moderator, member1], "moderator")
+
+        gateway = _make_mock_gateway("测试内容")
+        moderator.agent._llm_gateway = gateway
+        member1.agent._llm_gateway = gateway
+
+        # Queue a stop command as a user intervention
+        await team.add_user_intervention("/stop")
+
+        orchestrator = BoardOrchestrator(team=team)
+
+        with patch.object(orchestrator, "_broadcast_event", new_callable=AsyncMock):
+            result = await orchestrator.execute("测试主题")
+
+        # Should stop by round 1 (consume_user_interventions is presumably checked at loop start)
+        assert result["status"] == "completed"
+        assert result["total_rounds"] <= 1
+
+    @pytest.mark.asyncio
+    async def test_execute_llm_unavailable_uses_fallback(self):
+        """When every LLM call raises, execute() is still expected to complete all rounds."""
+        team = BoardTeam(max_rounds=2)
+        moderator = _make_mock_expert("moderator", is_lead=True)
+        _setup_team_with_experts(team, [moderator], "moderator")
+
+        # Mock gateway to raise exception for all LLM calls
+        gateway = AsyncMock()
+        gateway.chat = AsyncMock(side_effect=Exception("LLM 不可用"))
+        moderator.agent._llm_gateway = gateway
+
+        orchestrator = BoardOrchestrator(team=team)
+
+        with patch.object(orchestrator, "_broadcast_event", new_callable=AsyncMock):
+            result = await orchestrator.execute("测试主题")
+
+        # With the LLM unavailable the orchestrator is expected to fall back and still complete
+        assert result["status"] == "completed"
+        assert result["total_rounds"] == 2
+        # Only the presence of a summary is checked here, not its fallback wording
+        assert "summary" in result
+
+    @pytest.mark.asyncio
+    async def test_execute_unexpected_exception(self):
+        """A non-LLM exception raised by the opening step yields a failed result with an error."""
+        team = BoardTeam(max_rounds=2)
+        moderator = _make_mock_expert("moderator", is_lead=True)
+        _setup_team_with_experts(team, [moderator], "moderator")
+
+        # Provide a working gateway for fallback conclusion
+        gateway = _make_mock_gateway("回退总结")
+        moderator.agent._llm_gateway = gateway
+
+        orchestrator = BoardOrchestrator(team=team)
+
+        # Mock _generate_moderator_opening to raise an unexpected exception
+        with patch.object(
+            orchestrator,
+            "_generate_moderator_opening",
+            new_callable=AsyncMock,
+            side_effect=RuntimeError("Unexpected error"),
+        ):
+            with patch.object(orchestrator, "_broadcast_event", new_callable=AsyncMock):
+                result = await orchestrator.execute("测试主题")
+
+        # An unexpected exception must yield a failed status
+        assert result["status"] == "failed"
+        assert "error" in result
+
+
+# ── BoardOrchestrator._has_stop_command 测试 ──────────────
+
+
+class TestBoardOrchestratorStopCommand:
+    """Tests for BoardOrchestrator stop-command detection."""
+
+    def test_has_stop_command_true(self):
+        """A list containing any known stop phrase is detected as a stop command."""
+        team = BoardTeam()
+        orchestrator = BoardOrchestrator(team=team)
+
+        assert orchestrator._has_stop_command(["/stop"]) is True
+        assert orchestrator._has_stop_command(["停止讨论"]) is True
+        assert orchestrator._has_stop_command(["some text", "stop"]) is True
+        assert orchestrator._has_stop_command(["结束讨论"]) is True
+
+    def test_has_stop_command_false(self):
+        """An empty list or ordinary messages are not treated as a stop command."""
+        team = BoardTeam()
+        orchestrator = BoardOrchestrator(team=team)
+
+        assert orchestrator._has_stop_command([]) is False
+        assert orchestrator._has_stop_command(["继续讨论"]) is False
+        assert orchestrator._has_stop_command(["请多说一些"]) is False
+
+    def test_has_stop_command_case_insensitive(self):
+        """Stop-command matching ignores letter case."""
+        team = BoardTeam()
+        orchestrator = BoardOrchestrator(team=team)
+
+        assert orchestrator._has_stop_command(["STOP"]) is True
+        assert orchestrator._has_stop_command(["Stop"]) is True
+
+
+# ── BoardOrchestrator._get_llm_gateway 测试 ───────────────
+
+
+class TestBoardOrchestratorGetGateway:
+    """Tests for BoardOrchestrator._get_llm_gateway."""
+
+    def test_get_gateway_from_expert(self):
+        """An expert whose agent has a gateway yields a non-None gateway."""
+        team = BoardTeam()
+        orchestrator = BoardOrchestrator(team=team)
+
+        expert = _make_mock_expert("test")
+        gateway = orchestrator._get_llm_gateway(expert)
+
+        assert gateway is not None
+
+    def test_get_gateway_fallback_to_active_experts(self):
+        """If the given expert has no gateway, another expert on the team supplies one."""
+        team = BoardTeam()
+        moderator = _make_mock_expert("moderator", is_lead=True)
+        # The moderator has no gateway
+        moderator.agent._llm_gateway = None
+
+        member = _make_mock_expert("member")
+        _setup_team_with_experts(team, [moderator, member], "moderator")
+
+        orchestrator = BoardOrchestrator(team=team)
+
+        # Should fall back to the member's gateway
+        gateway = orchestrator._get_llm_gateway(moderator)
+        assert gateway is not None
+
+    def test_get_gateway_none_when_no_gateway(self):
+        """None is returned when no expert on the team has a gateway."""
+        team = BoardTeam()
+        moderator = _make_mock_expert("moderator", is_lead=True)
+        moderator.agent._llm_gateway = None
+        _setup_team_with_experts(team, [moderator], "moderator")
+
+        orchestrator = BoardOrchestrator(team=team)
+        gateway = orchestrator._get_llm_gateway(moderator)
+        assert gateway is None
+
+
+# ── BoardOrchestrator._broadcast_event 测试 ───────────────
+
+
+class TestBoardOrchestratorBroadcast:
+    """Tests for BoardOrchestrator._broadcast_event."""
+
+    @pytest.mark.asyncio
+    async def test_broadcast_event_with_transport(self):
+        """With a transport set, one send() goes to the team channel with type and payload."""
+        team = BoardTeam()
+        orchestrator = BoardOrchestrator(team=team)
+
+        # Mock transport
+        team._handoff_transport = AsyncMock()
+        team._handoff_transport.send = AsyncMock()
+
+        await orchestrator._broadcast_event("board_started", {"topic": "测试"})
+
+        team._handoff_transport.send.assert_called_once()
+        call_args = team._handoff_transport.send.call_args
+        assert call_args[0][0] == team.team_channel
+        assert call_args[0][1]["type"] == "board_started"
+        assert call_args[0][1]["topic"] == "测试"
+
+    @pytest.mark.asyncio
+    async def test_broadcast_event_no_transport(self):
+        """Broadcasting without a transport must not raise."""
+        team = BoardTeam()
+        team._handoff_transport = None
+        orchestrator = BoardOrchestrator(team=team)
+
+        # Must not raise
+        await orchestrator._broadcast_event("board_started", {"topic": "测试"})
+
+    @pytest.mark.asyncio
+    async def test_broadcast_event_transport_error(self):
+        """An exception raised by transport.send() must not propagate to the caller."""
+        team = BoardTeam()
+        orchestrator = BoardOrchestrator(team=team)
+
+        team._handoff_transport = AsyncMock()
+        team._handoff_transport.send = AsyncMock(side_effect=Exception("Transport error"))
+
+        # Must not raise
+        await orchestrator._broadcast_event("board_started", {"topic": "测试"})
diff --git a/tests/unit/experts/test_board_router.py b/tests/unit/experts/test_board_router.py
new file mode 100644
index 0000000..72a78d4
--- /dev/null
+++ b/tests/unit/experts/test_board_router.py
@@ -0,0 +1,301 @@
+"""BoardRouter 单元测试 — @board 前缀路由解析"""
+
+from __future__ import annotations
+
+from agentkit.experts.board_router import (
+    BOARD_PREFIX_PATTERN,
+    BoardRouter,
+    BoardRoutingResult,
+    MAX_EXPERTS,
+)
+from agentkit.experts.config import ExpertConfig, ExpertTemplate
+from agentkit.experts.registry import ExpertTemplateRegistry
+
+
+# ── 辅助函数 ──────────────────────────────────────────────
+
+
+def _make_expert_template(
+    name: str = "test_expert",
+    persona: str = "测试专家",
+    speaking_style: str = "直接",
+    decision_framework: str = "分析",
+) -> ExpertTemplate:
+    """Build a built-in test ExpertTemplate wrapping an ExpertConfig made from the arguments."""
+    config = ExpertConfig(
+        name=name,
+        agent_type="expert",
+        persona=persona,
+        thinking_style="analytical",
+        speaking_style=speaking_style,
+        decision_framework=decision_framework,
+        bound_skills=[],
+        task_mode="llm_generate",
+        prompt={"identity": persona},
+    )
+    return ExpertTemplate(
+        name=name,
+        config=config,
+        is_builtin=True,
+        description=f"{name} 测试模板",
+    )
+
+
+def _make_registry_with_experts() -> ExpertTemplateRegistry:
+    """Return a registry pre-loaded with three expert templates plus a private_board template."""
+    registry = ExpertTemplateRegistry()
+    registry.register(_make_expert_template("elon_musk", persona="Elon Musk"))
+    registry.register(_make_expert_template("jeff_bezos", persona="Jeff Bezos"))
+    registry.register(_make_expert_template("allenzhang", persona="张小龙"))
+
+    # Register the private_board template (bound_skills holds the member list)
+    board_config = ExpertConfig(
+        name="private_board",
+        agent_type="expert",
+        persona="私董会模板",
+        bound_skills=["elon_musk", "jeff_bezos", "allenzhang"],
+        task_mode="llm_generate",
+        prompt={"identity": "Private Board"},
+    )
+    registry.register(ExpertTemplate(
+        name="private_board",
+        config=board_config,
+        is_builtin=True,
+        description="默认私董会模板",
+    ))
+    return registry
+
+
+# ── BOARD_PREFIX_PATTERN 正则测试 ──────────────────────────
+
+
+class TestBoardPrefixPattern:
+    """Matching tests for the BOARD_PREFIX_PATTERN regular expression."""
+
+    def test_match_board_only(self):
+        """A bare @board prefix matches with no expert group and the topic in group 2."""
+        match = BOARD_PREFIX_PATTERN.match("@board 讨论AI未来")
+        assert match is not None
+        assert match.group(1) is None
+        assert match.group(2) == "讨论AI未来"
+
+    def test_match_board_with_experts(self):
+        """The @board:expert1,expert2 form captures the expert list in group 1."""
+        match = BOARD_PREFIX_PATTERN.match("@board:elon_musk,jeff_bezos SpaceX上市")
+        assert match is not None
+        assert match.group(1) == "elon_musk,jeff_bezos"
+        assert match.group(2) == "SpaceX上市"
+
+    def test_match_board_with_template_name(self):
+        """@board:private_board captures the template name in group 1."""
+        match = BOARD_PREFIX_PATTERN.match("@board:private_board 讨论主题")
+        assert match is not None
+        assert match.group(1) == "private_board"
+        assert match.group(2) == "讨论主题"
+
+    def test_no_match_regular_input(self):
+        """Plain text and other prefixes (@team, @skill) do not match."""
+        assert BOARD_PREFIX_PATTERN.match("你好，今天天气怎么样") is None
+        assert BOARD_PREFIX_PATTERN.match("@team 分析数据") is None
+        assert BOARD_PREFIX_PATTERN.match("@skill:search 搜索内容") is None
+
+    def test_match_board_with_multiline_topic(self):
+        """A topic spanning several lines is captured in full by group 2."""
+        match = BOARD_PREFIX_PATTERN.match("@board 第一行\n第二行")
+        assert match is not None
+        assert "第一行" in match.group(2)
+        assert "第二行" in match.group(2)
+
+
+# ── BoardRouter.resolve 测试 ──────────────────────────────
+
+
+class TestBoardRouterResolve:
+    """Routing tests for BoardRouter.resolve."""
+
+    def test_resolve_default_template(self):
+        """'@board <topic>' resolves to the default template and its registered members."""
+        router = BoardRouter(template_registry=_make_registry_with_experts())
+        result = router.resolve("@board 如何看待AI对教育的影响")
+
+        assert result.matched is True
+        assert result.board_mode is True
+        assert result.topic == "如何看待AI对教育的影响"
+        assert result.use_default_template is True
+        assert result.match_method == "explicit_board"
+        assert "elon_musk" in result.specified_experts
+        assert "jeff_bezos" in result.specified_experts
+        assert "allenzhang" in result.specified_experts
+
+    def test_resolve_explicit_template(self):
+        """'@board:private_board <topic>' explicitly selects the default template."""
+        router = BoardRouter(template_registry=_make_registry_with_experts())
+        result = router.resolve("@board:private_board 讨论主题")
+
+        assert result.matched is True
+        assert result.use_default_template is True
+        assert result.topic == "讨论主题"
+        assert len(result.specified_experts) == 3
+
+    def test_resolve_specified_experts(self):
+        """'@board:expert1,expert2 <topic>' resolves to exactly the named experts, in order."""
+        router = BoardRouter(template_registry=_make_registry_with_experts())
+        result = router.resolve("@board:elon_musk,jeff_bezos SpaceX上市问题")
+
+        assert result.matched is True
+        assert result.use_default_template is False
+        assert result.specified_experts == ["elon_musk", "jeff_bezos"]
+        assert result.topic == "SpaceX上市问题"
+
+    def test_resolve_non_board_input(self):
+        """Plain input is unmatched and is returned unchanged as the topic."""
+        router = BoardRouter(template_registry=_make_registry_with_experts())
+        result = router.resolve("你好，今天天气怎么样")
+
+        assert result.matched is False
+        assert result.board_mode is False
+        assert result.topic == "你好，今天天气怎么样"
+
+    def test_resolve_empty_topic(self):
+        """'@board' with nothing after it matches with an empty topic."""
+        router = BoardRouter(template_registry=_make_registry_with_experts())
+        result = router.resolve("@board")
+
+        assert result.matched is True
+        assert result.topic == ""
+
+    def test_resolve_invalid_expert_names_filtered(self):
+        """Invalid expert names are dropped while the valid ones are kept."""
+        router = BoardRouter(template_registry=_make_registry_with_experts())
+        result = router.resolve("@board:elon_musk,invalid@name,jeff_bezos 主题")
+
+        assert result.matched is True
+        assert "elon_musk" in result.specified_experts
+        assert "jeff_bezos" in result.specified_experts
+        assert "invalid@name" not in result.specified_experts
+
+    def test_resolve_max_experts_limit(self):
+        """An expert list longer than MAX_EXPERTS is truncated to at most MAX_EXPERTS."""
+        router = BoardRouter(template_registry=_make_registry_with_experts())
+        # Build more expert names than MAX_EXPERTS allows
+        names = ",".join(f"expert_{i}" for i in range(MAX_EXPERTS + 5))
+        result = router.resolve(f"@board:{names} 讨论主题")
+
+        assert result.matched is True
+        assert len(result.specified_experts) <= MAX_EXPERTS
+
+    def test_resolve_default_template_fallback(self):
+        """With an empty registry the router falls back to hard-coded default members."""
+        router = BoardRouter(template_registry=ExpertTemplateRegistry())
+        result = router.resolve("@board 讨论主题")
+
+        assert result.matched is True
+        assert result.use_default_template is True
+        # Falls back to the hard-coded list
+        assert len(result.specified_experts) > 0
+        assert "elon_musk" in result.specified_experts
+
+
+# ── BoardRouter.resolve_expert_configs 测试 ────────────────
+
+
+class TestBoardRouterResolveConfigs:
+    """Config-resolution tests for BoardRouter.resolve_expert_configs."""
+
+    def test_resolve_configs_from_templates(self):
+        """Registered names resolve to their template configs; the first is marked as lead."""
+        registry = _make_registry_with_experts()
+        router = BoardRouter(template_registry=registry)
+        configs = router.resolve_expert_configs(["elon_musk", "jeff_bezos"])
+
+        assert len(configs) == 2
+        assert configs[0].name == "elon_musk"
+        assert configs[0].is_lead is True  # the first one is the moderator
+        assert configs[1].name == "jeff_bezos"
+        assert configs[1].is_lead is False
+        # Check the board-mode fields
+        assert configs[0].speaking_style == "直接"
+        assert configs[0].decision_framework == "分析"
+
+    def test_resolve_configs_dynamic_generation(self):
+        """An unregistered expert name still produces a config, generated on the fly."""
+        router = BoardRouter(template_registry=ExpertTemplateRegistry())
+        configs = router.resolve_expert_configs(["unknown_expert"])
+
+        assert len(configs) == 1
+        assert configs[0].name == "unknown_expert"
+        assert configs[0].is_lead is True
+
+    def test_resolve_configs_first_is_moderator(self):
+        """Only the first resolved expert is marked as lead (moderator)."""
+        registry = _make_registry_with_experts()
+        router = BoardRouter(template_registry=registry)
+        configs = router.resolve_expert_configs(["elon_musk", "jeff_bezos", "allenzhang"])
+
+        assert configs[0].is_lead is True
+        assert configs[1].is_lead is False
+        assert configs[2].is_lead is False
+
+    def test_resolve_configs_empty_list(self):
+        """An empty name list resolves to an empty config list."""
+        router = BoardRouter(template_registry=_make_registry_with_experts())
+        configs = router.resolve_expert_configs([])
+        assert len(configs) == 0
+
+    def test_resolve_configs_invalid_name_skipped(self):
+        """Invalid expert names are skipped; the valid ones keep their order."""
+        router = BoardRouter(template_registry=_make_registry_with_experts())
+        configs = router.resolve_expert_configs(["elon_musk", "invalid@name", "jeff_bezos"])
+
+        assert len(configs) == 2
+        assert configs[0].name == "elon_musk"
+        assert configs[1].name == "jeff_bezos"
+
+    def test_resolve_configs_ensure_at_least_one_lead(self):
+        """Even when every template has is_lead=False, the first resolved config becomes lead."""
+        registry = _make_registry_with_experts()
+        # Force is_lead to False on the templates
+        for name in ["elon_musk", "jeff_bezos"]:
+            template = registry.get(name)
+            if template:
+                template.config.is_lead = False
+
+        router = BoardRouter(template_registry=registry)
+        configs = router.resolve_expert_configs(["elon_musk", "jeff_bezos"])
+
+        # The first one must be forced to lead
+        assert configs[0].is_lead is True
+
+
+# ── BoardRoutingResult 数据类测试 ──────────────────────────
+
+
+class TestBoardRoutingResult:
+    """Tests for the BoardRoutingResult data class."""
+
+    def test_default_values(self):
+        """A default-constructed result has False flags, an empty expert list and empty strings."""
+        result = BoardRoutingResult()
+        assert result.matched is False
+        assert result.board_mode is False
+        assert result.specified_experts == []
+        assert result.topic == ""
+        assert result.use_default_template is False
+        assert result.match_method == ""
+
+    def test_custom_values(self):
+        """Values passed to the constructor are readable back from the same-named fields."""
+        result = BoardRoutingResult(
+            matched=True,
+            board_mode=True,
+            specified_experts=["a", "b"],
+            topic="测试主题",
+            use_default_template=True,
+            match_method="explicit_board",
+        )
+        assert result.matched is True
+        assert result.board_mode is True
+        assert result.specified_experts == ["a", "b"]
+        assert result.topic == "测试主题"
+        assert result.use_default_template is True
+        assert result.match_method == "explicit_board"
diff --git a/tests/unit/experts/test_board_team.py b/tests/unit/experts/test_board_team.py
new file mode 100644
index 0000000..1ca6a4a
--- /dev/null
+++ b/tests/unit/experts/test_board_team.py
@@ -0,0 +1,247 @@
+"""BoardTeam 单元测试 — 私董会容器状态管理"""
+
+from __future__ import annotations
+
+import pytest
+
+from agentkit.experts.board import BoardStatus, BoardTeam
+from agentkit.experts.config import ExpertConfig
+
+
+# ── 辅助函数 ──────────────────────────────────────────────
+
+
+def _make_expert_configs(count: int = 3) -> list[ExpertConfig]:
+    """Build ``count`` ExpertConfig fixtures named expert_0, expert_1, ...; only expert_0 is the lead."""
+    configs = []
+    for i in range(count):
+        configs.append(ExpertConfig(
+            name=f"expert_{i}",
+            agent_type="expert",
+            persona=f"测试专家 {i}",
+            thinking_style="analytical",
+            speaking_style="直接",
+            decision_framework="分析",
+            bound_skills=[],
+            is_lead=(i == 0),  # exactly one lead: the first expert
+            task_mode="llm_generate",
+            prompt={"identity": f"Expert {i}"},
+            avatar="🎭",
+            color=f"#FF{i:02d}00",  # 6-digit #RRGGBB (was 7 hex digits, an invalid colour)
+        ))
+    return configs
+
+
+# ── BoardStatus 枚举测试 ──────────────────────────────────
+
+
+class TestBoardStatus:
+    """Tests for the BoardStatus enum."""
+
+    def test_status_values(self):
+        """Each status member exposes the expected lowercase string value."""
+        assert BoardStatus.FORMING.value == "forming"
+        assert BoardStatus.DISCUSSING.value == "discussing"
+        assert BoardStatus.CONCLUDING.value == "concluding"
+        assert BoardStatus.COMPLETED.value == "completed"
+        assert BoardStatus.DISSOLVED.value == "dissolved"
+
+    def test_status_is_string_enum(self):
+        """BoardStatus members are str instances and compare equal to their plain string value."""
+        assert isinstance(BoardStatus.FORMING, str)
+        assert BoardStatus.FORMING == "forming"
+
+
+# ── BoardTeam 初始化测试 ──────────────────────────────────
+
+
+class TestBoardTeamInit:
+    """Tests for BoardTeam construction."""
+
+    def test_default_init(self):
+        """A BoardTeam built without arguments starts in FORMING with empty members, topic and history."""
+        team = BoardTeam()
+        assert team.team_id  # non-empty ID generated when none is supplied (presumably a UUID)
+        assert team.status == BoardStatus.FORMING
+        assert team.moderator is None
+        assert team.experts == []
+        assert team.active_experts == []
+        assert team.member_experts == []
+        assert team.topic == ""
+        assert team.current_round == 0
+        assert team.max_rounds == 5
+        assert team.history == []
+        assert team.team_channel.startswith("board:")
+
+    def test_custom_max_rounds(self):
+        """A max_rounds argument overrides the default round limit."""
+        team = BoardTeam(max_rounds=10)
+        assert team.max_rounds == 10
+
+    def test_custom_team_id(self):
+        """A supplied team_id is kept as-is and used to derive the "board:<team_id>" channel name."""
+        team = BoardTeam(team_id="custom-board-123")
+        assert team.team_id == "custom-board-123"
+        assert team.team_channel == "board:custom-board-123"
+
+
+# ── BoardTeam 讨论历史测试 ────────────────────────────────
+
+
+class TestBoardTeamHistory:
+    """Tests for BoardTeam discussion-history management."""
+
+    @pytest.mark.asyncio
+    async def test_add_to_history(self):
+        """add_to_history stores one entry with round, expert_name, content, role and a timestamp."""
+        team = BoardTeam()
+        await team.add_to_history(round=1, expert_name="expert_0", content="测试发言", role="expert")
+
+        history = team.history
+        assert len(history) == 1
+        assert history[0]["round"] == 1
+        assert history[0]["expert_name"] == "expert_0"
+        assert history[0]["content"] == "测试发言"
+        assert history[0]["role"] == "expert"
+        assert "timestamp" in history[0]
+
+    @pytest.mark.asyncio
+    async def test_add_multiple_to_history(self):
+        """Every add_to_history call (positional arguments here) appends a separate entry."""
+        team = BoardTeam()
+        await team.add_to_history(1, "expert_0", "发言1", "expert")
+        await team.add_to_history(1, "expert_1", "发言2", "expert")
+        await team.add_to_history(1, "expert_0", "小结", "moderator")
+
+        assert len(team.history) == 3
+
+    def test_get_history_text_empty(self):
+        """get_history_text returns an empty string when nothing has been recorded."""
+        team = BoardTeam()
+        assert team.get_history_text() == ""
+
+    @pytest.mark.asyncio
+    async def test_get_history_text_formatted(self):
+        """The formatted history text contains speaker names, contents and the role labels asserted below."""
+        team = BoardTeam()
+        await team.add_to_history(1, "elon_musk", "第一性原理", "expert")
+        await team.add_to_history(1, "moderator", "本轮小结", "moderator")
+
+        text = team.get_history_text()
+        assert "elon_musk" in text
+        assert "第一性原理" in text
+        assert "moderator" in text
+        assert "本轮小结" in text
+        assert "专家发言" in text
+        assert "主持人小结" in text
+
+    @pytest.mark.asyncio
+    async def test_get_history_text_up_to_round(self):
+        """up_to_round=2 keeps the round 1 and round 2 entries and leaves out round 3."""
+        team = BoardTeam()
+        await team.add_to_history(1, "expert_0", "第一轮", "expert")
+        await team.add_to_history(2, "expert_0", "第二轮", "expert")
+        await team.add_to_history(3, "expert_0", "第三轮", "expert")
+
+        text = team.get_history_text(up_to_round=2)
+        assert "第一轮" in text
+        assert "第二轮" in text
+        assert "第三轮" not in text
+
+
+# ── BoardTeam 用户干预测试 ────────────────────────────────
+
+
+class TestBoardTeamIntervention:
+    """Tests for BoardTeam user interventions."""
+
+    @pytest.mark.asyncio
+    async def test_add_user_intervention(self):
+        """A user intervention is recorded in history with expert_name and role both set to "user"."""
+        team = BoardTeam()
+        await team.add_user_intervention("请讨论AI伦理问题")
+
+        # The intervention should show up in the history
+        history = team.history
+        assert len(history) == 1
+        assert history[0]["expert_name"] == "user"
+        assert history[0]["content"] == "请讨论AI伦理问题"
+        assert history[0]["role"] == "user"
+
+    @pytest.mark.asyncio
+    async def test_consume_user_interventions(self):
+        """consume_user_interventions returns all pending interventions and leaves none behind."""
+        team = BoardTeam()
+        await team.add_user_intervention("干预1")
+        await team.add_user_intervention("干预2")
+
+        interventions = team.consume_user_interventions()
+        assert len(interventions) == 2
+        assert "干预1" in interventions
+        assert "干预2" in interventions
+
+        # A second consume call should return an empty list
+        assert team.consume_user_interventions() == []
+
+
+# ── BoardTeam 轮次管理测试 ────────────────────────────────
+
+
+class TestBoardTeamRound:
+    """Tests for BoardTeam round management."""
+
+    def test_increment_round(self):
+        """increment_round raises current_round by one per call and returns the new round number."""
+        team = BoardTeam()
+        assert team.current_round == 0
+
+        r1 = team.increment_round()
+        assert r1 == 1
+        assert team.current_round == 1
+
+        r2 = team.increment_round()
+        assert r2 == 2
+        assert team.current_round == 2
+
+
+# ── BoardTeam 状态管理测试 ────────────────────────────────
+
+
+class TestBoardTeamStatus:
+    """Tests for BoardTeam status management."""
+
+    def test_set_status(self):
+        """set_status replaces the current status; the status property reflects each change."""
+        team = BoardTeam()
+        assert team.status == BoardStatus.FORMING
+
+        team.set_status(BoardStatus.DISCUSSING)
+        assert team.status == BoardStatus.DISCUSSING
+
+        team.set_status(BoardStatus.COMPLETED)
+        assert team.status == BoardStatus.COMPLETED
+
+        team.set_status(BoardStatus.DISSOLVED)
+        assert team.status == BoardStatus.DISSOLVED
+
+
+# ── BoardTeam 属性测试 ────────────────────────────────────
+
+
+class TestBoardTeamProperties:
+    """Tests for BoardTeam properties and expert lookup."""
+
+    def test_handoff_transport_exists(self):
+        """A freshly built team exposes a non-None handoff_transport."""
+        team = BoardTeam()
+        assert team.handoff_transport is not None
+
+    def test_workspace_exists(self):
+        """A freshly built team exposes a non-None workspace."""
+        team = BoardTeam()
+        assert team.workspace is not None
+
+    def test_get_expert_not_found(self):
+        """get_expert returns None for a name that is not on the team."""
+        team = BoardTeam()
+        assert team.get_expert("nonexistent") is None