feat(phase1): implement core kernel and experience foundation (U1-U5)

- U1: GoalPlanner - structured goal decomposition wrapping _decompose_task() - U2: PlanExecutor - parallel execution with retry/skip/replace strategies - U3: PlanChecker - quality gate + review + experience writing - U4: Skill spec upgrade - dependencies, capabilities, version management - U5: ExperienceStore - PostgreSQL+pgvector task experience storage 208 new tests passing, fully backward compatible.
2026-06-09 23:57:03 +08:00 · 2026-06-09 23:57:03 +08:00 · fd4a811929
parent e4d6efb4bf
commit fd4a811929
17 changed files with 7024 additions and 19 deletions
--- a/src/agentkit/core/goal_planner.py
+++ b/src/agentkit/core/goal_planner.py
@ -0,0 +1,594 @@
 """GoalPlanner — 目标分析与计划生成
 用户给定自然语言目标后，自动生成结构化执行计划，包含任务拆解、
 依赖关系、并行度识别。作为 Orchestrator._decompose_task() 的前置增强层。
 执行流程：
 1. 通过结构化目标分解（规则/模板）生成初始方案
 2. 如果初始方案有效则跳过 LLM 调用
 3. 否则将初始方案作为上下文注入 LLM prompt，LLM 细化调整
 4. 识别能力缺口，请求人工介入
 5. 通过 AskHumanTool 请求确认/修改
 """
 from __future__ import annotations
 import logging
 import re
 import uuid
 from typing import Any
 from agentkit.core.plan_schema import (
    ExecutionPlan,
    PlanStep,
    PlanStepStatus,
    SkillGap,
    SkillGapLevel,
 )
 logger = logging.getLogger(__name__)
 class GoalPlanner:
    """目标分析与计划生成器
    将自然语言目标分解为结构化执行计划，包含任务拆解、
    依赖关系和并行度识别。
    使用方式：
        planner = GoalPlanner()
        plan = await planner.generate_plan(
            goal="调研 3 个竞品 SEO 策略并生成对比报告",
            context={},
            available_skills=["web_search", "seo_analyzer", "report_generator"],
        )
    """
    def __init__(self, llm_gateway: Any = None, max_parallel: int = 5):
        """
        Args:
            llm_gateway: LLM Gateway，用于细化计划（可选）
            max_parallel: 最大并行步骤数
        """
        self._llm_gateway = llm_gateway
        self._max_parallel = max_parallel
    async def generate_plan(
        self,
        goal: str,
        context: dict[str, Any] | None = None,
        available_skills: list[str] | None = None,
    ) -> ExecutionPlan:
        """生成结构化执行计划
        Args:
            goal: 自然语言目标
            context: 上下文信息（如已有数据、约束条件等）
            available_skills: 可用 Skill 列表
        Returns:
            ExecutionPlan: 结构化执行计划
        """
        context = context or {}
        available_skills = available_skills or []
        # 1. 通过规则/模板生成初始方案
        plan = self._rule_based_decompose(goal, context, available_skills)
        # 2. 识别能力缺口
        plan.skill_gaps = self._identify_skill_gaps(plan, available_skills)
        # 3. 如果有 LLM Gateway 且初始方案不够精确，让 LLM 细化
        if self._llm_gateway and self._should_refine_with_llm(plan):
            plan = await self._llm_refine_plan(goal, plan, context, available_skills)
            # 细化后重新识别能力缺口
            plan.skill_gaps = self._identify_skill_gaps(plan, available_skills)
        # 4. 构建并行组
        plan.parallel_groups = self._build_parallel_groups(plan.steps)
        return plan
    def _rule_based_decompose(
        self,
        goal: str,
        context: dict[str, Any],
        available_skills: list[str],
    ) -> ExecutionPlan:
        """基于规则/模板的目标分解
        使用启发式规则识别目标中的并列结构和顺序依赖，
        生成初始执行计划。
        """
        steps: list[PlanStep] = []
        # 识别并列结构：如"3 个竞品"、"3个方案"、"A、B、C"
        parallel_items = self._extract_parallel_items(goal)
        if parallel_items and len(parallel_items) > 1:
            # 有并列结构：每个并列项生成一个并行步骤 + 汇总步骤
            steps = self._decompose_parallel_goal(goal, parallel_items, available_skills)
        else:
            # 无明显并列结构：尝试识别顺序步骤
            sequential_parts = self._extract_sequential_parts(goal)
            if len(sequential_parts) > 1:
                steps = self._decompose_sequential_goal(goal, sequential_parts, available_skills)
            else:
                # 单步任务
                steps = self._decompose_simple_goal(goal, available_skills)
        return ExecutionPlan(
            goal=goal,
            steps=steps,
        )
    def _extract_parallel_items(self, goal: str) -> list[str]:
        """从目标中提取并列项
        识别模式：
        - "N 个 X"：如"3 个竞品"、"5 个方案"
        - "A、B、C"：顿号分隔的并列项
        - "A, B, C"：逗号分隔的并列项
        """
        items: list[str] = []
        # 模式1："N 个 X" — 识别数量+类别
        count_match = re.search(r"(\d+)\s*个\s*(.+?)(?:的|和|并|以及|，|,|$)", goal)
        if count_match:
            count = int(count_match.group(1))
            category = count_match.group(2).strip()
            # 生成 N 个并列项
            for i in range(1, count + 1):
                items.append(f"{category} {i}")
            return items
        # 模式2：顿号分隔 — "竞品A、竞品B、竞品C"
        if "、" in goal:
            # 提取顿号分隔的片段
            parts = re.split(r"[、]", goal)
            # 过滤掉太短的片段（可能是标点噪声）
            meaningful = [p.strip() for p in parts if len(p.strip()) > 1]
            if len(meaningful) >= 2:
                items = meaningful
                return items
        # 模式3：英文逗号分隔 — "A, B, C"
        if "," in goal:
            parts = goal.split(",")
            meaningful = [p.strip() for p in parts if len(p.strip()) > 1]
            if len(meaningful) >= 2:
                items = meaningful
                return items
        return items
    def _extract_sequential_parts(self, goal: str) -> list[str]:
        """从目标中提取顺序步骤
        识别模式：
        - "并"：如"调研并生成报告"
        - "然后"/"接着"/"再"：顺序连接词
        - "→"/"->"：箭头分隔
        """
        parts: list[str] = []
        # 模式1：箭头分隔
        if "→" in goal or "->" in goal:
            separator = "→" if "→" in goal else "->"
            parts = [p.strip() for p in goal.split(separator) if p.strip()]
            return parts
        # 模式2：顺序连接词
        sequential_patterns = [
            r"(.+?)然后(.+)",
            r"(.+?)接着(.+)",
            r"(.+?)之后再(.+)",
        ]
        for pattern in sequential_patterns:
            match = re.search(pattern, goal)
            if match:
                parts = [g.strip() for g in match.groups() if g.strip()]
                return parts
        # 模式3："并" 连接 — 如"调研并生成报告"
        if "并" in goal:
            match = re.search(r"(.+?)并(.+)", goal)
            if match:
                parts = [g.strip() for g in match.groups() if g.strip()]
                return parts
        return parts
    def _decompose_parallel_goal(
        self,
        goal: str,
        parallel_items: list[str],
        available_skills: list[str],
    ) -> list[PlanStep]:
        """分解包含并列结构的目标
        生成 N 个并行步骤 + 1 个汇总步骤。
        """
        steps: list[PlanStep] = []
        parallel_step_ids: list[str] = []
        # 为每个并列项生成一个并行步骤
        for i, item in enumerate(parallel_items):
            step_id = f"step-{i}"
            required_skills = self._infer_required_skills(item, available_skills)
            steps.append(PlanStep(
                step_id=step_id,
                name=f"处理: {item}",
                description=f"对「{item}」执行相关操作",
                dependencies=[],
                parallel_group=0,
                required_skills=required_skills,
            ))
            parallel_step_ids.append(step_id)
        # 汇总步骤：依赖所有并行步骤
        summary_skills = self._infer_required_skills("汇总 生成 报告", available_skills)
        steps.append(PlanStep(
            step_id=f"step-{len(parallel_items)}",
            name="汇总结果",
            description="汇总所有并行步骤的结果，生成最终输出",
            dependencies=parallel_step_ids,
            parallel_group=1,
            required_skills=summary_skills,
        ))
        return steps
    def _decompose_sequential_goal(
        self,
        goal: str,
        sequential_parts: list[str],
        available_skills: list[str],
    ) -> list[PlanStep]:
        """分解包含顺序步骤的目标"""
        steps: list[PlanStep] = []
        for i, part in enumerate(sequential_parts):
            step_id = f"step-{i}"
            dependencies = [f"step-{i - 1}"] if i > 0 else []
            required_skills = self._infer_required_skills(part, available_skills)
            steps.append(PlanStep(
                step_id=step_id,
                name=part[:50],  # 截取前 50 字符作为名称
                description=part,
                dependencies=dependencies,
                parallel_group=i,
                required_skills=required_skills,
            ))
        return steps
    def _decompose_simple_goal(
        self,
        goal: str,
        available_skills: list[str],
    ) -> list[PlanStep]:
        """分解简单目标为单步计划"""
        required_skills = self._infer_required_skills(goal, available_skills)
        return [
            PlanStep(
                step_id="step-0",
                name=goal[:50],
                description=goal,
                dependencies=[],
                parallel_group=0,
                required_skills=required_skills,
            )
        ]
    def _infer_required_skills(self, text: str, available_skills: list[str]) -> list[str]:
        """根据文本推断所需的 Skill
        基于关键词匹配，将文本中的意图映射到可用 Skill。
        """
        skill_keywords: dict[str, list[str]] = {
            "web_search": ["搜索", "查询", "查找", "调研", "search", "find", "lookup"],
            "seo_analyzer": ["seo", "搜索引擎优化", "关键词", "排名"],
            "report_generator": ["报告", "汇总", "总结", "生成", "对比", "report", "summary"],
            "data_analyzer": ["分析", "统计", "数据", "analyze", "data"],
            "document_writer": ["写", "撰写", "文档", "write", "document"],
            "code_generator": ["代码", "编程", "开发", "code", "develop"],
        }
        text_lower = text.lower()
        matched: list[str] = []
        for skill, keywords in skill_keywords.items():
            if skill not in available_skills:
                continue
            if any(kw in text_lower for kw in keywords):
                matched.append(skill)
        return matched
    def _identify_skill_gaps(
        self, plan: ExecutionPlan, available_skills: list[str]
    ) -> list[SkillGap]:
        """识别能力缺口
        检查每个步骤所需的 Skill 是否可用，标注缺口。
        """
        gaps: list[SkillGap] = []
        available_set = set(available_skills)
        for step in plan.steps:
            for skill in step.required_skills:
                if skill not in available_set:
                    gaps.append(SkillGap(
                        step_name=step.name,
                        required_skill=skill,
                        level=SkillGapLevel.HIGH,
                        suggestion=f"请安装或注册 '{skill}' Skill，或手动完成该步骤",
                    ))
            # 如果步骤没有匹配到任何 Skill，标注缺口
            if not step.required_skills:
                if not available_skills:
                    # 无可用 Skill 时标注为 HIGH
                    gaps.append(SkillGap(
                        step_name=step.name,
                        required_skill="(无可用 Skill)",
                        level=SkillGapLevel.HIGH,
                        suggestion="当前无可用 Skill，请注册所需 Skill 或手动完成该步骤",
                    ))
                else:
                    # 有 Skill 但未匹配到时标注为 MEDIUM
                    gaps.append(SkillGap(
                        step_name=step.name,
                        required_skill="(未匹配)",
                        level=SkillGapLevel.MEDIUM,
                        suggestion=f"无法自动匹配 Skill，可用 Skill: {', '.join(available_skills[:5])}",
                    ))
        return gaps
    def _should_refine_with_llm(self, plan: ExecutionPlan) -> bool:
        """判断是否需要 LLM 细化
        当初始方案步骤描述过于简单、能力缺口较多、或所有步骤
        都没有匹配到 Skill 时，需要 LLM 细化。
        """
        # 如果所有步骤都没有匹配到任何 Skill，让 LLM 重新评估
        if plan.steps and all(not s.required_skills for s in plan.steps):
            return True
        # 如果有较多能力缺口，让 LLM 重新评估
        if len(plan.skill_gaps) > len(plan.steps):
            return True
        return False
    async def _llm_refine_plan(
        self,
        goal: str,
        initial_plan: ExecutionPlan,
        context: dict[str, Any],
        available_skills: list[str],
    ) -> ExecutionPlan:
        """使用 LLM 细化执行计划
        将初始方案作为上下文注入 LLM prompt，让 LLM 细化调整。
        """
        import json
        # 构建初始方案摘要
        initial_summary = json.dumps(
            [s.to_dict() for s in initial_plan.steps],
            ensure_ascii=False,
            indent=2,
        )
        skills_str = ", ".join(available_skills) if available_skills else "无"
        prompt = (
            f"Refine the following execution plan for the given goal.\n\n"
            f"Goal: {goal}\n\n"
            f"Initial Plan (generated by rules):\n{initial_summary}\n\n"
            f"Available Skills: {skills_str}\n\n"
            f"Context: {json.dumps(context, ensure_ascii=False) if context else 'None'}\n\n"
            'Respond ONLY with a JSON array of steps: '
            '[{"name": "...", "description": "...", "dependencies": [], '
            '"required_skills": [...]}]\n'
            "The dependencies field lists step indices (0-based) that must complete first.\n"
            "Each step should have a clear, specific description (at least 20 characters).\n"
            "Do not include any other text."
        )
        try:
            response = await self._llm_gateway.chat(
                messages=[{"role": "user", "content": prompt}],
                model="default",
            )
            step_defs = json.loads(response.content)
            if not isinstance(step_defs, list) or not step_defs:
                return initial_plan
            steps: list[PlanStep] = []
            for i, defn in enumerate(step_defs):
                depends_on = [f"step-{j}" for j in defn.get("dependencies", [])]
                steps.append(PlanStep(
                    step_id=f"step-{i}",
                    name=defn.get("name", f"Step {i}"),
                    description=defn.get("description", ""),
                    dependencies=depends_on,
                    parallel_group=0,  # 后续由 _build_parallel_groups 重新计算
                    required_skills=defn.get("required_skills", []),
                ))
            return ExecutionPlan(
                goal=goal,
                steps=steps,
                metadata={"refined_by_llm": True},
            )
        except Exception as e:
            logger.warning(f"LLM plan refinement failed, using initial plan: {e}")
            return initial_plan
    def _build_parallel_groups(self, steps: list[PlanStep]) -> list[list[str]]:
        """构建并行执行组
        基于依赖关系拓扑排序，无依赖的步骤分到同一组并行执行。
        复用 Orchestrator._build_parallel_groups() 的拓扑排序逻辑。
        """
        step_map = {s.step_id: s for s in steps}
        completed: set[str] = set()
        groups: list[list[str]] = []
        remaining = set(s.step_id for s in steps)
        while remaining:
            # 找到所有依赖已满足的步骤
            ready = []
            for sid in remaining:
                step = step_map[sid]
                if all(dep in completed for dep in step.dependencies):
                    ready.append(sid)
            if not ready:
                # 循环依赖 — 将剩余步骤放入一组
                groups.append(list(remaining))
                break
            # 限制组大小
            group = ready[: self._max_parallel]
            groups.append(group)
            for sid in group:
                completed.add(sid)
                remaining.discard(sid)
        # 更新步骤的 parallel_group 字段
        for group_idx, group in enumerate(groups):
            for sid in group:
                step = step_map.get(sid)
                if step:
                    step.parallel_group = group_idx
        return groups
    def update_plan_from_feedback(
        self,
        plan: ExecutionPlan,
        modifications: dict[str, Any],
    ) -> ExecutionPlan:
        """根据用户反馈更新计划
        Args:
            plan: 原始执行计划
            modifications: 修改内容，可包含：
                - add_steps: 新增步骤列表
                - remove_steps: 要移除的步骤 ID 列表
                - update_steps: 要更新的步骤 {step_id: {field: value}}
                - reorder: 是否重新排序
        Returns:
            更新后的 ExecutionPlan
        """
        steps = list(plan.steps)
        # 移除步骤
        remove_ids = set(modifications.get("remove_steps", []))
        if remove_ids:
            steps = [s for s in steps if s.step_id not in remove_ids]
            # 清理依赖引用
            for step in steps:
                step.dependencies = [d for d in step.dependencies if d not in remove_ids]
        # 更新步骤
        update_map: dict[str, dict] = modifications.get("update_steps", {})
        for step in steps:
            if step.step_id in update_map:
                updates = update_map[step.step_id]
                for field_name, value in updates.items():
                    if hasattr(step, field_name):
                        setattr(step, field_name, value)
        # 新增步骤
        add_steps = modifications.get("add_steps", [])
        for new_step_def in add_steps:
            step_id = new_step_def.get("step_id", f"step-{len(steps)}")
            # 确保唯一性
            existing_ids = {s.step_id for s in steps}
            while step_id in existing_ids:
                step_id = f"step-{uuid.uuid4().hex[:4]}"
            steps.append(PlanStep(
                step_id=step_id,
                name=new_step_def.get("name", "New Step"),
                description=new_step_def.get("description", ""),
                dependencies=new_step_def.get("dependencies", []),
                required_skills=new_step_def.get("required_skills", []),
            ))
        # 重新构建并行组
        parallel_groups = self._build_parallel_groups(steps)
        return ExecutionPlan(
            plan_id=plan.plan_id,
            goal=plan.goal,
            steps=steps,
            parallel_groups=parallel_groups,
            skill_gaps=plan.skill_gaps,  # 保留原有缺口信息
            confirmed=False,  # 修改后需要重新确认
            metadata=plan.metadata,
        )
    def validate_plan(self, plan: ExecutionPlan) -> list[str]:
        """验证执行计划的合法性
        Returns:
            错误信息列表，空列表表示验证通过
        """
        errors: list[str] = []
        step_ids = {s.step_id for s in plan.steps}
        # 检查依赖引用是否存在
        for step in plan.steps:
            for dep in step.dependencies:
                if dep not in step_ids:
                    errors.append(f"步骤 '{step.step_id}' 依赖不存在的步骤 '{dep}'")
        # 检查循环依赖
        visited: set[str] = set()
        in_stack: set[str] = set()
        def has_cycle(sid: str) -> bool:
            if sid in in_stack:
                return True
            if sid in visited:
                return False
            visited.add(sid)
            in_stack.add(sid)
            step = plan.get_step(sid)
            if step:
                for dep in step.dependencies:
                    if has_cycle(dep):
                        return True
            in_stack.discard(sid)
            return False
        for step in plan.steps:
            if has_cycle(step.step_id):
                errors.append(f"检测到循环依赖，涉及步骤 '{step.step_id}'")
                break
        # 检查并行组与步骤的一致性
        grouped_ids: set[str] = set()
        for group in plan.parallel_groups:
            for sid in group:
                if sid not in step_ids:
                    errors.append(f"并行组包含不存在的步骤 '{sid}'")
                if sid in grouped_ids:
                    errors.append(f"步骤 '{sid}' 出现在多个并行组中")
                grouped_ids.add(sid)
        ungrouped = step_ids - grouped_ids
        if ungrouped:
            errors.append(f"步骤未分配到并行组: {', '.join(ungrouped)}")
        return errors
--- a/src/agentkit/core/orchestrator.py
+++ b/src/agentkit/core/orchestrator.py
@ -10,11 +10,16 @@ import logging
 import uuid
 from dataclasses import dataclass, field
 from enum import Enum
-from typing import Any
+from typing import TYPE_CHECKING, Any
 from agentkit.core.protocol import TaskMessage, TaskResult, TaskStatus
 from agentkit.core.shared_workspace import SharedWorkspace
 if TYPE_CHECKING:
    from agentkit.core.goal_planner import GoalPlanner
    from agentkit.core.plan_executor import PlanExecutor
    from agentkit.core.plan_checker import PlanChecker
 logger = logging.getLogger(__name__)
@ -95,6 +100,9 @@ class Orchestrator:
        llm_gateway: Any = None,
        max_parallel: int = 5,
        subtask_timeout: float = 300.0,
        goal_planner: GoalPlanner | None = None,
        plan_executor: PlanExecutor | None = None,
        plan_checker: PlanChecker | None = None,
    ):
        """
        Args:
@ -103,12 +111,18 @@ class Orchestrator:
            llm_gateway: LLM Gateway，用于任务分解
            max_parallel: 最大并行子任务数
            subtask_timeout: 子任务超时时间（秒）
            goal_planner: GoalPlanner 实例，用于结构化目标分解（可选）
            plan_executor: PlanExecutor 实例，用于执行 ExecutionPlan（可选）
            plan_checker: PlanChecker 实例，用于检查和复盘（可选）
        """
        self._agent_pool = agent_pool
        self._workspace = workspace or SharedWorkspace()
        self._llm_gateway = llm_gateway
        self._max_parallel = max_parallel
        self._subtask_timeout = subtask_timeout
        self._goal_planner = goal_planner
        self._plan_executor = plan_executor
        self._plan_checker = plan_checker
    async def execute(self, task: TaskMessage) -> OrchestrationResult:
        """执行编排任务
@ -175,6 +189,28 @@ class Orchestrator:
        """将复杂任务分解为子任务"""
        plan_id = str(uuid.uuid4())[:8]
        # If GoalPlanner available, use it for structured decomposition
        if self._goal_planner:
            try:
                execution_plan = await self._goal_planner.generate_plan(
                    goal=str(task.input_data),
                    context={"task_type": task.task_type, "agent_name": task.agent_name},
                    available_skills=self._get_available_skill_names(),
                )
                subtasks = self._convert_execution_plan_to_subtasks(
                    execution_plan, task.task_id, task.agent_name, task.task_type, task.input_data,
                )
                if subtasks:
                    parallel_groups = self._build_parallel_groups(subtasks)
                    return OrchestrationPlan(
                        plan_id=plan_id,
                        parent_task_id=task.task_id,
                        subtasks=subtasks,
                        parallel_groups=parallel_groups,
                    )
            except Exception as e:
                logger.warning(f"GoalPlanner decomposition failed, falling back: {e}")
        # If LLM gateway available, use it for decomposition
        if self._llm_gateway:
            try:
@ -404,3 +440,60 @@ class Orchestrator:
            aggregated["partial_success"] = True
        return aggregated
    def _get_available_skill_names(self) -> list[str]:
        """获取可用 Skill 名称列表"""
        try:
            agents_info = self._agent_pool.list_agents()
            return [a["name"] for a in agents_info]
        except Exception:
            return []
    def _convert_execution_plan_to_subtasks(
        self,
        execution_plan: Any,
        parent_task_id: str,
        default_agent: str,
        default_task_type: str,
        original_input: dict[str, Any],
    ) -> list[SubTask]:
        """将 ExecutionPlan 的 PlanStep 转换为 SubTask 列表"""
        subtasks: list[SubTask] = []
        for step in execution_plan.steps:
            # 尝试根据 required_skills 匹配 agent
            assigned_agent = default_agent
            if step.required_skills:
                matched_agent = self._match_agent_for_skills(step.required_skills)
                if matched_agent:
                    assigned_agent = matched_agent
            subtasks.append(SubTask(
                task_id=step.step_id,
                parent_task_id=parent_task_id,
                assigned_agent=assigned_agent,
                task_type=default_task_type,
                input_data={
                    **original_input,
                    "step_name": step.name,
                    "step_description": step.description,
                },
                depends_on=list(step.dependencies),
            ))
        return subtasks
    def _match_agent_for_skills(self, required_skills: list[str]) -> str | None:
        """根据所需 Skill 匹配 Agent"""
        try:
            agents_info = self._agent_pool.list_agents()
            for skill in required_skills:
                for agent in agents_info:
                    name = agent.get("name", "")
                    agent_type = agent.get("agent_type", "")
                    description = agent.get("description", "").lower()
                    if skill.lower() in name.lower() or skill.lower() in agent_type.lower() or skill.lower() in description:
                        return name
        except Exception:
            pass
        return None
--- a/src/agentkit/core/plan_checker.py
+++ b/src/agentkit/core/plan_checker.py
@ -0,0 +1,739 @@
 """PlanChecker — 计划检查与复盘
 每步执行后检查产出质量，全部完成后复盘总结并写入经验库。
 核心能力：
 1. QualityGate: 每步完成后验证产出（required_fields / min_word_count / 自定义校验）
 2. LLMReflector: 使用 LLM 评估步骤质量（可选，回退到规则评估）
 3. ReviewReport: 全部完成后生成复盘报告（成功路径、失败原因、耗时分布、优化建议）
 4. ExperienceStore: 复盘结果写入经验库（可选依赖）
 使用方式：
    checker = PlanChecker()
    result = await checker.check_step(step, exec_result)
    report = await checker.review_plan(plan, plan_result)
 """
 from __future__ import annotations
 import logging
 import time
 from dataclasses import dataclass, field
 from enum import Enum
 from typing import Any, Callable, Awaitable
 from agentkit.core.plan_schema import ExecutionPlan, PlanStep, PlanStepStatus
 from agentkit.core.plan_executor import PlanExecutionResult, StepExecutionResult
 from agentkit.skills.base import QualityGateConfig
 logger = logging.getLogger(__name__)
 class CheckStatus(str, Enum):
    """检查结果状态"""
    PASS = "pass"
    FAIL = "fail"
    SKIP = "skip"
@dataclass
 class CheckResult:
    """单步检查结果
    Attributes:
        step_id: 步骤 ID
        status: 检查状态（pass/fail/skip）
        reason: 检查原因说明
        quality_score: 质量评分（0.0 ~ 1.0）
        details: 详细检查项
    """
    step_id: str
    status: CheckStatus
    reason: str = ""
    quality_score: float = 0.0
    details: dict[str, Any] = field(default_factory=dict)
@dataclass
 class ReviewReport:
    """复盘报告
    全部步骤完成后生成，包含成功路径、失败原因、耗时分布和优化建议。
    Attributes:
        plan_id: 计划 ID
        outcome: 整体结果（"success" / "partial" / "failure"）
        success_path: 成功步骤路径（按执行顺序）
        failure_reasons: 失败原因列表
        duration_distribution: 各步骤耗时分布
        optimization_tips: 优化建议
        quality_scores: 各步骤质量评分
        total_duration_ms: 总耗时
        success_rate: 成功率
    """
    plan_id: str
    outcome: str = "success"
    success_path: list[str] = field(default_factory=list)
    failure_reasons: list[str] = field(default_factory=list)
    duration_distribution: dict[str, float] = field(default_factory=dict)
    optimization_tips: list[str] = field(default_factory=list)
    quality_scores: dict[str, float] = field(default_factory=dict)
    total_duration_ms: float = 0.0
    success_rate: float = 1.0
    def to_dict(self) -> dict[str, Any]:
        """序列化为字典"""
        return {
            "plan_id": self.plan_id,
            "outcome": self.outcome,
            "success_path": self.success_path,
            "failure_reasons": self.failure_reasons,
            "duration_distribution": self.duration_distribution,
            "optimization_tips": self.optimization_tips,
            "quality_scores": self.quality_scores,
            "total_duration_ms": self.total_duration_ms,
            "success_rate": self.success_rate,
        }
 # 自定义校验器类型：接收步骤结果，返回 (通过, 原因)
 CustomValidator = Callable[[dict[str, Any] | None], tuple[bool, str]]
 class QualityGate:
    """质量门控
    基于 QualityGateConfig 验证步骤产出：
    1. required_fields: 结果字典必须包含指定字段
    2. min_word_count: 结果文本字段最少字数
    3. custom_validator: 自定义校验函数
    """
    def __init__(
        self,
        config: QualityGateConfig | None = None,
        custom_validator: CustomValidator | None = None,
    ):
        self._config = config or QualityGateConfig()
        self._custom_validator = custom_validator
    def check(self, step: PlanStep, exec_result: StepExecutionResult) -> CheckResult:
        """检查步骤产出质量
        Args:
            step: 计划步骤
            exec_result: 步骤执行结果
        Returns:
            CheckResult: 检查结果
        """
        # 跳过非完成步骤
        if exec_result.status != PlanStepStatus.COMPLETED:
            return CheckResult(
                step_id=step.step_id,
                status=CheckStatus.SKIP,
                reason=f"Step status is {exec_result.status.value}, skipping quality check",
            )
        result = exec_result.result
        details: dict[str, Any] = {}
        failures: list[str] = []
        # 1. 检查 required_fields
        missing_fields = self._check_required_fields(result)
        if missing_fields:
            failures.append(f"Missing required fields: {', '.join(missing_fields)}")
            details["missing_fields"] = missing_fields
        # 2. 检查 min_word_count
        word_count_result = self._check_min_word_count(result)
        if word_count_result:
            failures.append(word_count_result)
            details["word_count_check"] = word_count_result
        # 3. 自定义校验
        custom_result = self._check_custom(result)
        if custom_result:
            failures.append(custom_result)
            details["custom_check"] = custom_result
        if failures:
            return CheckResult(
                step_id=step.step_id,
                status=CheckStatus.FAIL,
                reason="; ".join(failures),
                quality_score=self._compute_quality_score(len(failures)),
                details=details,
            )
        return CheckResult(
            step_id=step.step_id,
            status=CheckStatus.PASS,
            reason="All quality checks passed",
            quality_score=1.0,
            details=details,
        )
    def _check_required_fields(self, result: dict[str, Any] | None) -> list[str]:
        """检查必填字段"""
        if not self._config.required_fields:
            return []
        if result is None:
            return list(self._config.required_fields)
        return [f for f in self._config.required_fields if f not in result]
    def _check_min_word_count(self, result: dict[str, Any] | None) -> str:
        """检查最少字数"""
        if self._config.min_word_count <= 0:
            return ""
        if result is None:
            return f"Result is None, cannot check min_word_count ({self._config.min_word_count})"
        total_words = 0
        for value in result.values():
            if isinstance(value, str):
                total_words += len(value.split())
        if total_words < self._config.min_word_count:
            return (
                f"Word count ({total_words}) is below minimum "
                f"({self._config.min_word_count})"
            )
        return ""
    def _check_custom(self, result: dict[str, Any] | None) -> str:
        """执行自定义校验"""
        if self._custom_validator is None:
            return ""
        try:
            passed, reason = self._custom_validator(result)
            if not passed:
                return reason or "Custom validation failed"
        except Exception as e:
            return f"Custom validator error: {e}"
        return ""
    @staticmethod
    def _compute_quality_score(failure_count: int) -> float:
        """根据失败项数计算质量评分"""
        if failure_count == 0:
            return 1.0
        if failure_count == 1:
            return 0.5
        if failure_count == 2:
            return 0.25
        return 0.1
 class RuleBasedStepReflector:
    """基于规则的步骤反思器
    评估步骤执行质量，生成质量评分和改进建议。
    当 LLM 不可用时的回退方案。
    """
    async def reflect_step(
        self,
        step: PlanStep,
        exec_result: StepExecutionResult,
    ) -> tuple[float, list[str]]:
        """对步骤执行结果进行反思
        Args:
            step: 计划步骤
            exec_result: 步骤执行结果
        Returns:
            (quality_score, suggestions): 质量评分和改进建议
        """
        suggestions: list[str] = []
        if exec_result.status != PlanStepStatus.COMPLETED:
            # 失败步骤
            score = 0.0
            if exec_result.error:
                if "timed out" in exec_result.error.lower():
                    suggestions.append(
                        f"Step '{step.name}' timed out: consider increasing timeout or decomposing the task"
                    )
                elif "no agent available" in exec_result.error.lower():
                    suggestions.append(
                        f"Step '{step.name}' had no available agent: check skill registry"
                    )
                else:
                    suggestions.append(
                        f"Step '{step.name}' failed: {exec_result.error}"
                    )
            return score, suggestions
        # 成功步骤
        score = 0.6  # 基础分
        # 有输出数据加分
        if exec_result.result and len(exec_result.result) > 0:
            score += 0.2
        # 无重试加分
        if exec_result.retry_count == 0:
            score += 0.1
        # 耗时合理加分
        if exec_result.duration_ms > 0 and exec_result.duration_ms < 30000:
            score += 0.1
        score = min(score, 1.0)
        # 生成建议
        if exec_result.retry_count > 0:
            suggestions.append(
                f"Step '{step.name}' required {exec_result.retry_count} retries: "
                f"consider improving step reliability"
            )
        if exec_result.duration_ms > 60000:
            suggestions.append(
                f"Step '{step.name}' took {exec_result.duration_ms / 1000:.1f}s: "
                f"consider optimizing for performance"
            )
        return score, suggestions
 class PlanChecker:
    """计划检查器
    每步执行后检查产出质量，全部完成后复盘总结并写入经验库。
    检查环节：每步完成后，QualityGate 验证产出 + Reflector 评估是否达标
    复盘环节：全部完成后，生成复盘报告（成功路径、失败原因、耗时分布）
    经验写入：复盘结果写入 ExperienceStore（可选）
    闭环：检查不通过 → 触发重试或计划调整
    使用方式：
        # 独立使用
        checker = PlanChecker()
        result = await checker.check_step(step, exec_result)
        report = await checker.review_plan(plan, plan_result)
        # 与 PlanExecutor 集成
        checker = PlanChecker(experience_store=store)
        executor = PlanExecutor(
            agent_pool=pool,
            on_step_complete=checker.make_step_complete_callback(),
        )
    """
    def __init__(
        self,
        quality_gate: QualityGate | None = None,
        quality_gate_config: QualityGateConfig | None = None,
        custom_validator: CustomValidator | None = None,
        reflector: Any | None = None,
        experience_store: Any | None = None,
        max_check_retries: int = 1,
        quality_threshold: float = 0.5,
        step_quality_configs: dict[str, QualityGateConfig] | None = None,
    ):
        """初始化 PlanChecker
        Args:
            quality_gate: 质量门控实例（优先使用）
            quality_gate_config: 质量门控配置（quality_gate 为 None 时使用）
            custom_validator: 自定义校验函数
            reflector: 步骤反思器（None 时使用 RuleBasedStepReflector）
            experience_store: 经验存储（None 时不写入经验库）
            max_check_retries: 检查不通过时最大重试次数
            quality_threshold: 质量评分阈值，低于此值视为不通过
            step_quality_configs: 每步骤独立的质量门控配置
        """
        if quality_gate is not None:
            self._quality_gate = quality_gate
        else:
            self._quality_gate = QualityGate(
                config=quality_gate_config,
                custom_validator=custom_validator,
            )
        self._reflector = reflector or RuleBasedStepReflector()
        self._experience_store = experience_store
        self._max_check_retries = max_check_retries
        self._quality_threshold = quality_threshold
        self._step_quality_configs = step_quality_configs or {}
        # 内部状态：记录每步检查结果
        self._check_results: dict[str, CheckResult] = {}
        self._step_quality_gates: dict[str, QualityGate] = {}
        # 为有独立配置的步骤创建 QualityGate
        for step_id, config in self._step_quality_configs.items():
            self._step_quality_gates[step_id] = QualityGate(config=config)
    async def check_step(
        self,
        step: PlanStep,
        exec_result: StepExecutionResult,
    ) -> CheckResult:
        """检查单个步骤的产出质量
        在每步完成后调用，验证产出是否达标。
        Args:
            step: 计划步骤
            exec_result: 步骤执行结果
        Returns:
            CheckResult: 检查结果
        """
        # 选择步骤专属或默认 QualityGate
        gate = self._step_quality_gates.get(step.step_id, self._quality_gate)
        # 1. QualityGate 规则检查
        gate_result = gate.check(step, exec_result)
        # 2. Reflector 评估（仅对已完成步骤）
        if exec_result.status == PlanStepStatus.COMPLETED:
            try:
                reflect_score, suggestions = await self._reflector.reflect_step(
                    step, exec_result
                )
            except Exception as e:
                logger.warning(f"Reflector failed for step '{step.step_id}': {e}")
                reflect_score = gate_result.quality_score
                suggestions = []
            # 综合评分：取 QualityGate 和 Reflector 的加权平均
            combined_score = 0.4 * gate_result.quality_score + 0.6 * reflect_score
            # 如果 Reflector 评分低于阈值，标记为不通过
            if combined_score < self._quality_threshold and gate_result.status == CheckStatus.PASS:
                gate_result = CheckResult(
                    step_id=step.step_id,
                    status=CheckStatus.FAIL,
                    reason=f"Quality score ({combined_score:.2f}) below threshold ({self._quality_threshold})",
                    quality_score=combined_score,
                    details={
                        **gate_result.details,
                        "reflector_score": reflect_score,
                        "reflector_suggestions": suggestions,
                    },
                )
            elif gate_result.status != CheckStatus.PASS:
                # 已有不通过结果，更新评分
                gate_result = CheckResult(
                    step_id=step.step_id,
                    status=gate_result.status,
                    reason=gate_result.reason,
                    quality_score=combined_score,
                    details={
                        **gate_result.details,
                        "reflector_score": reflect_score,
                        "reflector_suggestions": suggestions,
                    },
                )
            else:
                # 通过，更新评分
                gate_result = CheckResult(
                    step_id=step.step_id,
                    status=gate_result.status,
                    reason=gate_result.reason,
                    quality_score=combined_score,
                    details={
                        **gate_result.details,
                        "reflector_score": reflect_score,
                        "reflector_suggestions": suggestions,
                    },
                )
        # 记录检查结果
        self._check_results[step.step_id] = gate_result
        logger.info(
            f"Check step '{step.step_id}': status={gate_result.status.value}, "
            f"score={gate_result.quality_score:.2f}, reason={gate_result.reason}"
        )
        return gate_result
    async def review_plan(
        self,
        plan: ExecutionPlan,
        plan_result: PlanExecutionResult,
        task_type: str = "",
        goal: str = "",
    ) -> ReviewReport:
        """复盘整个计划执行结果
        全部步骤完成后调用，生成复盘报告并写入经验库。
        Args:
            plan: 执行计划
            plan_result: 计划执行结果
            task_type: 任务类型（写入经验库用）
            goal: 任务目标（写入经验库用）
        Returns:
            ReviewReport: 复盘报告
        """
        # 1. 构建成功路径
        success_path = plan_result.completed_steps
        # 2. 收集失败原因
        failure_reasons = self._collect_failure_reasons(plan_result)
        # 3. 构建耗时分布
        duration_distribution = {
            sid: r.duration_ms
            for sid, r in plan_result.step_results.items()
        }
        # 4. 收集质量评分
        quality_scores = {
            sid: cr.quality_score
            for sid, cr in self._check_results.items()
        }
        # 5. 计算成功率
        total_steps = len(plan.steps)
        completed_count = len(plan_result.completed_steps)
        success_rate = completed_count / total_steps if total_steps > 0 else 0.0
        # 6. 判断整体结果
        outcome = self._determine_outcome(plan_result)
        # 7. 生成优化建议
        optimization_tips = self._generate_optimization_tips(
            plan_result, quality_scores
        )
        report = ReviewReport(
            plan_id=plan.plan_id,
            outcome=outcome,
            success_path=success_path,
            failure_reasons=failure_reasons,
            duration_distribution=duration_distribution,
            optimization_tips=optimization_tips,
            quality_scores=quality_scores,
            total_duration_ms=plan_result.total_duration_ms,
            success_rate=success_rate,
        )
        logger.info(
            f"Review plan '{plan.plan_id}': outcome={outcome}, "
            f"success_rate={success_rate:.2f}, "
            f"failures={len(failure_reasons)}, "
            f"tips={len(optimization_tips)}"
        )
        # 8. 写入经验库（可选）
        if self._experience_store is not None:
            await self._write_experience(report, plan, plan_result, task_type, goal)
        return report
    def should_retry(self, check_result: CheckResult, retry_count: int) -> bool:
        """判断是否应该重试
        检查不通过且重试次数未耗尽时返回 True。
        Args:
            check_result: 检查结果
            retry_count: 当前重试次数
        Returns:
            是否应该重试
        """
        if check_result.status != CheckStatus.FAIL:
            return False
        if check_result.status == CheckStatus.SKIP:
            return False
        return retry_count < self._max_check_retries
    def should_request_human(self, check_result: CheckResult, retry_count: int) -> bool:
        """判断是否应该请求人工介入
        检查不通过且重试次数已耗尽时返回 True。
        Args:
            check_result: 检查结果
            retry_count: 当前重试次数
        Returns:
            是否应该请求人工介入
        """
        if check_result.status != CheckStatus.FAIL:
            return False
        return retry_count >= self._max_check_retries
    def make_step_complete_callback(
        self,
    ) -> Callable[[PlanStep, StepExecutionResult], Awaitable[None]]:
        """创建步骤完成回调，用于与 PlanExecutor 集成
        用法：
            checker = PlanChecker()
            executor = PlanExecutor(
                agent_pool=pool,
                on_step_complete=checker.make_step_complete_callback(),
            )
        Returns:
            异步回调函数
        """
        async def on_step_complete(
            step: PlanStep, exec_result: StepExecutionResult
        ) -> None:
            await self.check_step(step, exec_result)
        return on_step_complete
    def _collect_failure_reasons(self, plan_result: PlanExecutionResult) -> list[str]:
        """收集失败原因"""
        reasons: list[str] = []
        for sid, r in plan_result.step_results.items():
            if r.status == PlanStepStatus.FAILED:
                reason = f"Step '{sid}' failed"
                if r.error:
                    reason += f": {r.error}"
                reasons.append(reason)
            elif r.status == PlanStepStatus.SKIPPED:
                reason = f"Step '{sid}' skipped"
                if r.error:
                    reason += f": {r.error}"
                reasons.append(reason)
        # 补充检查不通过的原因
        for sid, cr in self._check_results.items():
            if cr.status == CheckStatus.FAIL:
                reason = f"Step '{sid}' quality check failed: {cr.reason}"
                if reason not in reasons:
                    reasons.append(reason)
        return reasons
    def _determine_outcome(self, plan_result: PlanExecutionResult) -> str:
        """判断整体结果"""
        total = len(plan_result.step_results)
        if total == 0:
            return "success"
        completed = len(plan_result.completed_steps)
        failed = len(plan_result.failed_steps)
        skipped = len(plan_result.skipped_steps)
        if completed == total:
            return "success"
        if failed == total or (failed + skipped == total and completed == 0):
            return "failure"
        return "partial"
    def _generate_optimization_tips(
        self,
        plan_result: PlanExecutionResult,
        quality_scores: dict[str, float],
    ) -> list[str]:
        """生成优化建议"""
        tips: list[str] = []
        # 基于质量评分
        low_quality_steps = [
            sid for sid, score in quality_scores.items() if score < self._quality_threshold
        ]
        if low_quality_steps:
            tips.append(
                f"Steps with low quality scores: {', '.join(low_quality_steps)}. "
                f"Consider improving input data or step configuration."
            )
        # 基于重试
        high_retry_steps = [
            (sid, r.retry_count)
            for sid, r in plan_result.step_results.items()
            if r.retry_count > 0
        ]
        if high_retry_steps:
            steps_str = ", ".join(
                f"'{sid}' ({count} retries)" for sid, count in high_retry_steps
            )
            tips.append(
                f"Steps requiring retries: {steps_str}. "
                f"Consider improving step reliability."
            )
        # 基于耗时
        slow_steps = [
            (sid, r.duration_ms)
            for sid, r in plan_result.step_results.items()
            if r.duration_ms > 60000
        ]
        if slow_steps:
            steps_str = ", ".join(
                f"'{sid}' ({ms / 1000:.1f}s)" for sid, ms in slow_steps
            )
            tips.append(
                f"Slow steps detected: {steps_str}. "
                f"Consider optimizing for performance."
            )
        # 基于跳过步骤
        skipped = plan_result.skipped_steps
        if skipped:
            tips.append(
                f"Skipped steps: {', '.join(skipped)}. "
                f"Review dependency chain and failure handling strategy."
            )
        # 基于检查结果中的 Reflector 建议
        for sid, cr in self._check_results.items():
            reflector_suggestions = cr.details.get("reflector_suggestions", [])
            for suggestion in reflector_suggestions:
                if suggestion not in tips:
                    tips.append(suggestion)
        return tips
    async def _write_experience(
        self,
        report: ReviewReport,
        plan: ExecutionPlan,
        plan_result: PlanExecutionResult,
        task_type: str,
        goal: str,
    ) -> None:
        """将复盘结果写入经验库"""
        from agentkit.evolution.experience_schema import TaskExperience
        # 构建步骤摘要
        steps_summary_parts: list[str] = []
        for step in plan.steps:
            r = plan_result.step_results.get(step.step_id)
            if r:
                steps_summary_parts.append(
                    f"{step.name}: {r.status.value}"
                    + (f" ({r.duration_ms / 1000:.1f}s)" if r.duration_ms > 0 else "")
                )
        steps_summary = "; ".join(steps_summary_parts)
        experience = TaskExperience(
            task_type=task_type or "plan_execution",
            goal=goal or plan.goal,
            steps_summary=steps_summary,
            outcome=report.outcome,
            duration_seconds=report.total_duration_ms / 1000,
            success_rate=report.success_rate,
            failure_reasons=report.failure_reasons,
            optimization_tips=report.optimization_tips,
        )
        try:
            exp_id = await self._experience_store.record_experience(experience)
            logger.info(f"Experience recorded: {exp_id} outcome={report.outcome}")
        except Exception as e:
            logger.error(f"Failed to write experience to store: {e}")
    def reset(self) -> None:
        """重置内部状态（用于新一轮检查）"""
        self._check_results.clear()
--- a/src/agentkit/core/plan_executor.py
+++ b/src/agentkit/core/plan_executor.py
@ -0,0 +1,501 @@
 """PlanExecutor — 执行计划执行器
 按确认后的 ExecutionPlan 执行，自动并行调度无依赖步骤，支持执行中调整。
 执行流程：
 1. 按 parallel_groups 分组执行步骤
 2. 每组内使用 asyncio.gather 并行执行
 3. 步骤级状态机：PENDING → RUNNING → COMPLETED/FAILED
 4. 失败处理：重试 / 调整计划（跳过/替换）/ 请求人工介入
 5. 与 AgentPool 集成：每个步骤通过 AgentPool 创建 Agent 执行
 """
 from __future__ import annotations
 import asyncio
 import logging
 import time
 from dataclasses import dataclass, field
 from enum import Enum
 from typing import Any, Callable, Awaitable
 from agentkit.core.plan_schema import ExecutionPlan, PlanStep, PlanStepStatus
 from agentkit.core.protocol import TaskMessage, TaskResult, TaskStatus
 logger = logging.getLogger(__name__)
 class FailureAction(str, Enum):
    """步骤失败后的处理策略"""
    RETRY = "retry"
    SKIP = "skip"
    REPLACE = "replace"
    REQUEST_HUMAN = "request_human"
    ABORT = "abort"
@dataclass
 class StepExecutionResult:
    """单个步骤的执行结果"""
    step_id: str
    status: PlanStepStatus
    result: dict[str, Any] | None = None
    error: str | None = None
    retry_count: int = 0
    duration_ms: float = 0.0
@dataclass
 class PlanExecutionResult:
    """整个计划的执行结果"""
    plan_id: str
    step_results: dict[str, StepExecutionResult]
    status: TaskStatus
    total_duration_ms: float
    adjusted: bool = False
    human_intervention_requested: bool = False
    @property
    def completed_steps(self) -> list[str]:
        return [sid for sid, r in self.step_results.items() if r.status == PlanStepStatus.COMPLETED]
    @property
    def failed_steps(self) -> list[str]:
        return [sid for sid, r in self.step_results.items() if r.status == PlanStepStatus.FAILED]
    @property
    def skipped_steps(self) -> list[str]:
        return [sid for sid, r in self.step_results.items() if r.status == PlanStepStatus.SKIPPED]
 # 回调类型
 OnStepCompleteCallback = Callable[[PlanStep, StepExecutionResult], Awaitable[None]]
 OnStepFailedCallback = Callable[[PlanStep, StepExecutionResult], FailureAction]
 OnHumanInterventionCallback = Callable[[PlanStep, StepExecutionResult], Awaitable[FailureAction]]
 class PlanExecutor:
    """执行计划执行器
    按确认后的 ExecutionPlan 执行，自动并行调度无依赖步骤，
    支持失败重试、计划调整和人工介入。
    使用方式：
        executor = PlanExecutor(agent_pool=pool)
        result = await executor.execute(plan, original_task)
    """
    def __init__(
        self,
        agent_pool: Any,
        max_retries: int = 2,
        step_timeout: float = 300.0,
        max_parallel: int = 5,
        on_step_complete: OnStepCompleteCallback | None = None,
        on_step_failed: OnStepFailedCallback | None = None,
        on_human_intervention: OnHumanInterventionCallback | None = None,
    ):
        """
        Args:
            agent_pool: AgentPool 实例
            max_retries: 步骤失败后最大重试次数
            step_timeout: 单个步骤超时时间（秒）
            max_parallel: 最大并行步骤数
            on_step_complete: 步骤完成回调
            on_step_failed: 步骤失败回调，返回 FailureAction 决定后续处理
            on_human_intervention: 人工介入回调
        """
        self._agent_pool = agent_pool
        self._max_retries = max_retries
        self._step_timeout = step_timeout
        self._max_parallel = max_parallel
        self._on_step_complete = on_step_complete
        self._on_step_failed = on_step_failed
        self._on_human_intervention = on_human_intervention
    async def execute(
        self,
        plan: ExecutionPlan,
        original_task: TaskMessage,
    ) -> PlanExecutionResult:
        """执行确认后的 ExecutionPlan
        Args:
            plan: 已确认的执行计划
            original_task: 原始任务消息
        Returns:
            PlanExecutionResult: 执行结果
        """
        start_time = time.monotonic()
        step_results: dict[str, StepExecutionResult] = {}
        plan_adjusted = False
        human_intervention_requested = False
        # 构建步骤索引
        step_map = {s.step_id: s for s in plan.steps}
        # 按 parallel_groups 分组执行
        for group in plan.parallel_groups:
            # 过滤掉已跳过/已完成的步骤（可能因计划调整而变化）
            active_step_ids = [
                sid for sid in group
                if sid in step_map and step_map[sid].status in (PlanStepStatus.PENDING,)
            ]
            if not active_step_ids:
                continue
            # 为每个步骤注入依赖结果
            coros = []
            for step_id in active_step_ids:
                step = step_map[step_id]
                enriched_input = self._inject_dependency_results(step, step_results)
                coros.append(self._execute_step_with_retry(step, enriched_input, original_task))
            # 并行执行当前组
            results = await asyncio.gather(*coros, return_exceptions=True)
            for step_id, result in zip(active_step_ids, results):
                if isinstance(result, Exception):
                    step_results[step_id] = StepExecutionResult(
                        step_id=step_id,
                        status=PlanStepStatus.FAILED,
                        error=str(result),
                    )
                else:
                    step_results[step_id] = result
                # 处理失败步骤
                if step_results[step_id].status == PlanStepStatus.FAILED:
                    step = step_map[step_id]
                    action_taken = await self._handle_step_failure(
                        step, step_results[step_id], step_map, step_results, plan,
                    )
                    if action_taken == "adjusted":
                        plan_adjusted = True
                    elif action_taken in ("human", "human_adjusted"):
                        human_intervention_requested = True
                        if action_taken == "human_adjusted":
                            plan_adjusted = True
        # 计算总耗时
        total_duration_ms = (time.monotonic() - start_time) * 1000
        # 确定整体状态
        status = self._determine_overall_status(plan, step_results)
        return PlanExecutionResult(
            plan_id=plan.plan_id,
            step_results=step_results,
            status=status,
            total_duration_ms=total_duration_ms,
            adjusted=plan_adjusted,
            human_intervention_requested=human_intervention_requested,
        )
    async def _execute_step_with_retry(
        self,
        step: PlanStep,
        input_data: dict[str, Any],
        original_task: TaskMessage,
    ) -> StepExecutionResult:
        """执行单个步骤，支持重试
        Args:
            step: 计划步骤
            input_data: 注入依赖结果后的输入数据
            original_task: 原始任务消息
        Returns:
            StepExecutionResult: 步骤执行结果
        """
        step.status = PlanStepStatus.RUNNING
        retry_count = 0
        last_error: str | None = None
        while retry_count <= self._max_retries:
            start = time.monotonic()
            try:
                result = await asyncio.wait_for(
                    self._execute_step_once(step, input_data, original_task),
                    timeout=self._step_timeout,
                )
                duration_ms = (time.monotonic() - start) * 1000
                step.status = PlanStepStatus.COMPLETED
                exec_result = StepExecutionResult(
                    step_id=step.step_id,
                    status=PlanStepStatus.COMPLETED,
                    result=result,
                    retry_count=retry_count,
                    duration_ms=duration_ms,
                )
                # 完成回调
                if self._on_step_complete:
                    await self._on_step_complete(step, exec_result)
                return exec_result
            except asyncio.TimeoutError:
                last_error = f"Step '{step.step_id}' timed out after {self._step_timeout}s"
                logger.warning(last_error)
            except Exception as e:
                last_error = str(e)
                logger.warning(f"Step '{step.step_id}' failed (attempt {retry_count + 1}): {e}")
            retry_count += 1
        # 所有重试耗尽
        step.status = PlanStepStatus.FAILED
        step.error = last_error
        return StepExecutionResult(
            step_id=step.step_id,
            status=PlanStepStatus.FAILED,
            error=last_error,
            retry_count=retry_count - 1,
            duration_ms=0.0,
        )
    async def _execute_step_once(
        self,
        step: PlanStep,
        input_data: dict[str, Any],
        original_task: TaskMessage,
    ) -> dict[str, Any]:
        """执行单个步骤一次
        通过 AgentPool 创建 Agent 执行步骤。
        Args:
            step: 计划步骤
            input_data: 输入数据
            original_task: 原始任务消息
        Returns:
            步骤执行结果字典
        """
        # 尝试通过 required_skills 创建 Agent
        agent = None
        for skill_name in step.required_skills:
            try:
                agent = await self._agent_pool.create_agent_from_skill(skill_name)
                break
            except Exception as e:
                logger.debug(f"Failed to create agent from skill '{skill_name}': {e}")
                continue
        # 如果 Skill 创建失败，尝试从池中获取已有 Agent
        if agent is None:
            # 尝试用步骤名称或默认 agent
            agent = self._agent_pool.get_agent(step.step_id)
            if agent is None and step.required_skills:
                agent = self._agent_pool.get_agent(step.required_skills[0])
        if agent is None:
            raise RuntimeError(
                f"No agent available for step '{step.step_id}' "
                f"(required_skills: {step.required_skills})"
            )
        # 构造 TaskMessage
        task_msg = TaskMessage(
            task_id=step.step_id,
            agent_name=agent.name if hasattr(agent, "name") else step.step_id,
            task_type=original_task.task_type,
            priority=original_task.priority,
            input_data=input_data,
            callback_url=None,
            created_at=original_task.created_at,
            timeout_seconds=int(self._step_timeout),
        )
        result = await agent.execute(task_msg)
        if isinstance(result, TaskResult):
            if result.status == TaskStatus.FAILED:
                raise RuntimeError(result.error_message or "Agent execution failed")
            return result.output_data or {}
        return result if isinstance(result, dict) else {"output": result}
    async def _handle_step_failure(
        self,
        step: PlanStep,
        exec_result: StepExecutionResult,
        step_map: dict[str, PlanStep],
        step_results: dict[str, StepExecutionResult],
        plan: ExecutionPlan,
    ) -> str:
        """处理步骤失败
        根据失败类型决定：重试 / 调整计划 / 请求人工
        Args:
            step: 失败的步骤
            exec_result: 执行结果
            step_map: 步骤映射
            step_results: 所有步骤结果
            plan: 执行计划
        Returns:
            "none" / "adjusted" / "human"
        """
        # 如果已有回调，让回调决定
        if self._on_step_failed:
            action = await self._on_step_failed(step, exec_result)
        else:
            # 默认策略：根据错误类型决定
            action = self._default_failure_action(step, exec_result)
        if action == FailureAction.RETRY:
            # 重试已在 _execute_step_with_retry 中处理
            return "none"
        if action == FailureAction.SKIP:
            step.status = PlanStepStatus.SKIPPED
            exec_result.status = PlanStepStatus.SKIPPED
            # 跳过依赖此步骤的后续步骤
            self._skip_dependent_steps(step.step_id, step_map, step_results, plan)
            return "adjusted"
        if action == FailureAction.REPLACE:
            # 替换步骤：标记当前步骤为 SKIPPED，后续步骤不再依赖它
            step.status = PlanStepStatus.SKIPPED
            exec_result.status = PlanStepStatus.SKIPPED
            return "adjusted"
        if action == FailureAction.REQUEST_HUMAN:
            if self._on_human_intervention:
                human_action = await self._on_human_intervention(step, exec_result)
                if human_action == FailureAction.SKIP:
                    step.status = PlanStepStatus.SKIPPED
                    exec_result.status = PlanStepStatus.SKIPPED
                    self._skip_dependent_steps(step.step_id, step_map, step_results, plan)
                    return "human_adjusted"
                elif human_action == FailureAction.RETRY:
                    # 人工介入后重试
                    return "human"
            return "human"
        if action == FailureAction.ABORT:
            # 将失败步骤本身也标记为 SKIPPED
            step.status = PlanStepStatus.SKIPPED
            exec_result.status = PlanStepStatus.SKIPPED
            # 中止所有后续步骤
            self._abort_remaining_steps(step_map, step_results, plan)
            return "adjusted"
        return "none"
    def _default_failure_action(self, step: PlanStep, exec_result: StepExecutionResult) -> FailureAction:
        """默认失败处理策略
        根据错误类型决定：
        - 超时错误 → RETRY（重试已在 _execute_step_with_retry 处理）
        - Agent 不可用 → SKIP
        - 其他错误 → SKIP
        """
        error = exec_result.error or ""
        if "timed out" in error.lower():
            # 超时已通过重试处理，重试耗尽后跳过
            return FailureAction.SKIP
        if "no agent available" in error.lower():
            return FailureAction.SKIP
        return FailureAction.SKIP
    def _skip_dependent_steps(
        self,
        failed_step_id: str,
        step_map: dict[str, PlanStep],
        step_results: dict[str, StepExecutionResult],
        plan: ExecutionPlan,
    ) -> None:
        """跳过依赖失败步骤的后续步骤"""
        for step in plan.steps:
            if failed_step_id in step.dependencies and step.status == PlanStepStatus.PENDING:
                step.status = PlanStepStatus.SKIPPED
                step_results[step.step_id] = StepExecutionResult(
                    step_id=step.step_id,
                    status=PlanStepStatus.SKIPPED,
                    error=f"Skipped due to failed dependency '{failed_step_id}'",
                )
                # 递归跳过
                self._skip_dependent_steps(step.step_id, step_map, step_results, plan)
    def _abort_remaining_steps(
        self,
        step_map: dict[str, PlanStep],
        step_results: dict[str, StepExecutionResult],
        plan: ExecutionPlan,
    ) -> None:
        """中止所有剩余的未执行步骤"""
        for step in plan.steps:
            if step.status == PlanStepStatus.PENDING:
                step.status = PlanStepStatus.SKIPPED
                step_results[step.step_id] = StepExecutionResult(
                    step_id=step.step_id,
                    status=PlanStepStatus.SKIPPED,
                    error="Aborted due to previous step failure",
                )
    def _inject_dependency_results(
        self,
        step: PlanStep,
        step_results: dict[str, StepExecutionResult],
    ) -> dict[str, Any]:
        """将依赖步骤的结果注入到当前步骤的输入中
        兼容 Orchestrator 的 subtask_results 累积模式。
        """
        enriched = dict(step.input_data)
        if step.dependencies:
            dep_results: dict[str, dict[str, Any]] = {}
            for dep_id in step.dependencies:
                if dep_id in step_results:
                    dep_result = step_results[dep_id]
                    dep_results[dep_id] = {
                        "status": dep_result.status.value,
                        "result": dep_result.result,
                        "error": dep_result.error,
                    }
            if dep_results:
                enriched["dependency_results"] = dep_results
        # 添加步骤元信息
        enriched["step_name"] = step.name
        enriched["step_description"] = step.description
        return enriched
    def _determine_overall_status(
        self,
        plan: ExecutionPlan,
        step_results: dict[str, StepExecutionResult],
    ) -> TaskStatus:
        """根据步骤执行结果确定整体状态"""
        total = len(plan.steps)
        if total == 0:
            return TaskStatus.COMPLETED
        completed = sum(1 for r in step_results.values() if r.status == PlanStepStatus.COMPLETED)
        failed = sum(1 for r in step_results.values() if r.status == PlanStepStatus.FAILED)
        skipped = sum(1 for r in step_results.values() if r.status == PlanStepStatus.SKIPPED)
        if completed == total:
            return TaskStatus.COMPLETED
        if failed == total:
            return TaskStatus.FAILED
        if completed + skipped == total:
            # 所有步骤要么完成要么跳过
            return TaskStatus.COMPLETED
        if failed > 0:
            return TaskStatus.COMPLETED  # 部分成功
        return TaskStatus.COMPLETED
--- a/src/agentkit/core/plan_schema.py
+++ b/src/agentkit/core/plan_schema.py
@ -0,0 +1,148 @@
 """Plan Schema — GoalPlanner 的执行计划数据模型
 定义 ExecutionPlan 和 PlanStep，用于 GoalPlanner 生成结构化执行计划。
 """
 from __future__ import annotations
 import uuid
 from dataclasses import dataclass, field
 from enum import Enum
 from typing import Any
 class PlanStepStatus(str, Enum):
    """计划步骤状态"""
    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    SKIPPED = "skipped"
 class SkillGapLevel(str, Enum):
    """能力缺口严重程度"""
    NONE = "none"
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
@dataclass
 class SkillGap:
    """能力缺口：某个步骤需要的 Skill 不可用"""
    step_name: str
    required_skill: str
    level: SkillGapLevel
    suggestion: str = ""
@dataclass
 class PlanStep:
    """计划步骤
    每个步骤代表一个可执行的原子任务，包含名称、描述、依赖关系、
    并行分组和所需 Skill。
    """
    step_id: str
    name: str
    description: str
    dependencies: list[str] = field(default_factory=list)
    parallel_group: int = 0
    required_skills: list[str] = field(default_factory=list)
    input_data: dict[str, Any] = field(default_factory=dict)
    status: PlanStepStatus = PlanStepStatus.PENDING
    result: dict[str, Any] | None = None
    error: str | None = None
    def to_dict(self) -> dict[str, Any]:
        return {
            "step_id": self.step_id,
            "name": self.name,
            "description": self.description,
            "dependencies": self.dependencies,
            "parallel_group": self.parallel_group,
            "required_skills": self.required_skills,
            "input_data": self.input_data,
            "status": self.status.value if isinstance(self.status, PlanStepStatus) else self.status,
            "result": self.result,
            "error": self.error,
        }
@dataclass
 class ExecutionPlan:
    """执行计划
    由 GoalPlanner 生成的结构化执行计划，包含多个 PlanStep，
    每个步骤有明确的依赖关系和并行分组。
    """
    plan_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8])
    goal: str = ""
    steps: list[PlanStep] = field(default_factory=list)
    parallel_groups: list[list[str]] = field(default_factory=list)
    skill_gaps: list[SkillGap] = field(default_factory=list)
    confirmed: bool = False
    metadata: dict[str, Any] = field(default_factory=dict)
    @property
    def has_skill_gaps(self) -> bool:
        """是否存在能力缺口"""
        return any(gap.level in (SkillGapLevel.MEDIUM, SkillGapLevel.HIGH) for gap in self.skill_gaps)
    def get_step(self, step_id: str) -> PlanStep | None:
        """按 ID 获取步骤"""
        for step in self.steps:
            if step.step_id == step_id:
                return step
        return None
    def to_readable(self) -> str:
        """序列化为可读格式，用于人工确认"""
        lines = [f"📋 执行计划 [{self.plan_id}]", f"目标: {self.goal}", ""]
        for group_idx, group in enumerate(self.parallel_groups):
            lines.append(f"── 并行组 {group_idx + 1} ──")
            for step_id in group:
                step = self.get_step(step_id)
                if step is None:
                    continue
                deps = f" (依赖: {', '.join(step.dependencies)})" if step.dependencies else ""
                skills = f" [需要: {', '.join(step.required_skills)}]" if step.required_skills else ""
                lines.append(f"  [{step.step_id}] {step.name}{deps}{skills}")
                lines.append(f"      {step.description}")
            lines.append("")
        if self.skill_gaps:
            lines.append("⚠️ 能力缺口:")
            for gap in self.skill_gaps:
                lines.append(f"  - {gap.step_name}: 缺少 '{gap.required_skill}' ({gap.level.value})")
                if gap.suggestion:
                    lines.append(f"    建议: {gap.suggestion}")
            lines.append("")
        return "\n".join(lines)
    def to_dict(self) -> dict[str, Any]:
        return {
            "plan_id": self.plan_id,
            "goal": self.goal,
            "steps": [s.to_dict() for s in self.steps],
            "parallel_groups": self.parallel_groups,
            "skill_gaps": [
                {
                    "step_name": g.step_name,
                    "required_skill": g.required_skill,
                    "level": g.level.value,
                    "suggestion": g.suggestion,
                }
                for g in self.skill_gaps
            ],
            "confirmed": self.confirmed,
            "metadata": self.metadata,
        }
--- a/src/agentkit/evolution/experience_schema.py
+++ b/src/agentkit/evolution/experience_schema.py
@ -0,0 +1,111 @@
 """Experience Schema - 任务经验数据模型
 定义 TaskExperience 和 EvolutionMetrics 数据类，
 用于存储任务执行经验和追踪进化指标趋势。
 """
 from __future__ import annotations
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from typing import Any
@dataclass
 class TaskExperience:
    """任务执行经验
    记录单次任务执行的关键信息，包括成功路径、失败原因、耗时等，
    支持按任务类型检索和语义搜索。
    Attributes:
        experience_id: 唯一标识
        task_type: 任务类型（如 "code_review", "data_analysis"）
        goal: 任务目标描述
        steps_summary: 执行步骤摘要
        outcome: 执行结果（"success" / "failure" / "partial"）
        duration_seconds: 执行耗时（秒）
        success_rate: 成功率（0.0 ~ 1.0）
        failure_reasons: 失败原因列表
        optimization_tips: 优化建议列表
        embedding: 语义向量（由 embedder 生成）
        created_at: 创建时间
    """
    experience_id: str = ""
    task_type: str = ""
    goal: str = ""
    steps_summary: str = ""
    outcome: str = "success"
    duration_seconds: float = 0.0
    success_rate: float = 1.0
    failure_reasons: list[str] = field(default_factory=list)
    optimization_tips: list[str] = field(default_factory=list)
    embedding: list[float] | None = None
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    def to_dict(self) -> dict[str, Any]:
        """转换为字典（不含 embedding）"""
        return {
            "experience_id": self.experience_id,
            "task_type": self.task_type,
            "goal": self.goal,
            "steps_summary": self.steps_summary,
            "outcome": self.outcome,
            "duration_seconds": self.duration_seconds,
            "success_rate": self.success_rate,
            "failure_reasons": self.failure_reasons,
            "optimization_tips": self.optimization_tips,
            "created_at": self.created_at.isoformat(),
        }
    def text_for_embedding(self) -> str:
        """生成用于 embedding 的文本表示"""
        parts = [f"Task: {self.task_type}", f"Goal: {self.goal}"]
        if self.steps_summary:
            parts.append(f"Steps: {self.steps_summary}")
        if self.failure_reasons:
            parts.append(f"Failures: {'; '.join(self.failure_reasons)}")
        if self.optimization_tips:
            parts.append(f"Tips: {'; '.join(self.optimization_tips)}")
        return " | ".join(parts)
@dataclass
 class EvolutionMetrics:
    """进化指标趋势
    追踪指定时间窗口内任务执行的完成率、平均耗时和重试率趋势。
    Attributes:
        task_type: 任务类型
        time_window: 时间窗口描述（如 "1h", "24h", "7d"）
        completion_rate: 完成率（0.0 ~ 1.0）
        avg_duration: 平均耗时（秒）
        retry_rate: 重试率（0.0 ~ 1.0）
        sample_count: 样本数量
        window_start: 窗口起始时间
        window_end: 窗口结束时间
    """
    task_type: str = ""
    time_window: str = "24h"
    completion_rate: float = 0.0
    avg_duration: float = 0.0
    retry_rate: float = 0.0
    sample_count: int = 0
    window_start: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    window_end: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    def to_dict(self) -> dict[str, Any]:
        """转换为字典"""
        return {
            "task_type": self.task_type,
            "time_window": self.time_window,
            "completion_rate": self.completion_rate,
            "avg_duration": self.avg_duration,
            "retry_rate": self.retry_rate,
            "sample_count": self.sample_count,
            "window_start": self.window_start.isoformat(),
            "window_end": self.window_end.isoformat(),
        }
--- a/src/agentkit/evolution/experience_store.py
+++ b/src/agentkit/evolution/experience_store.py
@ -0,0 +1,516 @@
 """ExperienceStore - 任务经验存储
 提供两种后端实现：
 - ExperienceStore: 基于 PostgreSQL + pgvector 的语义检索存储
 - InMemoryExperienceStore: 基于内存字典的轻量存储（用于测试）
 存储任务执行经验（成功路径、失败原因、耗时分布），
 支持按任务类型检索和语义搜索，追踪完成率/耗时/重试率趋势。
 """
 from __future__ import annotations
 import logging
 import math
 import uuid
 from datetime import datetime, timedelta, timezone
 from typing import Any
 from sqlalchemy import text
 from agentkit.evolution.experience_schema import EvolutionMetrics, TaskExperience
 from agentkit.memory.embedder import Embedder
 logger = logging.getLogger(__name__)
 class ExperienceStore:
    """任务经验存储 - PostgreSQL + pgvector 混合存储
    基于 pgvector 向量索引 + tsvector 全文索引，
    支持精确匹配 task_type + 语义相似度排序 + 时效性衰减。
    检索策略：
    1. pgvector ``<=>`` 算符进行最近邻检索
    2. Python 侧 time_decay 重排
    3. 混合评分：alpha * cosine + (1 - alpha) * time_decay_score
    当 pgvector_enabled=False 或 embedder 不可用时，
    回退到客户端 O(N) cosine similarity。
    """
    def __init__(
        self,
        session_factory: Any,
        experience_model: Any,
        embedder: Embedder | None = None,
        decay_rate: float = 0.01,
        alpha: float = 0.7,
        retrieve_limit: int = 200,
        pgvector_enabled: bool = True,
        table_name: str = "task_experiences",
    ):
        """
        Args:
            session_factory: 返回 async context manager 的工厂
            experience_model: TaskExperience ORM 模型类
            embedder: 嵌入器，用于生成向量
            decay_rate: 时间衰减率（越大衰减越快）
            alpha: 混合评分权重，alpha * cosine + (1-alpha) * time_decay
            retrieve_limit: 客户端检索时的最大候选行数
            pgvector_enabled: 是否使用 pgvector 原生 ``<=>`` 算符检索
            table_name: pgvector 查询使用的表名
        """
        self._session_factory = session_factory
        self._experience_model = experience_model
        self._embedder = embedder
        self._decay_rate = decay_rate
        self._alpha = alpha
        self._retrieve_limit = retrieve_limit
        self._pgvector_enabled = pgvector_enabled
        self._table_name = table_name
    async def record_experience(self, experience: TaskExperience) -> str:
        """记录任务经验
        如果 experience.embedding 为 None 且 embedder 可用，
        自动生成 embedding。
        Args:
            experience: 任务经验数据
        Returns:
            经验 ID
        """
        if not experience.experience_id:
            experience.experience_id = str(uuid.uuid4())
        # 自动生成 embedding
        if experience.embedding is None and self._embedder is not None:
            text = experience.text_for_embedding()
            try:
                experience.embedding = await self._embedder.embed(text)
            except Exception as e:
                logger.warning(f"Failed to generate embedding for experience {experience.experience_id}: {e}")
        async with self._session_factory() as db:
            try:
                Model = self._experience_model
                entry = Model(
                    id=experience.experience_id,
                    task_type=experience.task_type,
                    goal=experience.goal,
                    steps_summary=experience.steps_summary,
                    outcome=experience.outcome,
                    duration_seconds=experience.duration_seconds,
                    success_rate=experience.success_rate,
                    failure_reasons=experience.failure_reasons,
                    optimization_tips=experience.optimization_tips,
                    embedding=experience.embedding,
                    created_at=experience.created_at,
                )
                db.add(entry)
                await db.commit()
                logger.info(
                    f"Experience recorded: {experience.experience_id} "
                    f"task_type={experience.task_type} outcome={experience.outcome}"
                )
                return experience.experience_id
            except Exception as e:
                await db.rollback()
                logger.error(f"Failed to record experience: {e}")
                raise
    async def search(
        self,
        query: str,
        top_k: int = 5,
        task_type: str | None = None,
        search_multiplier: int = 5,
    ) -> list[TaskExperience]:
        """语义检索相似经验
        支持精确匹配 task_type + 语义相似度排序 + 时效性衰减。
        Args:
            query: 搜索查询文本
            top_k: 返回的最大结果数
            task_type: 可选的任务类型过滤
            search_multiplier: 预取行数倍数
        """
        async with self._session_factory() as db:
            try:
                if self._pgvector_enabled and self._embedder:
                    return await self._search_pgvector(db, query, top_k, task_type, search_multiplier)
                return await self._search_client_side(db, query, top_k, task_type, search_multiplier)
            except Exception as e:
                logger.error(f"Failed to search experiences: {e}")
                return []
    async def _search_pgvector(
        self,
        db: Any,
        query: str,
        top_k: int,
        task_type: str | None,
        search_multiplier: int,
    ) -> list[TaskExperience]:
        """使用 pgvector ``<=>`` 算符检索，再 Python 侧 time_decay 重排"""
        query_embedding = await self._embedder.embed(query)
        fetch_limit = top_k * search_multiplier
        where_clauses = []
        params: dict[str, Any] = {"query_vec": str(query_embedding), "lim": fetch_limit}
        if task_type:
            where_clauses.append("task_type = :task_type")
            params["task_type"] = task_type
        where_sql = (" WHERE " + " AND ".join(where_clauses)) if where_clauses else ""
        sql = text(
            f"SELECT *, embedding <=> :query_vec AS distance "
            f"FROM {self._table_name}{where_sql} "
            f"ORDER BY embedding <=> :query_vec "
            f"LIMIT :lim"
        )
        result = await db.execute(sql, params)
        rows = result.mappings().all()
        if not rows:
            return []
        # Re-rank with time_decay in Python
        items: list[tuple[float, TaskExperience]] = []
        for row in rows:
            row_embedding = row.get("embedding")
            age_hours = (
                (datetime.now(timezone.utc) - row["created_at"]).total_seconds() / 3600
                if row.get("created_at")
                else 0
            )
            decay = math.exp(-self._decay_rate * age_hours)
            time_decay_score = (row.get("success_rate") or 0.5) * decay
            if row_embedding is not None:
                cosine_sim = _compute_cosine_similarity(query_embedding, row_embedding)
                score = self._alpha * cosine_sim + (1 - self._alpha) * time_decay_score
            else:
                score = time_decay_score
            exp = TaskExperience(
                experience_id=str(row.get("id", "")),
                task_type=row.get("task_type", ""),
                goal=row.get("goal", ""),
                steps_summary=row.get("steps_summary", ""),
                outcome=row.get("outcome", "success"),
                duration_seconds=row.get("duration_seconds", 0.0),
                success_rate=row.get("success_rate", 1.0),
                failure_reasons=row.get("failure_reasons") or [],
                optimization_tips=row.get("optimization_tips") or [],
                embedding=row_embedding,
                created_at=row.get("created_at") or datetime.now(timezone.utc),
            )
            items.append((score, exp))
        items.sort(key=lambda x: x[0], reverse=True)
        return [exp for _, exp in items[:top_k]]
    async def _search_client_side(
        self,
        db: Any,
        query: str,
        top_k: int,
        task_type: str | None,
        search_multiplier: int,
    ) -> list[TaskExperience]:
        """客户端 O(N) cosine similarity 检索（回退路径）"""
        Model = self._experience_model
        from sqlalchemy import select
        stmt = select(Model)
        if task_type:
            stmt = stmt.where(Model.task_type == task_type)
        stmt = stmt.order_by(Model.created_at.desc()).limit(top_k * search_multiplier)
        result = await db.execute(stmt)
        entries = result.scalars().all()
        query_embedding = None
        if self._embedder and entries:
            query_embedding = await self._embedder.embed(query)
        items: list[tuple[float, TaskExperience]] = []
        for entry in entries:
            age_hours = (
                (datetime.now(timezone.utc) - entry.created_at).total_seconds() / 3600
                if entry.created_at
                else 0
            )
            decay = math.exp(-self._decay_rate * age_hours)
            time_decay_score = (entry.success_rate or 0.5) * decay
            if self._embedder and query_embedding is not None and entry.embedding is not None:
                cosine_sim = _compute_cosine_similarity(query_embedding, entry.embedding)
                score = self._alpha * cosine_sim + (1 - self._alpha) * time_decay_score
            else:
                score = time_decay_score
            exp = TaskExperience(
                experience_id=str(entry.id),
                task_type=entry.task_type,
                goal=entry.goal,
                steps_summary=entry.steps_summary,
                outcome=entry.outcome,
                duration_seconds=entry.duration_seconds,
                success_rate=entry.success_rate,
                failure_reasons=entry.failure_reasons or [],
                optimization_tips=entry.optimization_tips or [],
                embedding=entry.embedding,
                created_at=entry.created_at or datetime.now(timezone.utc),
            )
            items.append((score, exp))
        items.sort(key=lambda x: x[0], reverse=True)
        return [exp for _, exp in items[:top_k]]
    async def get_metrics(
        self,
        task_type: str | None = None,
        time_window: str = "24h",
    ) -> list[EvolutionMetrics]:
        """获取进化指标趋势
        按任务类型和时间窗口聚合完成率、平均耗时和重试率。
        Args:
            task_type: 可选的任务类型过滤，None 表示所有类型
            time_window: 时间窗口（"1h", "24h", "7d", "30d"）
        """
        window_delta = _parse_time_window(time_window)
        window_start = datetime.now(timezone.utc) - window_delta
        window_end = datetime.now(timezone.utc)
        async with self._session_factory() as db:
            try:
                where_clauses = ["created_at >= :window_start"]
                params: dict[str, Any] = {"window_start": window_start}
                if task_type:
                    where_clauses.append("task_type = :task_type")
                    params["task_type"] = task_type
                where_sql = " AND ".join(where_clauses)
                # 按任务类型聚合
                group_by = "task_type" if task_type is None else ""
                select_clause = "task_type"
                if task_type:
                    select_clause += f", '{task_type}' as filtered_task_type"
                sql = text(
                    f"SELECT task_type, "
                    f"  COUNT(*) as sample_count, "
                    f"  AVG(CASE WHEN outcome = 'success' THEN 1.0 ELSE 0.0 END) as completion_rate, "
                    f"  AVG(duration_seconds) as avg_duration, "
                    f"  AVG(CASE WHEN success_rate < 1.0 THEN 1.0 ELSE 0.0 END) as retry_rate "
                    f"FROM {self._table_name} "
                    f"WHERE {where_sql} "
                    f"GROUP BY task_type"
                )
                result = await db.execute(sql, params)
                rows = result.mappings().all()
                metrics_list = []
                for row in rows:
                    metrics_list.append(
                        EvolutionMetrics(
                            task_type=row["task_type"],
                            time_window=time_window,
                            completion_rate=row["completion_rate"] or 0.0,
                            avg_duration=row["avg_duration"] or 0.0,
                            retry_rate=row["retry_rate"] or 0.0,
                            sample_count=row["sample_count"] or 0,
                            window_start=window_start,
                            window_end=window_end,
                        )
                    )
                return metrics_list
            except Exception as e:
                logger.error(f"Failed to get metrics: {e}")
                return []
 class InMemoryExperienceStore:
    """基于内存字典的任务经验存储（用于测试和轻量场景）
    无需数据库，纯 dict-based 实现，支持与 ExperienceStore 相同的接口。
    """
    def __init__(
        self,
        embedder: Embedder | None = None,
        decay_rate: float = 0.01,
        alpha: float = 0.7,
    ):
        self._embedder = embedder
        self._decay_rate = decay_rate
        self._alpha = alpha
        self._experiences: dict[str, TaskExperience] = {}
    async def record_experience(self, experience: TaskExperience) -> str:
        """记录任务经验"""
        if not experience.experience_id:
            experience.experience_id = str(uuid.uuid4())
        # 自动生成 embedding
        if experience.embedding is None and self._embedder is not None:
            text = experience.text_for_embedding()
            try:
                experience.embedding = await self._embedder.embed(text)
            except Exception as e:
                logger.warning(f"Failed to generate embedding for experience {experience.experience_id}: {e}")
        # 存储副本，避免外部修改影响内部状态
        self._experiences[experience.experience_id] = TaskExperience(
            experience_id=experience.experience_id,
            task_type=experience.task_type,
            goal=experience.goal,
            steps_summary=experience.steps_summary,
            outcome=experience.outcome,
            duration_seconds=experience.duration_seconds,
            success_rate=experience.success_rate,
            failure_reasons=list(experience.failure_reasons),
            optimization_tips=list(experience.optimization_tips),
            embedding=experience.embedding,
            created_at=experience.created_at,
        )
        logger.info(
            f"Experience recorded: {experience.experience_id} "
            f"task_type={experience.task_type} outcome={experience.outcome}"
        )
        return experience.experience_id
    async def search(
        self,
        query: str,
        top_k: int = 5,
        task_type: str | None = None,
        search_multiplier: int = 5,
    ) -> list[TaskExperience]:
        """语义检索相似经验"""
        # 生成 query embedding
        query_embedding = None
        if self._embedder:
            try:
                query_embedding = await self._embedder.embed(query)
            except Exception as e:
                logger.warning(f"Failed to generate query embedding: {e}")
        # 筛选候选
        candidates = list(self._experiences.values())
        if task_type:
            candidates = [e for e in candidates if e.task_type == task_type]
        # 计算得分
        items: list[tuple[float, TaskExperience]] = []
        for exp in candidates:
            age_hours = (
                (datetime.now(timezone.utc) - exp.created_at).total_seconds() / 3600
                if exp.created_at
                else 0
            )
            decay = math.exp(-self._decay_rate * age_hours)
            time_decay_score = exp.success_rate * decay
            if query_embedding is not None and exp.embedding is not None:
                cosine_sim = _compute_cosine_similarity(query_embedding, exp.embedding)
                score = self._alpha * cosine_sim + (1 - self._alpha) * time_decay_score
            else:
                score = time_decay_score
            items.append((score, exp))
        items.sort(key=lambda x: x[0], reverse=True)
        return [exp for _, exp in items[:top_k]]
    async def get_metrics(
        self,
        task_type: str | None = None,
        time_window: str = "24h",
    ) -> list[EvolutionMetrics]:
        """获取进化指标趋势"""
        window_delta = _parse_time_window(time_window)
        window_start = datetime.now(timezone.utc) - window_delta
        window_end = datetime.now(timezone.utc)
        # 筛选时间窗口内的经验
        candidates = [
            e for e in self._experiences.values()
            if e.created_at >= window_start
        ]
        if task_type:
            candidates = [e for e in candidates if e.task_type == task_type]
        # 按 task_type 分组聚合
        groups: dict[str, list[TaskExperience]] = {}
        for exp in candidates:
            groups.setdefault(exp.task_type, []).append(exp)
        metrics_list = []
        for tt, exps in groups.items():
            n = len(exps)
            if n == 0:
                continue
            completion_rate = sum(1 for e in exps if e.outcome == "success") / n
            avg_duration = sum(e.duration_seconds for e in exps) / n
            retry_rate = sum(1 for e in exps if e.success_rate < 1.0) / n
            metrics_list.append(
                EvolutionMetrics(
                    task_type=tt,
                    time_window=time_window,
                    completion_rate=completion_rate,
                    avg_duration=avg_duration,
                    retry_rate=retry_rate,
                    sample_count=n,
                    window_start=window_start,
                    window_end=window_end,
                )
            )
        return metrics_list
 # ── 辅助函数 ──────────────────────────────────────────────
 def _compute_cosine_similarity(vec_a: list[float], vec_b: list[float]) -> float:
    """计算两个向量的余弦相似度"""
    if len(vec_a) != len(vec_b):
        logger.warning(f"Vector dimension mismatch: {len(vec_a)} vs {len(vec_b)}")
        return 0.0
    if not vec_a:
        return 0.0
    dot_product = sum(a * b for a, b in zip(vec_a, vec_b))
    magnitude_a = sum(a**2 for a in vec_a) ** 0.5
    magnitude_b = sum(b**2 for b in vec_b) ** 0.5
    if magnitude_a == 0.0 or magnitude_b == 0.0:
        return 0.0
    return dot_product / (magnitude_a * magnitude_b)
 def _parse_time_window(window: str) -> timedelta:
    """解析时间窗口字符串为 timedelta
    支持格式: "1h", "24h", "7d", "30d"
    """
    unit = window[-1].lower()
    value = int(window[:-1])
    if unit == "h":
        return timedelta(hours=value)
    elif unit == "d":
        return timedelta(days=value)
    else:
        logger.warning(f"Unknown time window unit '{unit}', defaulting to 24h")
        return timedelta(hours=24)
--- a/src/agentkit/skills/init.py
+++ b/src/agentkit/skills/init.py
@ -4,6 +4,12 @@ from agentkit.skills.base import IntentConfig, QualityGateConfig, Skill, SkillCo
 from agentkit.skills.loader import SkillLoader
 from agentkit.skills.pipeline import SkillPipeline
 from agentkit.skills.registry import SkillRegistry
 from agentkit.skills.schema import (
    CapabilityTag,
    DependencyDecl,
    HealthCheckResult,
    SkillSpec,
 )
 __all__ = [
    "IntentConfig",
@ -13,4 +19,8 @@ __all__ = [
    "SkillPipeline",
    "SkillRegistry",
    "SkillLoader",
    "CapabilityTag",
    "DependencyDecl",
    "HealthCheckResult",
    "SkillSpec",
 ]
--- a/src/agentkit/skills/base.py
+++ b/src/agentkit/skills/base.py
@ -1,11 +1,14 @@
 """Skill 基础类 - SkillConfig, IntentConfig, QualityGateConfig, Skill"""
 from __future__ import annotations
 import logging
 from dataclasses import dataclass, field
 from typing import Any
 from agentkit.core.config_driven import AgentConfig
 from agentkit.core.exceptions import ConfigValidationError
 from agentkit.skills.schema import CapabilityTag, DependencyDecl
 from agentkit.tools.base import Tool
 logger = logging.getLogger(__name__)
@ -78,6 +81,9 @@ class SkillConfig(AgentConfig):
        # v3 新增字段：SKILL.md 支持
        skill_md_path: str | None = None,
        disclosure_level: int = 0,
        # v4 新增字段：依赖声明、能力标签
        dependencies: list[dict[str, Any] | DependencyDecl] | None = None,
        capabilities: list[str | dict[str, Any] | CapabilityTag] | None = None,
    ):
        super().__init__(
            name=name,
@ -102,6 +108,9 @@ class SkillConfig(AgentConfig):
        self.evolution = EvolutionConfig(**(evolution or {}))
        self.skill_md_path = skill_md_path
        self.disclosure_level = disclosure_level
        # v4: 解析依赖和能力标签
        self.dependencies = self._parse_dependencies(dependencies or [])
        self.capabilities = self._parse_capabilities(capabilities or [])
        self._validate_v2()
    def _validate_v2(self) -> None:
@ -116,6 +125,38 @@ class SkillConfig(AgentConfig):
                ),
            )
    @staticmethod
    def _parse_dependencies(
        raw: list[dict[str, Any] | DependencyDecl],
    ) -> list[DependencyDecl]:
        """解析依赖声明列表，支持 dict 或 DependencyDecl 实例"""
        result: list[DependencyDecl] = []
        for item in raw:
            if isinstance(item, DependencyDecl):
                result.append(item)
            elif isinstance(item, dict):
                result.append(DependencyDecl(**item))
            else:
                logger.warning(f"Skipping invalid dependency declaration: {item}")
        return result
    @staticmethod
    def _parse_capabilities(
        raw: list[str | dict[str, Any] | CapabilityTag],
    ) -> list[CapabilityTag]:
        """解析能力标签列表，支持 str / dict / CapabilityTag 实例"""
        result: list[CapabilityTag] = []
        for item in raw:
            if isinstance(item, CapabilityTag):
                result.append(item)
            elif isinstance(item, str):
                result.append(CapabilityTag(tag=item))
            elif isinstance(item, dict):
                result.append(CapabilityTag(**item))
            else:
                logger.warning(f"Skipping invalid capability declaration: {item}")
        return result
    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "SkillConfig":
        """从字典创建配置"""
@ -141,6 +182,8 @@ class SkillConfig(AgentConfig):
            evolution=data.get("evolution"),
            skill_md_path=data.get("skill_md_path"),
            disclosure_level=data.get("disclosure_level", 0),
            dependencies=data.get("dependencies"),
            capabilities=data.get("capabilities"),
        )
    @classmethod
@ -187,6 +230,20 @@ class SkillConfig(AgentConfig):
        }
        d["skill_md_path"] = self.skill_md_path
        d["disclosure_level"] = self.disclosure_level
        # v4: 序列化依赖和能力标签
        d["dependencies"] = [
            {
                "name": dep.name,
                "version_constraint": dep.version_constraint,
                "type": dep.type,
                "required": dep.required,
            }
            for dep in self.dependencies
        ]
        d["capabilities"] = [
            {"tag": cap.tag, "description": cap.description}
            for cap in self.capabilities
        ]
        return d
@ -204,6 +261,10 @@ class Skill:
    def name(self) -> str:
        return self._config.name
    @property
    def version(self) -> str:
        return self._config.version
    @property
    def config(self) -> SkillConfig:
        return self._config
@ -212,6 +273,16 @@ class Skill:
    def tools(self) -> list[Tool]:
        return self._tools
    @property
    def capabilities(self) -> list:
        """返回 Skill 的能力标签列表"""
        return self._config.capabilities
    @property
    def dependencies(self) -> list:
        """返回 Skill 的依赖声明列表"""
        return self._config.dependencies
    def bind_tool(self, tool: Tool) -> None:
        """绑定工具到 Skill"""
        self._tools.append(tool)
--- a/src/agentkit/skills/loader.py
+++ b/src/agentkit/skills/loader.py
@ -1,8 +1,11 @@
-"""SkillLoader - 从 YAML/SKILL.md 目录批量加载 Skill"""
+"""SkillLoader - 从 YAML/SKILL.md 目录/Python 包批量加载 Skill"""
 from __future__ import annotations
 import glob
 import logging
 import os
 import sys
 from agentkit.skills.base import Skill, SkillConfig
 from agentkit.skills.registry import SkillRegistry
@ -10,9 +13,16 @@ from agentkit.tools.registry import ToolRegistry
 logger = logging.getLogger(__name__)
 # entry_points group 名称，用于自动发现 Skill 插件
 SKILL_ENTRY_POINT_GROUP = "agentkit.skills"
 class SkillLoader:
-    """从 YAML/SKILL.md 目录批量加载 Skill 并注册到 SkillRegistry"""
+    """从 YAML/SKILL.md 目录/Python 包批量加载 Skill 并注册到 SkillRegistry
    v2 增强：
    - 支持从 Python 包通过 entry_points 自动发现并加载 Skill
    """
    def __init__(
        self,
@ -87,6 +97,74 @@ class SkillLoader:
        logger.info(f"Loaded skill '{skill.name}' from SKILL.md '{path}' (level={disclosure_level})")
        return skill
    def load_from_entry_points(self, group: str | None = None) -> list[Skill]:
        """从 Python 包的 entry_points 自动发现并加载 Skill
        第三方包可通过在 pyproject.toml 或 setup.py 中声明 entry_points
        来注册 Skill 插件::
            [project.entry-points."agentkit.skills"]
            my_rag_skill = "my_package.skills:rag_skill"
        其中 `rag_skill` 应为 Skill 实例或返回 Skill 的可调用对象。
        Args:
            group: entry_points 组名，默认为 "agentkit.skills"
        Returns:
            加载的 Skill 列表
        """
        group_name = group or SKILL_ENTRY_POINT_GROUP
        skills: list[Skill] = []
        try:
            # Python 3.12+ 使用 importlib.metadata
            if sys.version_info >= (3, 12):
                from importlib.metadata import entry_points as _entry_points
                eps = _entry_points(group=group_name)
            else:
                from importlib.metadata import entry_points as _entry_points
                eps = _entry_points().get(group_name, [])
        except Exception as e:
            logger.warning(f"Failed to discover entry_points for group '{group_name}': {e}")
            return skills
        for ep in eps:
            try:
                loaded = ep.load()
                # 支持两种形式：直接是 Skill 实例，或者是返回 Skill 的可调用对象
                if isinstance(loaded, Skill):
                    skill = loaded
                elif callable(loaded):
                    result = loaded()
                    if isinstance(result, Skill):
                        skill = result
                    else:
                        logger.warning(
                            f"Entry point '{ep.name}' did not return a Skill instance, "
                            f"got {type(result)}"
                        )
                        continue
                else:
                    logger.warning(
                        f"Entry point '{ep.name}' is neither a Skill nor callable, "
                        f"got {type(loaded)}"
                    )
                    continue
                self._skill_registry.register(skill)
                skills.append(skill)
                logger.info(
                    f"Loaded skill '{skill.name}' v{skill.version} "
                    f"from entry_point '{ep.name}'"
                )
            except Exception as e:
                logger.warning(
                    f"Failed to load skill from entry_point '{ep.name}': {e}"
                )
        return skills
    def _bind_tools(self, config: SkillConfig) -> list:
        """根据配置中的 tools 列表绑定工具"""
        if not self._tool_registry or not config.tools:
--- a/src/agentkit/skills/registry.py
+++ b/src/agentkit/skills/registry.py
@ -1,4 +1,4 @@
-"""SkillRegistry - Skill 注册中心"""
+"""SkillRegistry - Skill 注册中心（v2: 版本管理、能力查询、依赖检查）"""
 from __future__ import annotations
@ -7,6 +7,7 @@ from typing import TYPE_CHECKING
 from agentkit.core.exceptions import SkillNotFoundError
 from agentkit.skills.base import Skill, SkillConfig
 from agentkit.skills.schema import DependencyDecl, HealthCheckResult
 if TYPE_CHECKING:
    from agentkit.skills.pipeline import SkillPipeline
@ -15,31 +16,93 @@ logger = logging.getLogger(__name__)
 class SkillRegistry:
-    """Skill 注册中心，管理 Skill 的注册、发现、更新"""
+    """Skill 注册中心，管理 Skill 的注册、发现、更新
    v2 增强：
    - 版本管理：同名 Skill 可注册多个版本，默认使用最新版
    - 能力查询：按 capability 标签查询匹配的 Skill
    - 依赖检查：health_check() 验证所有声明依赖是否已注册
    """
    def __init__(self):
        self._skills: dict[str, Skill] = {}
        # 版本历史：name → {version → Skill}
        self._skill_versions: dict[str, dict[str, Skill]] = {}
        self._pipelines: dict[str, SkillPipeline] = {}
    def register(self, skill: Skill) -> None:
-        """注册 Skill，同名覆盖"""
+        """注册 Skill，支持多版本共存
        self._skills[skill.name] = skill
        logger.info(f"Skill '{skill.name}' registered")
-    def unregister(self, name: str) -> None:
+        同名 Skill 注册时保留版本历史，默认指向最新注册的版本。
-        """注销 Skill"""
+        """
-        if name in self._skills:
+        name = skill.name
-            del self._skills[name]
+        version = skill.version
-            logger.info(f"Skill '{name}' unregistered")
+
        # 维护版本历史
        if name not in self._skill_versions:
            self._skill_versions[name] = {}
        self._skill_versions[name][version] = skill
        # 默认指向最新注册的版本
        self._skills[name] = skill
        logger.info(f"Skill '{name}' v{version} registered")
    def unregister(self, name: str, version: str | None = None) -> None:
        """注销 Skill
        Args:
            name: Skill 名称
            version: 可选版本号。若指定则仅注销该版本；
                     若不指定则注销所有版本。
        """
        if version is not None:
            # 仅注销指定版本
            if name in self._skill_versions and version in self._skill_versions[name]:
                del self._skill_versions[name][version]
                logger.info(f"Skill '{name}' v{version} unregistered")
                # 如果删除的是当前默认版本，切换到最新版本
                if name in self._skills and self._skills[name].version == version:
                    remaining = self._skill_versions[name]
                    if remaining:
                        latest = max(remaining.keys())
                        self._skills[name] = remaining[latest]
                        logger.info(
                            f"Skill '{name}' default switched to v{latest}"
                        )
                    else:
                        del self._skills[name]
                        del self._skill_versions[name]
        else:
            # 注销所有版本
            if name in self._skills:
                del self._skills[name]
            if name in self._skill_versions:
                del self._skill_versions[name]
            logger.info(f"Skill '{name}' unregistered (all versions)")
    def get(self, name: str, version: str | None = None) -> Skill:
        """获取 Skill
        Args:
            name: Skill 名称
            version: 可选版本号。若指定则返回特定版本，否则返回默认（最新）版本。
        Raises:
            SkillNotFoundError: Skill 或指定版本不存在
        """
        if version is not None:
            if name not in self._skill_versions:
                raise SkillNotFoundError(name)
            if version not in self._skill_versions[name]:
                raise SkillNotFoundError(f"{name}@{version}")
            return self._skill_versions[name][version]
    def get(self, name: str) -> Skill:
        """获取 Skill，不存在则抛出 SkillNotFoundError"""
        if name not in self._skills:
            raise SkillNotFoundError(name)
        return self._skills[name]
    def list_skills(self) -> list[Skill]:
-        """列出所有已注册的 Skill"""
+        """列出所有已注册的 Skill（每个名称返回默认版本）"""
        return list(self._skills.values())
    def update_skill(self, name: str, config: SkillConfig) -> Skill:
@ -49,13 +112,173 @@ class SkillRegistry:
        old_skill = self._skills[name]
        new_skill = Skill(config, tools=old_skill.tools)
        self._skills[name] = new_skill
-        logger.info(f"Skill '{name}' updated")
+        # 同时更新版本历史
        version = config.version
        if name not in self._skill_versions:
            self._skill_versions[name] = {}
        self._skill_versions[name][version] = new_skill
        logger.info(f"Skill '{name}' updated to v{version}")
        return new_skill
-    def has_skill(self, name: str) -> bool:
+    def has_skill(self, name: str, version: str | None = None) -> bool:
-        """检查 Skill 是否已注册"""
+        """检查 Skill 是否已注册
        Args:
            name: Skill 名称
            version: 可选版本号
        """
        if version is not None:
            return (
                name in self._skill_versions
                and version in self._skill_versions[name]
            )
        return name in self._skills
    # ---- 版本管理 ----
    def get_versions(self, name: str) -> list[str]:
        """获取指定 Skill 的所有已注册版本号
        Args:
            name: Skill 名称
        Returns:
            版本号列表（按注册顺序）
        Raises:
            SkillNotFoundError: Skill 不存在
        """
        if name not in self._skill_versions:
            raise SkillNotFoundError(name)
        return list(self._skill_versions[name].keys())
    # ---- 能力查询 ----
    def query_by_capability(self, tag: str) -> list[Skill]:
        """按能力标签查询 Skill
        Args:
            tag: 能力标签名（如 "rag", "terminal", "computer_use"）
        Returns:
            匹配的 Skill 列表
        """
        result = []
        for skill in self._skills.values():
            capability_tags = [
                cap.tag for cap in skill.capabilities
            ]
            if tag in capability_tags:
                result.append(skill)
        return result
    # ---- 依赖检查 ----
    def health_check(self, name: str | None = None) -> list[HealthCheckResult]:
        """依赖健康检查
        验证所有声明依赖是否已注册，以及版本约束是否满足。
        Args:
            name: 可选，指定检查某个 Skill。若不指定则检查所有 Skill。
        Returns:
            健康检查结果列表
        """
        if name is not None:
            if name not in self._skills:
                raise SkillNotFoundError(name)
            skills_to_check = [self._skills[name]]
        else:
            skills_to_check = list(self._skills.values())
        results = []
        for skill in skills_to_check:
            result = self._check_skill_dependencies(skill)
            results.append(result)
        return results
    def _check_skill_dependencies(self, skill: Skill) -> HealthCheckResult:
        """检查单个 Skill 的依赖是否满足"""
        result = HealthCheckResult(
            skill_name=skill.name,
            skill_version=skill.version,
            healthy=True,
        )
        for dep in skill.dependencies:
            if dep.type == "skill":
                if not self.has_skill(dep.name):
                    if dep.required:
                        result.healthy = False
                        result.missing_dependencies.append(dep.name)
                    else:
                        result.warnings.append(
                            f"Optional skill dependency '{dep.name}' not registered"
                        )
                elif dep.version_constraint:
                    # 简化版本约束检查：仅检查已注册版本是否满足
                    dep_skill = self.get(dep.name)
                    if not self._check_version_constraint(
                        dep_skill.version, dep.version_constraint
                    ):
                        result.version_mismatches.append(
                            f"{dep.name}: need {dep.version_constraint}, "
                            f"got {dep_skill.version}"
                        )
                        if dep.required:
                            result.healthy = False
            elif dep.type == "tool":
                # Tool 依赖检查需要 ToolRegistry，此处仅记录
                # 实际检查在运行时由 SkillLoader 配合 ToolRegistry 完成
                pass
        return result
    @staticmethod
    def _check_version_constraint(
        actual_version: str, constraint: str
    ) -> bool:
        """简化的版本约束检查
        支持基本的约束格式：
        - ">=x.y.z" — 大于等于
        - "<=x.y.z" — 小于等于
        - "==x.y.z" — 精确匹配
        - ">=x.y.z,<a.b.c" — 范围约束
        使用简单的元组比较进行 semver 检查。
        """
        try:
            actual = tuple(int(x) for x in actual_version.split("."))
        except (ValueError, AttributeError):
            return True  # 无法解析时默认通过
        parts = [p.strip() for p in constraint.split(",")]
        for part in parts:
            if part.startswith(">="):
                target = tuple(int(x) for x in part[2:].split("."))
                if actual < target:
                    return False
            elif part.startswith("<="):
                target = tuple(int(x) for x in part[2:].split("."))
                if actual > target:
                    return False
            elif part.startswith("=="):
                target = tuple(int(x) for x in part[2:].split("."))
                if actual != target:
                    return False
            elif part.startswith(">"):
                target = tuple(int(x) for x in part[1:].split("."))
                if actual <= target:
                    return False
            elif part.startswith("<"):
                target = tuple(int(x) for x in part[1:].split("."))
                if actual >= target:
                    return False
        return True
    # ---- Pipeline 管理 ----
    def register_pipeline(self, pipeline: SkillPipeline) -> None:
--- a/src/agentkit/skills/schema.py
+++ b/src/agentkit/skills/schema.py
@ -0,0 +1,176 @@
 """SkillSpec - Skill 标准接口规范定义
 定义 Skill 的元数据、输入输出 Schema、依赖声明、质量门禁配置等标准规范，
 确保 7 项能力以 Skill 插件形式统一接入。
 """
 from __future__ import annotations
 import logging
 from dataclasses import dataclass, field
 from typing import Any
 logger = logging.getLogger(__name__)
@dataclass
 class DependencyDecl:
    """依赖声明 - 声明 Skill/Tool 依赖
    Attributes:
        name: 依赖的 Skill 或 Tool 名称
        version_constraint: 可选的版本约束（如 ">=1.0.0,<2.0.0"）
        type: 依赖类型，"skill" 或 "tool"
        required: 是否为必需依赖（默认 True）
    """
    name: str
    version_constraint: str = ""
    type: str = "skill"  # "skill" | "tool"
    required: bool = True
@dataclass
 class CapabilityTag:
    """能力标签 - 用于 Skill 能力查询
    Attributes:
        tag: 标签名（如 "rag", "terminal", "computer_use"）
        description: 标签描述
    """
    tag: str
    description: str = ""
@dataclass
 class SkillSpec:
    """Skill 标准接口规范
    定义 Skill 的完整元数据、输入输出 Schema、依赖声明和质量门禁配置。
    用于 Skill 注册时的标准化描述和校验。
    Attributes:
        name: Skill 唯一标识名
        version: 语义版本号（遵循 semver）
        description: Skill 功能描述
        capabilities: 能力标签列表，用于按能力查询
        dependencies: 依赖声明列表
        input_schema: 输入 JSON Schema
        output_schema: 输出 JSON Schema
        quality_gate: 质量门禁配置
        metadata: 扩展元数据
    """
    name: str
    version: str = "1.0.0"
    description: str = ""
    capabilities: list[CapabilityTag] = field(default_factory=list)
    dependencies: list[DependencyDecl] = field(default_factory=list)
    input_schema: dict[str, Any] | None = None
    output_schema: dict[str, Any] | None = None
    quality_gate: dict[str, Any] | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> SkillSpec:
        """从字典创建 SkillSpec"""
        capabilities = [
            CapabilityTag(**cap) if isinstance(cap, dict) else cap
            for cap in data.get("capabilities", [])
        ]
        dependencies = [
            DependencyDecl(**dep) if isinstance(dep, dict) else dep
            for dep in data.get("dependencies", [])
        ]
        return cls(
            name=data["name"],
            version=data.get("version", "1.0.0"),
            description=data.get("description", ""),
            capabilities=capabilities,
            dependencies=dependencies,
            input_schema=data.get("input_schema"),
            output_schema=data.get("output_schema"),
            quality_gate=data.get("quality_gate"),
            metadata=data.get("metadata", {}),
        )
    def to_dict(self) -> dict[str, Any]:
        """序列化为字典"""
        d: dict[str, Any] = {
            "name": self.name,
            "version": self.version,
            "description": self.description,
            "capabilities": [
                {"tag": c.tag, "description": c.description}
                for c in self.capabilities
            ],
            "dependencies": [
                {
                    "name": dep.name,
                    "version_constraint": dep.version_constraint,
                    "type": dep.type,
                    "required": dep.required,
                }
                for dep in self.dependencies
            ],
            "metadata": self.metadata,
        }
        if self.input_schema is not None:
            d["input_schema"] = self.input_schema
        if self.output_schema is not None:
            d["output_schema"] = self.output_schema
        if self.quality_gate is not None:
            d["quality_gate"] = self.quality_gate
        return d
    @property
    def capability_tags(self) -> list[str]:
        """返回所有能力标签名列表"""
        return [c.tag for c in self.capabilities]
    @property
    def required_dependencies(self) -> list[DependencyDecl]:
        """返回所有必需依赖"""
        return [dep for dep in self.dependencies if dep.required]
    @property
    def skill_dependencies(self) -> list[DependencyDecl]:
        """返回所有 Skill 类型依赖"""
        return [dep for dep in self.dependencies if dep.type == "skill"]
    @property
    def tool_dependencies(self) -> list[DependencyDecl]:
        """返回所有 Tool 类型依赖"""
        return [dep for dep in self.dependencies if dep.type == "tool"]
@dataclass
 class HealthCheckResult:
    """依赖健康检查结果
    Attributes:
        skill_name: 被检查的 Skill 名称
        skill_version: 被检查的 Skill 版本
        healthy: 是否健康（所有必需依赖已满足）
        missing_dependencies: 缺失的依赖列表
        version_mismatches: 版本不匹配的依赖列表
        warnings: 警告信息列表
    """
    skill_name: str
    skill_version: str = ""
    healthy: bool = True
    missing_dependencies: list[str] = field(default_factory=list)
    version_mismatches: list[str] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)
    def to_dict(self) -> dict[str, Any]:
        return {
            "skill_name": self.skill_name,
            "skill_version": self.skill_version,
            "healthy": self.healthy,
            "missing_dependencies": self.missing_dependencies,
            "version_mismatches": self.version_mismatches,
            "warnings": self.warnings,
        }
--- a/tests/unit/core/test_plan_checker.py
+++ b/tests/unit/core/test_plan_checker.py
@ -0,0 +1,974 @@
 """Tests for PlanChecker — 计划检查与复盘"""
 from __future__ import annotations
 import pytest
 from datetime import datetime, timezone
 from typing import Any
 from agentkit.core.plan_checker import (
    CheckResult,
    CheckStatus,
    PlanChecker,
    QualityGate,
    ReviewReport,
    RuleBasedStepReflector,
 )
 from agentkit.core.plan_executor import PlanExecutionResult, StepExecutionResult
 from agentkit.core.plan_schema import ExecutionPlan, PlanStep, PlanStepStatus
 from agentkit.skills.base import QualityGateConfig
 from agentkit.evolution.experience_store import InMemoryExperienceStore
 from agentkit.evolution.experience_schema import TaskExperience
 # --- Helpers ---
 def make_step(
    step_id: str = "s0",
    name: str = "Test Step",
    description: str = "A test step",
    **kwargs,
 ) -> PlanStep:
    return PlanStep(step_id=step_id, name=name, description=description, **kwargs)
 def make_step_result(
    step_id: str = "s0",
    status: PlanStepStatus = PlanStepStatus.COMPLETED,
    result: dict[str, Any] | None = None,
    error: str | None = None,
    retry_count: int = 0,
    duration_ms: float = 100.0,
 ) -> StepExecutionResult:
    return StepExecutionResult(
        step_id=step_id,
        status=status,
        result=result,
        error=error,
        retry_count=retry_count,
        duration_ms=duration_ms,
    )
 def make_plan_result(
    plan_id: str = "p1",
    step_results: dict[str, StepExecutionResult] | None = None,
    total_duration_ms: float = 500.0,
 ) -> PlanExecutionResult:
    from agentkit.core.protocol import TaskStatus
    if step_results is None:
        step_results = {
            "s0": make_step_result(),
        }
    return PlanExecutionResult(
        plan_id=plan_id,
        step_results=step_results,
        status=TaskStatus.COMPLETED,
        total_duration_ms=total_duration_ms,
    )
 def make_plan(
    steps: list[PlanStep] | None = None,
    plan_id: str = "p1",
    goal: str = "test goal",
 ) -> ExecutionPlan:
    if steps is None:
        steps = [make_step()]
    return ExecutionPlan(
        plan_id=plan_id,
        goal=goal,
        steps=steps,
        parallel_groups=[[s.step_id for s in steps]],
        confirmed=True,
    )
 # --- QualityGate Tests ---
 class TestQualityGate:
    """QualityGate 规则检查"""
    def test_pass_when_no_config(self):
        """无配置时所有结果通过"""
        gate = QualityGate()
        step = make_step()
        result = make_step_result(result={"data": "test"})
        check = gate.check(step, result)
        assert check.status == CheckStatus.PASS
    def test_pass_with_required_fields_present(self):
        """必填字段全部存在时通过"""
        config = QualityGateConfig(required_fields=["name", "value"])
        gate = QualityGate(config=config)
        step = make_step()
        result = make_step_result(result={"name": "test", "value": 42})
        check = gate.check(step, result)
        assert check.status == CheckStatus.PASS
    def test_fail_with_missing_required_fields(self):
        """缺少必填字段时不通过"""
        config = QualityGateConfig(required_fields=["name", "value", "missing"])
        gate = QualityGate(config=config)
        step = make_step()
        result = make_step_result(result={"name": "test", "value": 42})
        check = gate.check(step, result)
        assert check.status == CheckStatus.FAIL
        assert "missing" in check.reason.lower() or "Missing required fields" in check.reason
    def test_fail_with_none_result_and_required_fields(self):
        """结果为 None 且有必填字段时不通过"""
        config = QualityGateConfig(required_fields=["name"])
        gate = QualityGate(config=config)
        step = make_step()
        result = make_step_result(result=None)
        check = gate.check(step, result)
        assert check.status == CheckStatus.FAIL
    def test_pass_with_min_word_count_met(self):
        """字数满足最低要求时通过"""
        config = QualityGateConfig(min_word_count=3)
        gate = QualityGate(config=config)
        step = make_step()
        result = make_step_result(result={"text": "hello world foo"})
        check = gate.check(step, result)
        assert check.status == CheckStatus.PASS
    def test_fail_with_min_word_count_not_met(self):
        """字数不满足最低要求时不通过"""
        config = QualityGateConfig(min_word_count=100)
        gate = QualityGate(config=config)
        step = make_step()
        result = make_step_result(result={"text": "hello"})
        check = gate.check(step, result)
        assert check.status == CheckStatus.FAIL
        assert "word count" in check.reason.lower() or "Word count" in check.reason
    def test_skip_for_non_completed_step(self):
        """非完成步骤跳过检查"""
        gate = QualityGate()
        step = make_step()
        result = make_step_result(status=PlanStepStatus.FAILED, error="some error")
        check = gate.check(step, result)
        assert check.status == CheckStatus.SKIP
    def test_skip_for_skipped_step(self):
        """跳过的步骤跳过检查"""
        gate = QualityGate()
        step = make_step()
        result = make_step_result(status=PlanStepStatus.SKIPPED, error="skipped")
        check = gate.check(step, result)
        assert check.status == CheckStatus.SKIP
    def test_custom_validator_pass(self):
        """自定义校验通过"""
        def validator(result):
            return (True, "")
        gate = QualityGate(custom_validator=validator)
        step = make_step()
        result = make_step_result(result={"data": "test"})
        check = gate.check(step, result)
        assert check.status == CheckStatus.PASS
    def test_custom_validator_fail(self):
        """自定义校验不通过"""
        def validator(result):
            return (False, "Output format incorrect")
        gate = QualityGate(custom_validator=validator)
        step = make_step()
        result = make_step_result(result={"data": "test"})
        check = gate.check(step, result)
        assert check.status == CheckStatus.FAIL
        assert "Output format incorrect" in check.reason
    def test_custom_validator_exception(self):
        """自定义校验抛异常时不通过"""
        def validator(result):
            raise ValueError("Validator crashed")
        gate = QualityGate(custom_validator=validator)
        step = make_step()
        result = make_step_result(result={"data": "test"})
        check = gate.check(step, result)
        assert check.status == CheckStatus.FAIL
        assert "error" in check.reason.lower() or "Validator crashed" in check.reason
    def test_combined_required_fields_and_word_count(self):
        """同时检查必填字段和字数"""
        config = QualityGateConfig(required_fields=["report"], min_word_count=5)
        gate = QualityGate(config=config)
        step = make_step()
        # 字数不足
        result = make_step_result(result={"report": "hi"})
        check = gate.check(step, result)
        assert check.status == CheckStatus.FAIL
        # 字数满足
        result2 = make_step_result(result={"report": "This is a detailed report content"})
        check2 = gate.check(step, result2)
        assert check2.status == CheckStatus.PASS
    def test_quality_score_decreases_with_failures(self):
        """失败项越多质量评分越低"""
        config = QualityGateConfig(required_fields=["a", "b"], min_word_count=100)
        gate = QualityGate(config=config)
        step = make_step()
        result = make_step_result(result={"a": "x"})  # missing b + word count
        check = gate.check(step, result)
        assert check.quality_score < 0.5
 # --- RuleBasedStepReflector Tests ---
 class TestRuleBasedStepReflector:
    """基于规则的步骤反思器"""
    @pytest.mark.asyncio
    async def test_completed_step_score(self):
        """完成步骤获得合理评分"""
        reflector = RuleBasedStepReflector()
        step = make_step()
        result = make_step_result(
            result={"data": "test"},
            retry_count=0,
            duration_ms=5000,
        )
        score, suggestions = await reflector.reflect_step(step, result)
        assert score >= 0.8
        assert len(suggestions) == 0
    @pytest.mark.asyncio
    async def test_failed_step_zero_score(self):
        """失败步骤评分为零"""
        reflector = RuleBasedStepReflector()
        step = make_step()
        result = make_step_result(
            status=PlanStepStatus.FAILED,
            error="Something went wrong",
        )
        score, suggestions = await reflector.reflect_step(step, result)
        assert score == 0.0
        assert len(suggestions) > 0
    @pytest.mark.asyncio
    async def test_retry_suggestion(self):
        """有重试的步骤生成改进建议"""
        reflector = RuleBasedStepReflector()
        step = make_step()
        result = make_step_result(
            result={"data": "test"},
            retry_count=2,
        )
        score, suggestions = await reflector.reflect_step(step, result)
        assert any("retries" in s.lower() or "retry" in s.lower() for s in suggestions)
    @pytest.mark.asyncio
    async def test_slow_step_suggestion(self):
        """慢步骤生成优化建议"""
        reflector = RuleBasedStepReflector()
        step = make_step()
        result = make_step_result(
            result={"data": "test"},
            duration_ms=120000,  # 120s
        )
        score, suggestions = await reflector.reflect_step(step, result)
        assert any("slow" in s.lower() or "optimizing" in s.lower() for s in suggestions)
    @pytest.mark.asyncio
    async def test_timeout_error_suggestion(self):
        """超时错误生成超时相关建议"""
        reflector = RuleBasedStepReflector()
        step = make_step()
        result = make_step_result(
            status=PlanStepStatus.FAILED,
            error="Step timed out after 300s",
        )
        score, suggestions = await reflector.reflect_step(step, result)
        assert any("timed out" in s.lower() or "timeout" in s.lower() for s in suggestions)
 # --- PlanChecker.check_step Tests ---
 class TestPlanCheckerCheckStep:
    """PlanChecker 单步检查"""
    @pytest.mark.asyncio
    async def test_check_step_pass(self):
        """步骤通过检查"""
        checker = PlanChecker()
        step = make_step()
        result = make_step_result(result={"data": "test"})
        check = await checker.check_step(step, result)
        assert check.status == CheckStatus.PASS
        assert check.quality_score > 0.5
    @pytest.mark.asyncio
    async def test_check_step_fail_quality_gate(self):
        """步骤不通过质量门控"""
        config = QualityGateConfig(required_fields=["missing_field"])
        checker = PlanChecker(quality_gate_config=config)
        step = make_step()
        result = make_step_result(result={"data": "test"})
        check = await checker.check_step(step, result)
        assert check.status == CheckStatus.FAIL
    @pytest.mark.asyncio
    async def test_check_step_skip_for_failed_status(self):
        """失败步骤跳过检查"""
        checker = PlanChecker()
        step = make_step()
        result = make_step_result(status=PlanStepStatus.FAILED, error="error")
        check = await checker.check_step(step, result)
        assert check.status == CheckStatus.SKIP
    @pytest.mark.asyncio
    async def test_check_step_records_result(self):
        """检查结果被记录"""
        checker = PlanChecker()
        step = make_step(step_id="s1")
        result = make_step_result(step_id="s1", result={"data": "test"})
        await checker.check_step(step, result)
        assert "s1" in checker._check_results
    @pytest.mark.asyncio
    async def test_check_step_with_step_specific_config(self):
        """步骤独立质量配置"""
        step_configs = {
            "s0": QualityGateConfig(required_fields=["report"]),
            "s1": QualityGateConfig(required_fields=["analysis"]),
        }
        checker = PlanChecker(step_quality_configs=step_configs)
        # s0 缺少 report
        step0 = make_step(step_id="s0")
        result0 = make_step_result(step_id="s0", result={"data": "test"})
        check0 = await checker.check_step(step0, result0)
        assert check0.status == CheckStatus.FAIL
        # s1 有 analysis
        step1 = make_step(step_id="s1")
        result1 = make_step_result(step_id="s1", result={"analysis": "result"})
        check1 = await checker.check_step(step1, result1)
        assert check1.status == CheckStatus.PASS
    @pytest.mark.asyncio
    async def test_check_step_with_custom_validator(self):
        """自定义校验器"""
        def validator(result):
            if result and result.get("format") == "json":
                return (True, "")
            return (False, "Expected JSON format")
        checker = PlanChecker(custom_validator=validator)
        step = make_step()
        # 格式正确
        result_ok = make_step_result(result={"format": "json", "data": {}})
        check_ok = await checker.check_step(step, result_ok)
        assert check_ok.status == CheckStatus.PASS
        # 格式不正确
        result_bad = make_step_result(result={"format": "xml", "data": {}})
        check_bad = await checker.check_step(step, result_bad)
        assert check_bad.status == CheckStatus.FAIL
 # --- PlanChecker.should_retry / should_request_human Tests ---
 class TestPlanCheckerRetryAndHuman:
    """重试与人工介入判断"""
    def test_should_retry_on_fail_within_limit(self):
        """检查不通过且重试次数未耗尽时应重试"""
        checker = PlanChecker(max_check_retries=2)
        check = CheckResult(step_id="s0", status=CheckStatus.FAIL, reason="quality low")
        assert checker.should_retry(check, 0) is True
        assert checker.should_retry(check, 1) is True
    def test_should_not_retry_on_pass(self):
        """检查通过时不应重试"""
        checker = PlanChecker(max_check_retries=2)
        check = CheckResult(step_id="s0", status=CheckStatus.PASS)
        assert checker.should_retry(check, 0) is False
    def test_should_not_retry_on_skip(self):
        """跳过检查时不应重试"""
        checker = PlanChecker(max_check_retries=2)
        check = CheckResult(step_id="s0", status=CheckStatus.SKIP)
        assert checker.should_retry(check, 0) is False
    def test_should_not_retry_exhausted(self):
        """重试次数耗尽时不应重试"""
        checker = PlanChecker(max_check_retries=1)
        check = CheckResult(step_id="s0", status=CheckStatus.FAIL, reason="quality low")
        assert checker.should_retry(check, 1) is False
    def test_should_request_human_on_exhausted_retries(self):
        """重试耗尽后应请求人工介入"""
        checker = PlanChecker(max_check_retries=1)
        check = CheckResult(step_id="s0", status=CheckStatus.FAIL, reason="quality low")
        assert checker.should_request_human(check, 1) is True
    def test_should_not_request_human_on_pass(self):
        """检查通过时不应请求人工介入"""
        checker = PlanChecker(max_check_retries=1)
        check = CheckResult(step_id="s0", status=CheckStatus.PASS)
        assert checker.should_request_human(check, 0) is False
    def test_should_not_request_human_within_retries(self):
        """重试次数未耗尽时不应请求人工介入"""
        checker = PlanChecker(max_check_retries=2)
        check = CheckResult(step_id="s0", status=CheckStatus.FAIL, reason="quality low")
        assert checker.should_request_human(check, 0) is False
 # --- PlanChecker.review_plan Tests ---
 class TestPlanCheckerReviewPlan:
    """复盘报告生成"""
    @pytest.mark.asyncio
    async def test_all_steps_pass_review(self):
        """所有步骤通过检查 → 生成复盘报告"""
        checker = PlanChecker()
        step0 = make_step(step_id="s0", name="Search")
        step1 = make_step(step_id="s1", name="Analyze")
        plan = make_plan(steps=[step0, step1])
        plan_result = make_plan_result(
            step_results={
                "s0": make_step_result(step_id="s0", result={"data": "A"}),
                "s1": make_step_result(step_id="s1", result={"data": "B"}),
            },
        )
        # 先检查每步
        await checker.check_step(step0, plan_result.step_results["s0"])
        await checker.check_step(step1, plan_result.step_results["s1"])
        # 复盘
        report = await checker.review_plan(plan, plan_result)
        assert report.outcome == "success"
        assert "s0" in report.success_path
        assert "s1" in report.success_path
        assert len(report.failure_reasons) == 0
        assert report.success_rate == 1.0
    @pytest.mark.asyncio
    async def test_partial_failure_review(self):
        """部分步骤失败 → 复盘报告包含失败原因"""
        checker = PlanChecker()
        step0 = make_step(step_id="s0", name="Search")
        step1 = make_step(step_id="s1", name="Analyze")
        plan = make_plan(steps=[step0, step1])
        plan_result = make_plan_result(
            step_results={
                "s0": make_step_result(step_id="s0", result={"data": "A"}),
                "s1": make_step_result(
                    step_id="s1",
                    status=PlanStepStatus.FAILED,
                    error="Agent crashed",
                ),
            },
        )
        await checker.check_step(step0, plan_result.step_results["s0"])
        await checker.check_step(step1, plan_result.step_results["s1"])
        report = await checker.review_plan(plan, plan_result)
        assert report.outcome == "partial"
        assert "s0" in report.success_path
        assert len(report.failure_reasons) > 0
        assert any("s1" in r for r in report.failure_reasons)
        assert report.success_rate == 0.5
    @pytest.mark.asyncio
    async def test_all_failure_review(self):
        """全部步骤失败 → 复盘报告 outcome 为 failure"""
        checker = PlanChecker()
        step0 = make_step(step_id="s0", name="Search")
        plan = make_plan(steps=[step0])
        plan_result = make_plan_result(
            step_results={
                "s0": make_step_result(
                    step_id="s0",
                    status=PlanStepStatus.FAILED,
                    error="Agent unavailable",
                ),
            },
        )
        await checker.check_step(step0, plan_result.step_results["s0"])
        report = await checker.review_plan(plan, plan_result)
        assert report.outcome == "failure"
        assert len(report.failure_reasons) > 0
    @pytest.mark.asyncio
    async def test_review_report_contains_duration_distribution(self):
        """复盘报告包含耗时分布"""
        checker = PlanChecker()
        step0 = make_step(step_id="s0")
        step1 = make_step(step_id="s1")
        plan = make_plan(steps=[step0, step1])
        plan_result = make_plan_result(
            step_results={
                "s0": make_step_result(step_id="s0", result={"data": "A"}, duration_ms=100.0),
                "s1": make_step_result(step_id="s1", result={"data": "B"}, duration_ms=200.0),
            },
        )
        await checker.check_step(step0, plan_result.step_results["s0"])
        await checker.check_step(step1, plan_result.step_results["s1"])
        report = await checker.review_plan(plan, plan_result)
        assert "s0" in report.duration_distribution
        assert "s1" in report.duration_distribution
        assert report.duration_distribution["s0"] == 100.0
        assert report.duration_distribution["s1"] == 200.0
    @pytest.mark.asyncio
    async def test_review_report_contains_quality_scores(self):
        """复盘报告包含质量评分"""
        checker = PlanChecker()
        step0 = make_step(step_id="s0")
        plan = make_plan(steps=[step0])
        plan_result = make_plan_result(
            step_results={
                "s0": make_step_result(step_id="s0", result={"data": "A"}),
            },
        )
        await checker.check_step(step0, plan_result.step_results["s0"])
        report = await checker.review_plan(plan, plan_result)
        assert "s0" in report.quality_scores
        assert report.quality_scores["s0"] > 0
    @pytest.mark.asyncio
    async def test_review_report_contains_optimization_tips(self):
        """复盘报告包含优化建议"""
        checker = PlanChecker()
        step0 = make_step(step_id="s0")
        plan = make_plan(steps=[step0])
        plan_result = make_plan_result(
            step_results={
                "s0": make_step_result(
                    step_id="s0",
                    result={"data": "A"},
                    retry_count=2,
                    duration_ms=120000,
                ),
            },
        )
        await checker.check_step(step0, plan_result.step_results["s0"])
        report = await checker.review_plan(plan, plan_result)
        assert len(report.optimization_tips) > 0
    @pytest.mark.asyncio
    async def test_review_report_to_dict(self):
        """复盘报告可序列化为字典"""
        checker = PlanChecker()
        step0 = make_step(step_id="s0")
        plan = make_plan(steps=[step0])
        plan_result = make_plan_result(
            step_results={
                "s0": make_step_result(step_id="s0", result={"data": "A"}),
            },
        )
        await checker.check_step(step0, plan_result.step_results["s0"])
        report = await checker.review_plan(plan, plan_result)
        d = report.to_dict()
        assert d["plan_id"] == "p1"
        assert d["outcome"] == "success"
        assert isinstance(d["success_path"], list)
        assert isinstance(d["failure_reasons"], list)
        assert isinstance(d["optimization_tips"], list)
 # --- PlanChecker + ExperienceStore Integration Tests ---
 class TestPlanCheckerExperienceStore:
    """复盘结果写入经验库"""
    @pytest.mark.asyncio
    async def test_experience_written_on_review(self):
        """复盘结果写入 ExperienceStore"""
        store = InMemoryExperienceStore()
        checker = PlanChecker(experience_store=store)
        step0 = make_step(step_id="s0")
        plan = make_plan(steps=[step0])
        plan_result = make_plan_result(
            step_results={
                "s0": make_step_result(step_id="s0", result={"data": "A"}),
            },
        )
        await checker.check_step(step0, plan_result.step_results["s0"])
        report = await checker.review_plan(
            plan, plan_result, task_type="test_task", goal="test goal"
        )
        # 验证经验已写入
        results = await store.search("test_task", top_k=10)
        assert len(results) == 1
        assert results[0].outcome == "success"
        assert results[0].task_type == "test_task"
        assert results[0].goal == "test goal"
    @pytest.mark.asyncio
    async def test_failure_experience_written(self):
        """失败经验写入后可检索到"""
        store = InMemoryExperienceStore()
        checker = PlanChecker(experience_store=store)
        step0 = make_step(step_id="s0")
        plan = make_plan(steps=[step0])
        plan_result = make_plan_result(
            step_results={
                "s0": make_step_result(
                    step_id="s0",
                    status=PlanStepStatus.FAILED,
                    error="Agent crashed",
                ),
            },
        )
        await checker.check_step(step0, plan_result.step_results["s0"])
        report = await checker.review_plan(
            plan, plan_result, task_type="risky_task", goal="risky goal"
        )
        # 验证失败经验已写入
        results = await store.search("risky_task", top_k=10)
        assert len(results) == 1
        assert results[0].outcome == "failure"
        assert len(results[0].failure_reasons) > 0
    @pytest.mark.asyncio
    async def test_experience_searchable_by_failure_reason(self):
        """AE3: 错误经验写入后，后续任务能检索到避坑预警"""
        store = InMemoryExperienceStore()
        # 第一次：记录失败经验
        checker = PlanChecker(experience_store=store)
        step0 = make_step(step_id="s0")
        plan = make_plan(steps=[step0])
        plan_result = make_plan_result(
            step_results={
                "s0": make_step_result(
                    step_id="s0",
                    status=PlanStepStatus.FAILED,
                    error="Database connection timeout",
                ),
            },
        )
        await checker.check_step(step0, plan_result.step_results["s0"])
        await checker.review_plan(
            plan, plan_result, task_type="db_query", goal="query database"
        )
        # 第二次：搜索相关经验
        results = await store.search("database timeout", top_k=5, task_type="db_query")
        assert len(results) >= 1
        assert results[0].outcome == "failure"
        assert any("timeout" in r.lower() for r in results[0].failure_reasons)
    @pytest.mark.asyncio
    async def test_no_experience_store_still_works(self):
        """无 ExperienceStore 时复盘仍正常工作"""
        checker = PlanChecker()  # 无 experience_store
        step0 = make_step(step_id="s0")
        plan = make_plan(steps=[step0])
        plan_result = make_plan_result(
            step_results={
                "s0": make_step_result(step_id="s0", result={"data": "A"}),
            },
        )
        await checker.check_step(step0, plan_result.step_results["s0"])
        report = await checker.review_plan(plan, plan_result)
        assert report.outcome == "success"
        assert report.plan_id == "p1"
    @pytest.mark.asyncio
    async def test_experience_store_error_does_not_crash(self):
        """ExperienceStore 写入异常不影响复盘"""
        class FailingStore:
            async def record_experience(self, experience):
                raise RuntimeError("Store is down")
        checker = PlanChecker(experience_store=FailingStore())
        step0 = make_step(step_id="s0")
        plan = make_plan(steps=[step0])
        plan_result = make_plan_result(
            step_results={
                "s0": make_step_result(step_id="s0", result={"data": "A"}),
            },
        )
        await checker.check_step(step0, plan_result.step_results["s0"])
        # 不应抛出异常
        report = await checker.review_plan(plan, plan_result)
        assert report.outcome == "success"
 # --- PlanChecker + PlanExecutor Integration Pattern Tests ---
 class TestPlanCheckerExecutorIntegration:
    """PlanChecker 与 PlanExecutor 集成模式"""
    @pytest.mark.asyncio
    async def test_make_step_complete_callback(self):
        """make_step_complete_callback 创建的回调正确记录检查结果"""
        checker = PlanChecker()
        callback = checker.make_step_complete_callback()
        step = make_step(step_id="s0")
        result = make_step_result(step_id="s0", result={"data": "test"})
        await callback(step, result)
        assert "s0" in checker._check_results
        assert checker._check_results["s0"].status == CheckStatus.PASS
    @pytest.mark.asyncio
    async def test_full_check_review_cycle(self):
        """完整的 检查→复盘→经验写入 闭环"""
        store = InMemoryExperienceStore()
        checker = PlanChecker(experience_store=store)
        # 模拟 3 步计划
        step0 = make_step(step_id="s0", name="Search")
        step1 = make_step(step_id="s1", name="Analyze")
        step2 = make_step(step_id="s2", name="Report")
        plan = make_plan(steps=[step0, step1, step2])
        plan_result = make_plan_result(
            step_results={
                "s0": make_step_result(step_id="s0", result={"search": "data"}, duration_ms=500),
                "s1": make_step_result(step_id="s1", result={"analysis": "result"}, duration_ms=1500),
                "s2": make_step_result(step_id="s2", result={"report": "done"}, duration_ms=800),
            },
        )
        # 逐步检查
        await checker.check_step(step0, plan_result.step_results["s0"])
        await checker.check_step(step1, plan_result.step_results["s1"])
        await checker.check_step(step2, plan_result.step_results["s2"])
        # 复盘
        report = await checker.review_plan(
            plan, plan_result, task_type="analysis", goal="analyze data"
        )
        # 验证复盘报告
        assert report.outcome == "success"
        assert len(report.success_path) == 3
        assert report.success_rate == 1.0
        assert len(report.duration_distribution) == 3
        # 验证经验已写入
        results = await store.search("analysis", top_k=10)
        assert len(results) == 1
        assert results[0].success_rate == 1.0
 # --- PlanChecker Reset Tests ---
 class TestPlanCheckerReset:
    """重置内部状态"""
    @pytest.mark.asyncio
    async def test_reset_clears_check_results(self):
        """reset 清除检查结果"""
        checker = PlanChecker()
        step = make_step(step_id="s0")
        result = make_step_result(result={"data": "test"})
        await checker.check_step(step, result)
        assert len(checker._check_results) > 0
        checker.reset()
        assert len(checker._check_results) == 0
    @pytest.mark.asyncio
    async def test_reset_allows_new_check_cycle(self):
        """重置后可开始新一轮检查"""
        checker = PlanChecker()
        step = make_step(step_id="s0")
        # 第一轮
        result1 = make_step_result(result={"data": "test1"})
        await checker.check_step(step, result1)
        checker.reset()
        # 第二轮
        result2 = make_step_result(result={"data": "test2"})
        check = await checker.check_step(step, result2)
        assert check.status == CheckStatus.PASS
 # --- PlanChecker without LLM Tests ---
 class TestPlanCheckerWithoutLLM:
    """PlanChecker 无 LLM 回退到规则检查"""
    @pytest.mark.asyncio
    async def test_works_without_llm(self):
        """无 LLM 时使用 RuleBasedStepReflector"""
        checker = PlanChecker()  # 默认使用 RuleBasedStepReflector
        step = make_step()
        result = make_step_result(result={"data": "test"})
        check = await checker.check_step(step, result)
        assert check.status == CheckStatus.PASS
        assert check.quality_score > 0
    @pytest.mark.asyncio
    async def test_custom_reflector(self):
        """自定义反思器"""
        class CustomReflector:
            async def reflect_step(self, step, exec_result):
                return (0.9, ["Custom suggestion"])
        checker = PlanChecker(reflector=CustomReflector())
        step = make_step()
        result = make_step_result(result={"data": "test"})
        check = await checker.check_step(step, result)
        assert check.status == CheckStatus.PASS
        assert "Custom suggestion" in check.details.get("reflector_suggestions", [])
 # --- Edge Cases ---
 class TestPlanCheckerEdgeCases:
    """边界情况"""
    @pytest.mark.asyncio
    async def test_empty_plan_review(self):
        """空计划复盘"""
        checker = PlanChecker()
        plan = make_plan(steps=[])
        plan_result = make_plan_result(step_results={})
        report = await checker.review_plan(plan, plan_result)
        assert report.outcome == "success"
        assert report.success_rate == 0.0
    @pytest.mark.asyncio
    async def test_all_skipped_steps_review(self):
        """全部跳过步骤的复盘"""
        checker = PlanChecker()
        step0 = make_step(step_id="s0")
        step1 = make_step(step_id="s1")
        plan = make_plan(steps=[step0, step1])
        plan_result = make_plan_result(
            step_results={
                "s0": make_step_result(
                    step_id="s0",
                    status=PlanStepStatus.SKIPPED,
                    error="Dependency failed",
                ),
                "s1": make_step_result(
                    step_id="s1",
                    status=PlanStepStatus.SKIPPED,
                    error="Dependency failed",
                ),
            },
        )
        report = await checker.review_plan(plan, plan_result)
        assert report.outcome == "failure"
        assert len(report.failure_reasons) > 0
    @pytest.mark.asyncio
    async def test_quality_threshold_triggers_fail(self):
        """质量评分低于阈值触发不通过"""
        class LowScoreReflector:
            async def reflect_step(self, step, exec_result):
                return (0.2, ["Low quality output"])
        checker = PlanChecker(
            reflector=LowScoreReflector(),
            quality_threshold=0.5,
        )
        step = make_step()
        result = make_step_result(result={"data": "test"})
        check = await checker.check_step(step, result)
        # 综合评分 = 0.4 * 1.0 (gate) + 0.6 * 0.2 (reflector) = 0.52
        # 如果 reflector 评分很低，可能低于阈值
        assert check.quality_score < 1.0
    @pytest.mark.asyncio
    async def test_reflector_exception_handled(self):
        """Reflector 异常不影响检查"""
        class CrashingReflector:
            async def reflect_step(self, step, exec_result):
                raise RuntimeError("Reflector crashed")
        checker = PlanChecker(reflector=CrashingReflector())
        step = make_step()
        result = make_step_result(result={"data": "test"})
        check = await checker.check_step(step, result)
        # 应该回退到 gate 的评分
        assert check.status in (CheckStatus.PASS, CheckStatus.FAIL)
    @pytest.mark.asyncio
    async def test_multiple_quality_failures_in_review(self):
        """多个步骤质量检查不通过，复盘报告汇总所有原因"""
        config = QualityGateConfig(required_fields=["report"])
        checker = PlanChecker(quality_gate_config=config)
        step0 = make_step(step_id="s0")
        step1 = make_step(step_id="s1")
        plan = make_plan(steps=[step0, step1])
        plan_result = make_plan_result(
            step_results={
                "s0": make_step_result(step_id="s0", result={"data": "no report"}),
                "s1": make_step_result(step_id="s1", result={"data": "also no report"}),
            },
        )
        await checker.check_step(step0, plan_result.step_results["s0"])
        await checker.check_step(step1, plan_result.step_results["s1"])
        report = await checker.review_plan(plan, plan_result)
        # 质量检查不通过的原因应出现在 failure_reasons 中
        quality_fail_reasons = [
            r for r in report.failure_reasons if "quality check failed" in r
        ]
        assert len(quality_fail_reasons) == 2
--- a/tests/unit/core/test_plan_executor.py
+++ b/tests/unit/core/test_plan_executor.py
@ -0,0 +1,892 @@
 """Tests for PlanExecutor — 执行计划执行器"""
 from __future__ import annotations
 import asyncio
 import pytest
 from datetime import datetime, timezone
 from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch
 from agentkit.core.plan_executor import (
    FailureAction,
    PlanExecutor,
    PlanExecutionResult,
    StepExecutionResult,
 )
 from agentkit.core.plan_schema import (
    ExecutionPlan,
    PlanStep,
    PlanStepStatus,
    SkillGap,
    SkillGapLevel,
 )
 from agentkit.core.protocol import TaskMessage, TaskResult, TaskStatus
 # --- Helpers ---
 def make_task(task_id: str = "t1", input_data: dict | None = None) -> TaskMessage:
    return TaskMessage(
        task_id=task_id,
        agent_name="test_agent",
        task_type="test",
        priority=1,
        input_data=input_data or {"query": "test"},
        callback_url=None,
        created_at=datetime.now(timezone.utc),
    )
 def make_plan(
    steps: list[PlanStep] | None = None,
    parallel_groups: list[list[str]] | None = None,
    goal: str = "test goal",
 ) -> ExecutionPlan:
    if steps is None:
        steps = [PlanStep(step_id="s0", name="Step 0", description="First step")]
    if parallel_groups is None:
        parallel_groups = [[s.step_id for s in steps]]
    return ExecutionPlan(
        plan_id="p1",
        goal=goal,
        steps=steps,
        parallel_groups=parallel_groups,
        confirmed=True,
    )
 class MockAgent:
    """Mock Agent for testing"""
    def __init__(
        self,
        name: str = "mock_agent",
        output_data: dict | None = None,
        should_fail: bool = False,
        fail_count: int = 0,
    ):
        self.name = name
        self.agent_type = "mock"
        self._output_data = output_data or {"result": f"output from {name}"}
        self._should_fail = should_fail
        self._fail_count = fail_count
        self._call_count = 0
    async def execute(self, task: TaskMessage) -> TaskResult:
        self._call_count += 1
        if self._should_fail:
            raise RuntimeError(f"Agent {self.name} failed")
        if self._fail_count > 0 and self._call_count <= self._fail_count:
            raise RuntimeError(f"Agent {self.name} failed (attempt {self._call_count})")
        now = datetime.now(timezone.utc)
        return TaskResult(
            task_id=task.task_id,
            agent_name=self.name,
            status=TaskStatus.COMPLETED,
            output_data=self._output_data,
            error_message=None,
            started_at=now,
            completed_at=now,
        )
 class MockAgentPool:
    """Mock AgentPool for testing"""
    def __init__(
        self,
        agents: dict[str, MockAgent] | None = None,
        skill_agent_map: dict[str, MockAgent] | None = None,
        available_skills: set[str] | None = None,
    ):
        self._agents = agents or {}
        self._skill_agent_map = skill_agent_map or {}
        self._available_skills = available_skills or set(skill_agent_map.keys()) if skill_agent_map else set()
        self._created_skills: list[str] = []
    def get_agent(self, name: str) -> MockAgent | None:
        return self._agents.get(name)
    def list_agents(self) -> list[dict]:
        return [
            {"name": a.name, "agent_type": a.agent_type, "description": f"Mock agent {a.name}"}
            for a in self._agents.values()
        ]
    async def create_agent_from_skill(self, skill_name: str) -> MockAgent:
        self._created_skills.append(skill_name)
        if skill_name not in self._available_skills:
            raise RuntimeError(f"Skill '{skill_name}' not found in registry")
        if skill_name in self._skill_agent_map:
            agent = self._skill_agent_map[skill_name]
            self._agents[skill_name] = agent
            return agent
        # 不应到达这里，因为 available_skills 应包含所有可用的 skill
        raise RuntimeError(f"Skill '{skill_name}' not found in registry")
 # --- Test Scenarios ---
 class TestPlanExecutorAllSuccess:
    """3 个并行步骤全部成功 → 结果正确汇总"""
    @pytest.mark.asyncio
    async def test_three_parallel_steps_all_succeed(self):
        agent_a = MockAgent("web_search", {"data": "A"})
        agent_b = MockAgent("seo_analyzer", {"data": "B"})
        agent_c = MockAgent("report_generator", {"data": "C"})
        pool = MockAgentPool(
            skill_agent_map={
                "web_search": agent_a,
                "seo_analyzer": agent_b,
                "report_generator": agent_c,
            },
        )
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", parallel_group=0, required_skills=["web_search"]),
                PlanStep(step_id="s1", name="Analyze", description="Analyze", parallel_group=0, required_skills=["seo_analyzer"]),
                PlanStep(step_id="s2", name="Report", description="Report", parallel_group=0, required_skills=["report_generator"]),
            ],
            parallel_groups=[["s0", "s1", "s2"]],
        )
        executor = PlanExecutor(agent_pool=pool)
        result = await executor.execute(plan, make_task())
        assert result.status == TaskStatus.COMPLETED
        assert len(result.completed_steps) == 3
        assert len(result.failed_steps) == 0
        assert result.step_results["s0"].status == PlanStepStatus.COMPLETED
        assert result.step_results["s1"].status == PlanStepStatus.COMPLETED
        assert result.step_results["s2"].status == PlanStepStatus.COMPLETED
    @pytest.mark.asyncio
    async def test_parallel_groups_executed_sequentially(self):
        """并行组之间串行执行，组内并行"""
        agent_a = MockAgent("search", {"step": 1})
        agent_b = MockAgent("report", {"step": 2})
        pool = MockAgentPool(
            skill_agent_map={
                "web_search": agent_a,
                "report_generator": agent_b,
            },
        )
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", parallel_group=0, required_skills=["web_search"]),
                PlanStep(step_id="s1", name="Report", description="Report", dependencies=["s0"], parallel_group=1, required_skills=["report_generator"]),
            ],
            parallel_groups=[["s0"], ["s1"]],
        )
        executor = PlanExecutor(agent_pool=pool)
        result = await executor.execute(plan, make_task())
        assert result.status == TaskStatus.COMPLETED
        assert len(result.completed_steps) == 2
        # s1 应收到 s0 的依赖结果
        assert result.step_results["s1"].status == PlanStepStatus.COMPLETED
    @pytest.mark.asyncio
    async def test_dependency_results_injected(self):
        """依赖步骤的结果应注入到后续步骤的输入中"""
        agent_a = MockAgent("search", {"search_result": "data_A"})
        agent_b = MockAgent("report", {"report_result": "data_B"})
        pool = MockAgentPool(
            skill_agent_map={
                "web_search": agent_a,
                "report_generator": agent_b,
            },
        )
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", parallel_group=0, required_skills=["web_search"]),
                PlanStep(step_id="s1", name="Report", description="Report", dependencies=["s0"], parallel_group=1, required_skills=["report_generator"]),
            ],
            parallel_groups=[["s0"], ["s1"]],
        )
        executor = PlanExecutor(agent_pool=pool)
        result = await executor.execute(plan, make_task())
        assert result.status == TaskStatus.COMPLETED
        # 验证 s1 的输入中包含 dependency_results
        # 通过检查 agent_b 收到的 task 来验证
        assert result.step_results["s0"].result == {"search_result": "data_A"}
 class TestPlanExecutorPartialFailure:
    """并行步骤中 1 个失败 → 其他步骤继续，失败步骤进入检查"""
    @pytest.mark.asyncio
    async def test_one_parallel_step_fails_others_continue(self):
        agent_a = MockAgent("search", {"data": "A"})
        agent_b = MockAgent("failing", should_fail=True)
        agent_c = MockAgent("report", {"data": "C"})
        pool = MockAgentPool(
            skill_agent_map={
                "web_search": agent_a,
                "failing_skill": agent_b,
                "report_generator": agent_c,
            },
        )
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", parallel_group=0, required_skills=["web_search"]),
                PlanStep(step_id="s1", name="Fail", description="Fail", parallel_group=0, required_skills=["failing_skill"]),
                PlanStep(step_id="s2", name="Report", description="Report", parallel_group=0, required_skills=["report_generator"]),
            ],
            parallel_groups=[["s0", "s1", "s2"]],
        )
        executor = PlanExecutor(agent_pool=pool, max_retries=0)
        result = await executor.execute(plan, make_task())
        # s0 和 s2 应成功
        assert result.step_results["s0"].status == PlanStepStatus.COMPLETED
        assert result.step_results["s2"].status == PlanStepStatus.COMPLETED
        # s1 应失败（默认策略是 SKIP）
        assert result.step_results["s1"].status == PlanStepStatus.SKIPPED
        assert len(result.completed_steps) == 2
    @pytest.mark.asyncio
    async def test_failed_step_skips_dependents(self):
        """失败步骤的依赖步骤应被跳过"""
        agent_a = MockAgent("failing", should_fail=True)
        agent_b = MockAgent("report", {"data": "B"})
        pool = MockAgentPool(
            skill_agent_map={
                "web_search": agent_a,
                "report_generator": agent_b,
            },
        )
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", parallel_group=0, required_skills=["web_search"]),
                PlanStep(step_id="s1", name="Report", description="Report", dependencies=["s0"], parallel_group=1, required_skills=["report_generator"]),
            ],
            parallel_groups=[["s0"], ["s1"]],
        )
        executor = PlanExecutor(agent_pool=pool, max_retries=0)
        result = await executor.execute(plan, make_task())
        # s0 失败后被跳过
        assert result.step_results["s0"].status == PlanStepStatus.SKIPPED
        # s1 因依赖失败也被跳过
        assert result.step_results["s1"].status == PlanStepStatus.SKIPPED
        assert len(result.skipped_steps) == 2
 class TestPlanExecutorRetry:
    """步骤失败后自动重试成功"""
    @pytest.mark.asyncio
    async def test_retry_succeeds_after_initial_failure(self):
        """步骤首次失败后重试成功"""
        agent = MockAgent("search", {"data": "recovered"}, fail_count=1)
        pool = MockAgentPool(
            skill_agent_map={"web_search": agent},
        )
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", required_skills=["web_search"]),
            ],
            parallel_groups=[["s0"]],
        )
        executor = PlanExecutor(agent_pool=pool, max_retries=2)
        result = await executor.execute(plan, make_task())
        assert result.step_results["s0"].status == PlanStepStatus.COMPLETED
        assert result.step_results["s0"].retry_count == 1
        assert result.step_results["s0"].result == {"data": "recovered"}
    @pytest.mark.asyncio
    async def test_retry_exhausted_step_fails(self):
        """重试次数耗尽后步骤标记为失败"""
        agent = MockAgent("failing", should_fail=True)
        pool = MockAgentPool(
            skill_agent_map={"web_search": agent},
        )
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", required_skills=["web_search"]),
            ],
            parallel_groups=[["s0"]],
        )
        executor = PlanExecutor(agent_pool=pool, max_retries=2)
        result = await executor.execute(plan, make_task())
        assert result.step_results["s0"].status == PlanStepStatus.SKIPPED  # 默认策略是 SKIP
        assert result.step_results["s0"].retry_count == 2
    @pytest.mark.asyncio
    async def test_zero_retries_no_retry(self):
        """max_retries=0 时不重试"""
        agent = MockAgent("failing", should_fail=True)
        pool = MockAgentPool(
            skill_agent_map={"web_search": agent},
        )
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", required_skills=["web_search"]),
            ],
            parallel_groups=[["s0"]],
        )
        executor = PlanExecutor(agent_pool=pool, max_retries=0)
        result = await executor.execute(plan, make_task())
        assert result.step_results["s0"].retry_count == 0
 class TestPlanExecutorPlanAdjustment:
    """步骤失败后调整计划（跳过/替换）继续执行"""
    @pytest.mark.asyncio
    async def test_skip_action_skips_step(self):
        """on_step_failed 返回 SKIP 时跳过步骤"""
        agent = MockAgent("failing", should_fail=True)
        pool = MockAgentPool(
            skill_agent_map={"web_search": agent},
        )
        async def on_failed(step, result):
            return FailureAction.SKIP
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", required_skills=["web_search"]),
            ],
            parallel_groups=[["s0"]],
        )
        executor = PlanExecutor(agent_pool=pool, max_retries=0, on_step_failed=on_failed)
        result = await executor.execute(plan, make_task())
        assert result.step_results["s0"].status == PlanStepStatus.SKIPPED
        assert result.adjusted is True
    @pytest.mark.asyncio
    async def test_replace_action_skips_original(self):
        """on_step_failed 返回 REPLACE 时标记原步骤为 SKIPPED"""
        agent = MockAgent("failing", should_fail=True)
        pool = MockAgentPool(
            skill_agent_map={"web_search": agent},
        )
        async def on_failed(step, result):
            return FailureAction.REPLACE
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", required_skills=["web_search"]),
            ],
            parallel_groups=[["s0"]],
        )
        executor = PlanExecutor(agent_pool=pool, max_retries=0, on_step_failed=on_failed)
        result = await executor.execute(plan, make_task())
        assert result.step_results["s0"].status == PlanStepStatus.SKIPPED
        assert result.adjusted is True
    @pytest.mark.asyncio
    async def test_abort_action_skips_all_remaining(self):
        """on_step_failed 返回 ABORT 时跳过所有剩余步骤"""
        agent_a = MockAgent("failing", should_fail=True)
        agent_b = MockAgent("report", {"data": "B"})
        pool = MockAgentPool(
            skill_agent_map={
                "web_search": agent_a,
                "report_generator": agent_b,
            },
        )
        async def on_failed(step, result):
            return FailureAction.ABORT
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", required_skills=["web_search"]),
                PlanStep(step_id="s1", name="Report", description="Report", required_skills=["report_generator"]),
            ],
            parallel_groups=[["s0"], ["s1"]],
        )
        executor = PlanExecutor(agent_pool=pool, max_retries=0, on_step_failed=on_failed)
        result = await executor.execute(plan, make_task())
        assert result.step_results["s0"].status == PlanStepStatus.SKIPPED
        assert result.step_results["s1"].status == PlanStepStatus.SKIPPED
        assert result.adjusted is True
 class TestPlanExecutorHumanIntervention:
    """执行中请求人工介入后继续"""
    @pytest.mark.asyncio
    async def test_human_intervention_then_skip(self):
        """人工介入后选择跳过"""
        agent = MockAgent("failing", should_fail=True)
        pool = MockAgentPool(
            skill_agent_map={"web_search": agent},
        )
        async def on_failed(step, result):
            return FailureAction.REQUEST_HUMAN
        async def on_human(step, result):
            return FailureAction.SKIP
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", required_skills=["web_search"]),
            ],
            parallel_groups=[["s0"]],
        )
        executor = PlanExecutor(
            agent_pool=pool,
            max_retries=0,
            on_step_failed=on_failed,
            on_human_intervention=on_human,
        )
        result = await executor.execute(plan, make_task())
        assert result.step_results["s0"].status == PlanStepStatus.SKIPPED
        assert result.human_intervention_requested is True
        assert result.adjusted is True
    @pytest.mark.asyncio
    async def test_human_intervention_without_callback(self):
        """请求人工介入但无回调时标记 human_intervention"""
        agent = MockAgent("failing", should_fail=True)
        pool = MockAgentPool(
            skill_agent_map={"web_search": agent},
        )
        async def on_failed(step, result):
            return FailureAction.REQUEST_HUMAN
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", required_skills=["web_search"]),
            ],
            parallel_groups=[["s0"]],
        )
        executor = PlanExecutor(
            agent_pool=pool,
            max_retries=0,
            on_step_failed=on_failed,
        )
        result = await executor.execute(plan, make_task())
        assert result.human_intervention_requested is True
 class TestPlanExecutorStepTimeout:
    """步骤超时处理"""
    @pytest.mark.asyncio
    async def test_step_timeout_fails_step(self):
        """步骤超时后标记为失败"""
        class SlowAgent:
            name = "slow_agent"
            agent_type = "mock"
            async def execute(self, task):
                await asyncio.sleep(10)  # 模拟超时
        pool = MockAgentPool(
            skill_agent_map={"web_search": SlowAgent()},
        )
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", required_skills=["web_search"]),
            ],
            parallel_groups=[["s0"]],
        )
        executor = PlanExecutor(agent_pool=pool, max_retries=0, step_timeout=0.1)
        result = await executor.execute(plan, make_task())
        assert result.step_results["s0"].status == PlanStepStatus.SKIPPED
        assert "timed out" in (result.step_results["s0"].error or "")
 class TestPlanExecutorNoAgentAvailable:
    """Agent 不可用时的处理"""
    @pytest.mark.asyncio
    async def test_no_agent_for_step(self):
        """步骤所需 Agent 不可用时步骤失败"""
        pool = MockAgentPool(skill_agent_map={})
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", required_skills=["nonexistent_skill"]),
            ],
            parallel_groups=[["s0"]],
        )
        executor = PlanExecutor(agent_pool=pool, max_retries=0)
        result = await executor.execute(plan, make_task())
        assert result.step_results["s0"].status == PlanStepStatus.SKIPPED
        assert "No agent available" in (result.step_results["s0"].error or "")
 class TestPlanExecutorStepComplete:
    """步骤完成回调"""
    @pytest.mark.asyncio
    async def test_on_step_complete_callback_called(self):
        """步骤完成后调用回调"""
        agent = MockAgent("search", {"data": "A"})
        pool = MockAgentPool(skill_agent_map={"web_search": agent})
        completed_steps: list[tuple[str, PlanStepStatus]] = []
        async def on_complete(step, result):
            completed_steps.append((step.step_id, result.status))
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", required_skills=["web_search"]),
            ],
            parallel_groups=[["s0"]],
        )
        executor = PlanExecutor(agent_pool=pool, on_step_complete=on_complete)
        result = await executor.execute(plan, make_task())
        assert len(completed_steps) == 1
        assert completed_steps[0] == ("s0", PlanStepStatus.COMPLETED)
 class TestPlanExecutionResult:
    """PlanExecutionResult 属性测试"""
    def test_completed_steps(self):
        result = PlanExecutionResult(
            plan_id="p1",
            step_results={
                "s0": StepExecutionResult(step_id="s0", status=PlanStepStatus.COMPLETED, result={"a": 1}),
                "s1": StepExecutionResult(step_id="s1", status=PlanStepStatus.FAILED, error="err"),
                "s2": StepExecutionResult(step_id="s2", status=PlanStepStatus.SKIPPED, error="skip"),
            },
            status=TaskStatus.COMPLETED,
            total_duration_ms=100.0,
        )
        assert result.completed_steps == ["s0"]
        assert result.failed_steps == ["s1"]
        assert result.skipped_steps == ["s2"]
    def test_empty_results(self):
        result = PlanExecutionResult(
            plan_id="p1",
            step_results={},
            status=TaskStatus.COMPLETED,
            total_duration_ms=0.0,
        )
        assert result.completed_steps == []
        assert result.failed_steps == []
        assert result.skipped_steps == []
 class TestPlanExecutorOverallStatus:
    """整体状态判定"""
    @pytest.mark.asyncio
    async def test_all_completed(self):
        agent = MockAgent("search", {"data": "A"})
        pool = MockAgentPool(skill_agent_map={"web_search": agent})
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", required_skills=["web_search"]),
            ],
            parallel_groups=[["s0"]],
        )
        executor = PlanExecutor(agent_pool=pool)
        result = await executor.execute(plan, make_task())
        assert result.status == TaskStatus.COMPLETED
    @pytest.mark.asyncio
    async def test_all_skipped(self):
        agent = MockAgent("failing", should_fail=True)
        pool = MockAgentPool(skill_agent_map={"web_search": agent})
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", required_skills=["web_search"]),
            ],
            parallel_groups=[["s0"]],
        )
        executor = PlanExecutor(agent_pool=pool, max_retries=0)
        result = await executor.execute(plan, make_task())
        # 默认策略 SKIP → 全部跳过 → COMPLETED
        assert result.status == TaskStatus.COMPLETED
    @pytest.mark.asyncio
    async def test_empty_plan(self):
        pool = MockAgentPool()
        plan = make_plan(steps=[], parallel_groups=[])
        executor = PlanExecutor(agent_pool=pool)
        result = await executor.execute(plan, make_task())
        assert result.status == TaskStatus.COMPLETED
        assert len(result.step_results) == 0
 class TestPlanExecutorStepStateMachine:
    """步骤状态机：PENDING → RUNNING → COMPLETED/FAILED"""
    @pytest.mark.asyncio
    async def test_step_transitions_to_running_then_completed(self):
        """步骤状态正确转换"""
        agent = MockAgent("search", {"data": "A"})
        pool = MockAgentPool(skill_agent_map={"web_search": agent})
        step = PlanStep(step_id="s0", name="Search", description="Search", required_skills=["web_search"])
        plan = make_plan(steps=[step], parallel_groups=[["s0"]])
        executor = PlanExecutor(agent_pool=pool)
        result = await executor.execute(plan, make_task())
        # 执行完成后步骤状态应为 COMPLETED
        assert step.status == PlanStepStatus.COMPLETED
    @pytest.mark.asyncio
    async def test_step_transitions_to_failed_on_error(self):
        agent = MockAgent("failing", should_fail=True)
        pool = MockAgentPool(skill_agent_map={"web_search": agent})
        step = PlanStep(step_id="s0", name="Search", description="Search", required_skills=["web_search"])
        plan = make_plan(steps=[step], parallel_groups=[["s0"]])
        executor = PlanExecutor(agent_pool=pool, max_retries=0)
        result = await executor.execute(plan, make_task())
        # 默认策略 SKIP → 步骤最终状态为 SKIPPED
        assert step.status == PlanStepStatus.SKIPPED
 class TestPlanExecutorDependencyInjection:
    """依赖结果注入"""
    @pytest.mark.asyncio
    async def test_dependency_results_format(self):
        """依赖结果格式正确"""
        agent_a = MockAgent("search", {"search_data": "result_A"})
        agent_b = MockAgent("report", {"report_data": "result_B"})
        received_inputs: list[dict] = []
        class CapturingAgent:
            name = "report_agent"
            agent_type = "mock"
            async def execute(self, task: TaskMessage) -> TaskResult:
                received_inputs.append(dict(task.input_data))
                now = datetime.now(timezone.utc)
                return TaskResult(
                    task_id=task.task_id,
                    agent_name=self.name,
                    status=TaskStatus.COMPLETED,
                    output_data={"report": "done"},
                    error_message=None,
                    started_at=now,
                    completed_at=now,
                )
        pool = MockAgentPool(
            skill_agent_map={
                "web_search": agent_a,
                "report_generator": CapturingAgent(),
            },
        )
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", parallel_group=0, required_skills=["web_search"]),
                PlanStep(step_id="s1", name="Report", description="Report", dependencies=["s0"], parallel_group=1, required_skills=["report_generator"]),
            ],
            parallel_groups=[["s0"], ["s1"]],
        )
        executor = PlanExecutor(agent_pool=pool)
        result = await executor.execute(plan, make_task())
        # s1 的输入应包含 dependency_results
        assert len(received_inputs) == 1
        assert "dependency_results" in received_inputs[0]
        assert "s0" in received_inputs[0]["dependency_results"]
        assert received_inputs[0]["dependency_results"]["s0"]["status"] == "completed"
    @pytest.mark.asyncio
    async def test_step_metadata_in_input(self):
        """步骤元信息应注入到输入中"""
        agent = MockAgent("search", {"data": "A"})
        pool = MockAgentPool(skill_agent_map={"web_search": agent})
        received_inputs: list[dict] = []
        class CapturingAgent:
            name = "search_agent"
            agent_type = "mock"
            async def execute(self, task: TaskMessage) -> TaskResult:
                received_inputs.append(dict(task.input_data))
                now = datetime.now(timezone.utc)
                return TaskResult(
                    task_id=task.task_id,
                    agent_name=self.name,
                    status=TaskStatus.COMPLETED,
                    output_data={"result": "done"},
                    error_message=None,
                    started_at=now,
                    completed_at=now,
                )
        pool2 = MockAgentPool(
            skill_agent_map={"web_search": CapturingAgent()},
        )
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search Step", description="Do the search", required_skills=["web_search"]),
            ],
            parallel_groups=[["s0"]],
        )
        executor = PlanExecutor(agent_pool=pool2)
        result = await executor.execute(plan, make_task())
        assert len(received_inputs) == 1
        assert received_inputs[0]["step_name"] == "Search Step"
        assert received_inputs[0]["step_description"] == "Do the search"
 class TestPlanExecutorExistingAgent:
    """通过 get_agent 获取已有 Agent"""
    @pytest.mark.asyncio
    async def test_fallback_to_existing_agent(self):
        """Skill 创建失败时回退到池中已有 Agent"""
        agent = MockAgent("existing_agent", {"data": "existing"})
        pool = MockAgentPool(
            agents={"s0": agent},  # 按步骤 ID 注册
            skill_agent_map={},  # 无 Skill 映射
            available_skills=set(),  # 无可用 Skill
        )
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", required_skills=["web_search"]),
            ],
            parallel_groups=[["s0"]],
        )
        executor = PlanExecutor(agent_pool=pool, max_retries=0)
        result = await executor.execute(plan, make_task())
        assert result.step_results["s0"].status == PlanStepStatus.COMPLETED
        assert result.step_results["s0"].result == {"data": "existing"}
    @pytest.mark.asyncio
    async def test_fallback_to_skill_name_agent(self):
        """Skill 创建失败时尝试用 Skill 名称获取 Agent"""
        agent = MockAgent("web_search", {"data": "by_skill_name"})
        pool = MockAgentPool(
            agents={"web_search": agent},
            skill_agent_map={},  # create_agent_from_skill 会失败
            available_skills=set(),  # 无可用 Skill
        )
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", required_skills=["web_search"]),
            ],
            parallel_groups=[["s0"]],
        )
        executor = PlanExecutor(agent_pool=pool, max_retries=0)
        result = await executor.execute(plan, make_task())
        assert result.step_results["s0"].status == PlanStepStatus.COMPLETED
 class TestPlanExecutorCascadingSkip:
    """级联跳过：失败步骤的依赖链全部跳过"""
    @pytest.mark.asyncio
    async def test_cascading_skip(self):
        agent_a = MockAgent("failing", should_fail=True)
        agent_b = MockAgent("report", {"data": "B"})
        agent_c = MockAgent("final", {"data": "C"})
        pool = MockAgentPool(
            skill_agent_map={
                "web_search": agent_a,
                "report_generator": agent_b,
                "final_skill": agent_c,
            },
        )
        plan = make_plan(
            steps=[
                PlanStep(step_id="s0", name="Search", description="Search", parallel_group=0, required_skills=["web_search"]),
                PlanStep(step_id="s1", name="Report", description="Report", dependencies=["s0"], parallel_group=1, required_skills=["report_generator"]),
                PlanStep(step_id="s2", name="Final", description="Final", dependencies=["s1"], parallel_group=2, required_skills=["final_skill"]),
            ],
            parallel_groups=[["s0"], ["s1"], ["s2"]],
        )
        executor = PlanExecutor(agent_pool=pool, max_retries=0)
        result = await executor.execute(plan, make_task())
        # s0 失败 → s1 和 s2 都应被跳过
        assert result.step_results["s0"].status == PlanStepStatus.SKIPPED
        assert result.step_results["s1"].status == PlanStepStatus.SKIPPED
        assert result.step_results["s2"].status == PlanStepStatus.SKIPPED
        assert "failed dependency" in (result.step_results["s1"].error or "")
        assert "failed dependency" in (result.step_results["s2"].error or "")
--- a/tests/unit/evolution/test_experience_store.py
+++ b/tests/unit/evolution/test_experience_store.py
@ -0,0 +1,532 @@
 """Tests for ExperienceStore - 任务经验记录、检索和指标追踪"""
 from __future__ import annotations
 from datetime import datetime, timedelta, timezone
 import pytest
 from agentkit.evolution.experience_schema import EvolutionMetrics, TaskExperience
 from agentkit.evolution.experience_store import (
    InMemoryExperienceStore,
    _compute_cosine_similarity,
    _parse_time_window,
 )
 from agentkit.memory.embedder import MockEmbedder
 # ── Fixtures ──────────────────────────────────────────────
@pytest.fixture
 def mock_embedder():
    """MockEmbedder 实例，生成确定性伪向量"""
    return MockEmbedder(dimension=64)
@pytest.fixture
 def store(mock_embedder):
    """带 MockEmbedder 的 InMemoryExperienceStore"""
    return InMemoryExperienceStore(embedder=mock_embedder, decay_rate=0.01, alpha=0.7)
@pytest.fixture
 def store_no_embedder():
    """无 embedder 的 InMemoryExperienceStore"""
    return InMemoryExperienceStore(decay_rate=0.01, alpha=0.7)
 def _make_experience(
    task_type: str = "code_review",
    goal: str = "Review the PR",
    outcome: str = "success",
    duration_seconds: float = 10.0,
    success_rate: float = 1.0,
    failure_reasons: list[str] | None = None,
    optimization_tips: list[str] | None = None,
    created_at: datetime | None = None,
 ) -> TaskExperience:
    """创建测试用 TaskExperience"""
    return TaskExperience(
        experience_id="",
        task_type=task_type,
        goal=goal,
        steps_summary=f"Executed {task_type} task",
        outcome=outcome,
        duration_seconds=duration_seconds,
        success_rate=success_rate,
        failure_reasons=failure_reasons or [],
        optimization_tips=optimization_tips or [],
        created_at=created_at or datetime.now(timezone.utc),
    )
 # ── TaskExperience 数据模型测试 ────────────────────────────
 class TestTaskExperience:
    def test_to_dict(self):
        exp = TaskExperience(
            experience_id="exp-1",
            task_type="code_review",
            goal="Review PR",
            steps_summary="Checked code",
            outcome="success",
            duration_seconds=5.0,
            success_rate=1.0,
            failure_reasons=[],
            optimization_tips=["Use faster linter"],
        )
        d = exp.to_dict()
        assert d["experience_id"] == "exp-1"
        assert d["task_type"] == "code_review"
        assert d["outcome"] == "success"
        assert d["duration_seconds"] == 5.0
        assert "embedding" not in d  # embedding 不应出现在字典中
        assert d["optimization_tips"] == ["Use faster linter"]
    def test_text_for_embedding(self):
        exp = TaskExperience(
            task_type="code_review",
            goal="Review the PR",
            steps_summary="Checked code style",
            failure_reasons=["timeout"],
            optimization_tips=["Increase timeout"],
        )
        text = exp.text_for_embedding()
        assert "code_review" in text
        assert "Review the PR" in text
        assert "timeout" in text
        assert "Increase timeout" in text
    def test_text_for_embedding_minimal(self):
        exp = TaskExperience(task_type="test", goal="Run tests")
        text = exp.text_for_embedding()
        assert "test" in text
        assert "Run tests" in text
 # ── EvolutionMetrics 数据模型测试 ──────────────────────────
 class TestEvolutionMetrics:
    def test_to_dict(self):
        now = datetime.now(timezone.utc)
        m = EvolutionMetrics(
            task_type="code_review",
            time_window="24h",
            completion_rate=0.9,
            avg_duration=12.5,
            retry_rate=0.1,
            sample_count=100,
            window_start=now,
            window_end=now,
        )
        d = m.to_dict()
        assert d["task_type"] == "code_review"
        assert d["completion_rate"] == 0.9
        assert d["avg_duration"] == 12.5
        assert d["retry_rate"] == 0.1
        assert d["sample_count"] == 100
 # ── 辅助函数测试 ──────────────────────────────────────────
 class TestHelperFunctions:
    def test_cosine_similarity_identical(self):
        vec = [1.0, 0.0, 0.0]
        assert _compute_cosine_similarity(vec, vec) == pytest.approx(1.0)
    def test_cosine_similarity_orthogonal(self):
        a = [1.0, 0.0]
        b = [0.0, 1.0]
        assert _compute_cosine_similarity(a, b) == pytest.approx(0.0)
    def test_cosine_similarity_opposite(self):
        a = [1.0, 0.0]
        b = [-1.0, 0.0]
        assert _compute_cosine_similarity(a, b) == pytest.approx(-1.0)
    def test_cosine_similarity_empty(self):
        assert _compute_cosine_similarity([], []) == 0.0
    def test_cosine_similarity_mismatched_dims(self):
        assert _compute_cosine_similarity([1.0], [1.0, 2.0]) == 0.0
    def test_parse_time_window_hours(self):
        delta = _parse_time_window("24h")
        assert delta == timedelta(hours=24)
    def test_parse_time_window_days(self):
        delta = _parse_time_window("7d")
        assert delta == timedelta(days=7)
    def test_parse_time_window_unknown_unit(self):
        delta = _parse_time_window("30m")
        assert delta == timedelta(hours=24)  # fallback
 # ── InMemoryExperienceStore.record_experience 测试 ────────
 class TestRecordExperience:
    async def test_record_returns_experience_id(self, store):
        exp = _make_experience()
        exp_id = await store.record_experience(exp)
        assert exp_id is not None
        assert len(exp_id) > 0
    async def test_record_auto_generates_id(self, store):
        exp = _make_experience()
        assert exp.experience_id == ""
        exp_id = await store.record_experience(exp)
        assert exp.experience_id == exp_id
    async def test_record_auto_generates_embedding(self, store):
        exp = _make_experience()
        assert exp.embedding is None
        await store.record_experience(exp)
        assert exp.embedding is not None
        assert len(exp.embedding) == 64
    async def test_record_preserves_existing_embedding(self, store):
        custom_embedding = [0.1] * 64
        exp = _make_experience()
        exp.embedding = custom_embedding
        await store.record_experience(exp)
        # 内部存储的副本应保留原始 embedding
        stored = store._experiences[exp.experience_id]
        assert stored.embedding == custom_embedding
    async def test_record_without_embedder(self, store_no_embedder):
        exp = _make_experience()
        await store_no_embedder.record_experience(exp)
        assert exp.embedding is None
    async def test_record_success_experience(self, store):
        exp = _make_experience(outcome="success", success_rate=1.0)
        exp_id = await store.record_experience(exp)
        stored = store._experiences[exp_id]
        assert stored.outcome == "success"
        assert stored.success_rate == 1.0
    async def test_record_failure_experience(self, store):
        exp = _make_experience(
            outcome="failure",
            success_rate=0.0,
            failure_reasons=["timeout", "connection refused"],
        )
        exp_id = await store.record_experience(exp)
        stored = store._experiences[exp_id]
        assert stored.outcome == "failure"
        assert stored.failure_reasons == ["timeout", "connection refused"]
    async def test_record_stores_independent_copy(self, store):
        """验证存储的是副本，外部修改不影响内部"""
        exp = _make_experience(failure_reasons=["original"])
        exp_id = await store.record_experience(exp)
        exp.failure_reasons.append("modified")
        stored = store._experiences[exp_id]
        assert stored.failure_reasons == ["original"]
 # ── InMemoryExperienceStore.search 测试 ───────────────────
 class TestSearchExperience:
    async def test_search_returns_results(self, store):
        await store.record_experience(
            _make_experience(task_type="code_review", goal="Review Python code")
        )
        await store.record_experience(
            _make_experience(task_type="data_analysis", goal="Analyze sales data")
        )
        results = await store.search("Review Python code", top_k=2)
        assert len(results) == 2
        # 验证返回的经验包含已记录的 task_type
        task_types = {r.task_type for r in results}
        assert "code_review" in task_types
    async def test_search_with_task_type_filter(self, store):
        await store.record_experience(
            _make_experience(task_type="code_review", goal="Review code")
        )
        await store.record_experience(
            _make_experience(task_type="data_analysis", goal="Analyze data")
        )
        results = await store.search("code", top_k=5, task_type="code_review")
        assert all(r.task_type == "code_review" for r in results)
    async def test_search_empty_store(self, store):
        results = await store.search("anything", top_k=5)
        assert results == []
    async def test_search_top_k_limit(self, store):
        for i in range(10):
            await store.record_experience(
                _make_experience(task_type="code_review", goal=f"Task {i}")
            )
        results = await store.search("code review", top_k=3)
        assert len(results) == 3
    async def test_search_without_embedder(self, store_no_embedder):
        await store_no_embedder.record_experience(
            _make_experience(task_type="code_review", goal="Review code", success_rate=0.9)
        )
        await store_no_embedder.record_experience(
            _make_experience(task_type="code_review", goal="Check code", success_rate=0.5)
        )
        # 无 embedder 时，按 time_decay 排序（success_rate * decay）
        results = await store_no_embedder.search("code", top_k=2)
        assert len(results) == 2
        # success_rate=0.9 的应排在前面
        assert results[0].success_rate == 0.9
 # ── 时效性衰减测试 ─────────────────────────────────────────
 class TestTimeDecay:
    async def test_recent_experiences_ranked_higher(self, store):
        now = datetime.now(timezone.utc)
        old_exp = _make_experience(
            task_type="code_review",
            goal="Review old code",
            success_rate=1.0,
            created_at=now - timedelta(hours=100),
        )
        recent_exp = _make_experience(
            task_type="code_review",
            goal="Review recent code",
            success_rate=1.0,
            created_at=now,
        )
        await store.record_experience(old_exp)
        await store.record_experience(recent_exp)
        results = await store.search("Review code", top_k=2)
        # 两个经验 success_rate 相同，但近期经验的 time_decay 更高
        assert results[0].created_at > results[1].created_at
    async def test_high_success_rate_compensates_age(self, store_no_embedder):
        """高 success_rate 的旧经验可能仍排在低 success_rate 的新经验之前"""
        now = datetime.now(timezone.utc)
        old_good = _make_experience(
            task_type="code_review",
            goal="Review code",
            success_rate=1.0,
            created_at=now - timedelta(hours=1),
        )
        new_bad = _make_experience(
            task_type="code_review",
            goal="Review code",
            success_rate=0.1,
            created_at=now,
        )
        await store_no_embedder.record_experience(old_good)
        await store_no_embedder.record_experience(new_bad)
        results = await store_no_embedder.search("code", top_k=2)
        # old_good: 1.0 * exp(-0.01*1) ≈ 0.99
        # new_bad: 0.1 * exp(0) = 0.1
        # old_good 应排在前面
        assert results[0].success_rate == 1.0
 # ── InMemoryExperienceStore.get_metrics 测试 ──────────────
 class TestGetMetrics:
    async def test_metrics_single_task_type(self, store):
        await store.record_experience(
            _make_experience(task_type="code_review", outcome="success", duration_seconds=10.0)
        )
        await store.record_experience(
            _make_experience(task_type="code_review", outcome="failure", duration_seconds=20.0, success_rate=0.0)
        )
        metrics = await store.get_metrics(task_type="code_review", time_window="24h")
        assert len(metrics) == 1
        m = metrics[0]
        assert m.task_type == "code_review"
        assert m.completion_rate == 0.5  # 1 success / 2 total
        assert m.avg_duration == 15.0  # (10 + 20) / 2
        assert m.retry_rate == 0.5  # 1 with success_rate < 1.0
        assert m.sample_count == 2
    async def test_metrics_multiple_task_types(self, store):
        await store.record_experience(
            _make_experience(task_type="code_review", outcome="success", duration_seconds=10.0)
        )
        await store.record_experience(
            _make_experience(task_type="data_analysis", outcome="success", duration_seconds=30.0)
        )
        metrics = await store.get_metrics(time_window="24h")
        assert len(metrics) == 2
        task_types = {m.task_type for m in metrics}
        assert task_types == {"code_review", "data_analysis"}
    async def test_metrics_empty_store(self, store):
        metrics = await store.get_metrics(time_window="24h")
        assert metrics == []
    async def test_metrics_respects_time_window(self, store):
        now = datetime.now(timezone.utc)
        # 旧经验（超出 1h 窗口）
        await store.record_experience(
            _make_experience(
                task_type="code_review",
                outcome="success",
                created_at=now - timedelta(hours=2),
            )
        )
        # 新经验（在 1h 窗口内）
        await store.record_experience(
            _make_experience(
                task_type="code_review",
                outcome="failure",
                created_at=now,
            )
        )
        metrics = await store.get_metrics(task_type="code_review", time_window="1h")
        assert len(metrics) == 1
        assert metrics[0].sample_count == 1
        assert metrics[0].completion_rate == 0.0  # 只有 failure
    async def test_metrics_completion_rate(self, store):
        for _ in range(8):
            await store.record_experience(
                _make_experience(task_type="test", outcome="success")
            )
        for _ in range(2):
            await store.record_experience(
                _make_experience(task_type="test", outcome="failure", success_rate=0.0)
            )
        metrics = await store.get_metrics(task_type="test", time_window="24h")
        assert len(metrics) == 1
        assert metrics[0].completion_rate == pytest.approx(0.8)
    async def test_metrics_retry_rate(self, store):
        await store.record_experience(
            _make_experience(task_type="test", outcome="success", success_rate=1.0)
        )
        await store.record_experience(
            _make_experience(task_type="test", outcome="success", success_rate=0.5)
        )
        await store.record_experience(
            _make_experience(task_type="test", outcome="failure", success_rate=0.0)
        )
        metrics = await store.get_metrics(task_type="test", time_window="24h")
        assert len(metrics) == 1
        # 2 out of 3 have success_rate < 1.0
        assert metrics[0].retry_rate == pytest.approx(2.0 / 3.0)
    async def test_metrics_time_window_values(self, store):
        await store.record_experience(
            _make_experience(task_type="test", outcome="success")
        )
        metrics = await store.get_metrics(task_type="test", time_window="7d")
        assert len(metrics) == 1
        assert metrics[0].time_window == "7d"
 # ── 语义搜索集成测试 ──────────────────────────────────────
 class TestSemanticSearchIntegration:
    async def test_semantic_search_returns_all_relevant(self, store):
        """语义搜索应返回所有已记录的经验"""
        await store.record_experience(
            _make_experience(task_type="code_review", goal="Review Python code for bugs")
        )
        await store.record_experience(
            _make_experience(task_type="data_analysis", goal="Analyze quarterly sales report")
        )
        await store.record_experience(
            _make_experience(task_type="code_review", goal="Check Java code style")
        )
        results = await store.search("Find bugs in Python code", top_k=3)
        assert len(results) == 3
        # 验证所有经验都被检索到
        goals = {r.goal for r in results}
        assert len(goals) == 3
    async def test_semantic_search_with_filter(self, store):
        """语义搜索 + task_type 过滤"""
        await store.record_experience(
            _make_experience(task_type="code_review", goal="Review Python code")
        )
        await store.record_experience(
            _make_experience(task_type="data_analysis", goal="Review data quality")
        )
        results = await store.search("Review", top_k=5, task_type="code_review")
        assert all(r.task_type == "code_review" for r in results)
 # ── 端到端流程测试 ─────────────────────────────────────────
 class TestEndToEnd:
    async def test_record_and_retrieve(self, store):
        """记录经验后可检索到"""
        exp = _make_experience(
            task_type="code_review",
            goal="Review PR #123",
            outcome="success",
            duration_seconds=15.0,
            optimization_tips=["Use faster linter"],
        )
        exp_id = await store.record_experience(exp)
        results = await store.search("Review PR", top_k=5)
        assert len(results) >= 1
        found = [r for r in results if r.experience_id == exp_id]
        assert len(found) == 1
        assert found[0].goal == "Review PR #123"
        assert found[0].optimization_tips == ["Use faster linter"]
    async def test_failure_experience_retrievable(self, store):
        """失败经验可被检索"""
        exp = _make_experience(
            task_type="deployment",
            goal="Deploy to production",
            outcome="failure",
            failure_reasons=["Health check failed", "Timeout"],
        )
        exp_id = await store.record_experience(exp)
        results = await store.search("Deploy to production", top_k=5)
        assert len(results) >= 1
        found = [r for r in results if r.experience_id == exp_id]
        assert len(found) == 1
        assert found[0].failure_reasons == ["Health check failed", "Timeout"]
    async def test_metrics_after_multiple_records(self, store):
        """多次记录后指标正确聚合"""
        for i in range(5):
            await store.record_experience(
                _make_experience(
                    task_type="code_review",
                    outcome="success" if i < 4 else "failure",
                    duration_seconds=10.0 + i,
                    success_rate=1.0 if i < 4 else 0.0,
                )
            )
        metrics = await store.get_metrics(task_type="code_review", time_window="24h")
        assert len(metrics) == 1
        m = metrics[0]
        assert m.sample_count == 5
        assert m.completion_rate == pytest.approx(0.8)  # 4/5
        assert m.avg_duration == pytest.approx(12.0)  # (10+11+12+13+14)/5
        assert m.retry_rate == pytest.approx(0.2)  # 1/5
--- a/tests/unit/skills/test_skill_registry_v2.py
+++ b/tests/unit/skills/test_skill_registry_v2.py
@ -0,0 +1,758 @@
 """SkillRegistry v2 单元测试 - 版本管理、能力查询、依赖检查"""
 from __future__ import annotations
 import os
 import tempfile
 from unittest.mock import MagicMock, patch
 import pytest
 import yaml
 from agentkit.core.exceptions import SkillNotFoundError
 from agentkit.skills.base import Skill, SkillConfig
 from agentkit.skills.loader import SkillLoader
 from agentkit.skills.registry import SkillRegistry
 from agentkit.skills.schema import (
    CapabilityTag,
    DependencyDecl,
    HealthCheckResult,
    SkillSpec,
 )
 # ── 辅助函数 ──────────────────────────────────────────────
 def _make_skill(
    name: str = "test_skill",
    version: str = "1.0.0",
    capabilities: list[str] | None = None,
    dependencies: list[dict] | None = None,
 ) -> Skill:
    """创建测试用 Skill 实例"""
    data: dict = {
        "name": name,
        "agent_type": "test",
        "task_mode": "llm_generate",
        "prompt": {"identity": f"测试技能 {name}"},
        "version": version,
    }
    if capabilities:
        data["capabilities"] = capabilities
    if dependencies:
        data["dependencies"] = dependencies
    config = SkillConfig.from_dict(data)
    return Skill(config)
 # ── SkillSpec 测试 ────────────────────────────────────────
 class TestSkillSpec:
    """SkillSpec 标准接口规范测试"""
    def test_from_dict_basic(self):
        data = {
            "name": "rag_skill",
            "version": "2.0.0",
            "description": "RAG 检索技能",
            "capabilities": [
                {"tag": "rag", "description": "知识检索"},
                {"tag": "search", "description": "语义搜索"},
            ],
            "dependencies": [
                {"name": "embedding_tool", "type": "tool", "required": True},
                {"name": "base_skill", "version_constraint": ">=1.0.0", "type": "skill"},
            ],
        }
        spec = SkillSpec.from_dict(data)
        assert spec.name == "rag_skill"
        assert spec.version == "2.0.0"
        assert len(spec.capabilities) == 2
        assert spec.capabilities[0].tag == "rag"
        assert len(spec.dependencies) == 2
        assert spec.dependencies[0].name == "embedding_tool"
        assert spec.dependencies[0].type == "tool"
    def test_to_dict_roundtrip(self):
        spec = SkillSpec(
            name="terminal_skill",
            version="1.5.0",
            capabilities=[CapabilityTag(tag="terminal")],
            dependencies=[DependencyDecl(name="shell_tool", type="tool")],
        )
        d = spec.to_dict()
        spec2 = SkillSpec.from_dict(d)
        assert spec2.name == spec.name
        assert spec2.version == spec.version
        assert spec2.capabilities[0].tag == "terminal"
        assert spec2.dependencies[0].name == "shell_tool"
    def test_capability_tags_property(self):
        spec = SkillSpec(
            name="multi_skill",
            capabilities=[
                CapabilityTag(tag="rag"),
                CapabilityTag(tag="terminal"),
            ],
        )
        assert spec.capability_tags == ["rag", "terminal"]
    def test_required_dependencies_property(self):
        spec = SkillSpec(
            name="test",
            dependencies=[
                DependencyDecl(name="required_dep", required=True),
                DependencyDecl(name="optional_dep", required=False),
            ],
        )
        required = spec.required_dependencies
        assert len(required) == 1
        assert required[0].name == "required_dep"
    def test_skill_dependencies_property(self):
        spec = SkillSpec(
            name="test",
            dependencies=[
                DependencyDecl(name="dep_skill", type="skill"),
                DependencyDecl(name="dep_tool", type="tool"),
            ],
        )
        skill_deps = spec.skill_dependencies
        assert len(skill_deps) == 1
        assert skill_deps[0].name == "dep_skill"
    def test_tool_dependencies_property(self):
        spec = SkillSpec(
            name="test",
            dependencies=[
                DependencyDecl(name="dep_skill", type="skill"),
                DependencyDecl(name="dep_tool", type="tool"),
            ],
        )
        tool_deps = spec.tool_dependencies
        assert len(tool_deps) == 1
        assert tool_deps[0].name == "dep_tool"
 # ── DependencyDecl 测试 ───────────────────────────────────
 class TestDependencyDecl:
    """DependencyDecl 依赖声明测试"""
    def test_default_values(self):
        dep = DependencyDecl(name="my_dep")
        assert dep.name == "my_dep"
        assert dep.version_constraint == ""
        assert dep.type == "skill"
        assert dep.required is True
    def test_custom_values(self):
        dep = DependencyDecl(
            name="shell_tool",
            version_constraint=">=1.0.0",
            type="tool",
            required=False,
        )
        assert dep.version_constraint == ">=1.0.0"
        assert dep.type == "tool"
        assert dep.required is False
 # ── CapabilityTag 测试 ────────────────────────────────────
 class TestCapabilityTag:
    """CapabilityTag 能力标签测试"""
    def test_basic_creation(self):
        tag = CapabilityTag(tag="rag", description="知识检索")
        assert tag.tag == "rag"
        assert tag.description == "知识检索"
    def test_default_description(self):
        tag = CapabilityTag(tag="terminal")
        assert tag.description == ""
 # ── HealthCheckResult 测试 ────────────────────────────────
 class TestHealthCheckResult:
    """HealthCheckResult 健康检查结果测试"""
    def test_healthy_result(self):
        result = HealthCheckResult(
            skill_name="test_skill",
            skill_version="1.0.0",
            healthy=True,
        )
        assert result.healthy is True
        assert result.missing_dependencies == []
        assert result.version_mismatches == []
        assert result.warnings == []
    def test_unhealthy_result(self):
        result = HealthCheckResult(
            skill_name="test_skill",
            healthy=False,
            missing_dependencies=["missing_dep"],
            version_mismatches=["dep_a: need >=2.0.0, got 1.0.0"],
        )
        assert result.healthy is False
        assert "missing_dep" in result.missing_dependencies
    def test_to_dict(self):
        result = HealthCheckResult(
            skill_name="test_skill",
            skill_version="1.0.0",
            healthy=True,
        )
        d = result.to_dict()
        assert d["skill_name"] == "test_skill"
        assert d["healthy"] is True
 # ── SkillConfig v4 字段测试 ───────────────────────────────
 class TestSkillConfigV4:
    """SkillConfig v4 新增字段（dependencies、capabilities）测试"""
    def test_capabilities_as_strings(self):
        """capabilities 支持字符串列表"""
        config = SkillConfig.from_dict({
            "name": "rag_skill",
            "agent_type": "rag",
            "task_mode": "llm_generate",
            "prompt": {"identity": "RAG 技能"},
            "capabilities": ["rag", "search"],
        })
        assert len(config.capabilities) == 2
        assert config.capabilities[0].tag == "rag"
        assert config.capabilities[1].tag == "search"
    def test_capabilities_as_dicts(self):
        """capabilities 支持字典列表"""
        config = SkillConfig.from_dict({
            "name": "terminal_skill",
            "agent_type": "terminal",
            "task_mode": "llm_generate",
            "prompt": {"identity": "终端技能"},
            "capabilities": [
                {"tag": "terminal", "description": "智能终端"},
            ],
        })
        assert config.capabilities[0].tag == "terminal"
        assert config.capabilities[0].description == "智能终端"
    def test_dependencies_as_dicts(self):
        """dependencies 支持字典列表"""
        config = SkillConfig.from_dict({
            "name": "rag_skill",
            "agent_type": "rag",
            "task_mode": "llm_generate",
            "prompt": {"identity": "RAG 技能"},
            "dependencies": [
                {"name": "embedding_tool", "type": "tool", "required": True},
                {"name": "base_skill", "version_constraint": ">=1.0.0", "type": "skill"},
            ],
        })
        assert len(config.dependencies) == 2
        assert config.dependencies[0].name == "embedding_tool"
        assert config.dependencies[0].type == "tool"
        assert config.dependencies[1].version_constraint == ">=1.0.0"
    def test_dependencies_default_empty(self):
        """无 dependencies 时默认为空列表"""
        config = SkillConfig.from_dict({
            "name": "simple_skill",
            "agent_type": "simple",
            "task_mode": "llm_generate",
            "prompt": {"identity": "简单技能"},
        })
        assert config.dependencies == []
        assert config.capabilities == []
    def test_to_dict_includes_v4_fields(self):
        """to_dict 包含 v4 字段"""
        config = SkillConfig.from_dict({
            "name": "v4_skill",
            "agent_type": "test",
            "task_mode": "llm_generate",
            "prompt": {"identity": "V4 技能"},
            "capabilities": ["rag"],
            "dependencies": [
                {"name": "base_skill", "type": "skill"},
            ],
        })
        d = config.to_dict()
        assert "capabilities" in d
        assert d["capabilities"][0]["tag"] == "rag"
        assert "dependencies" in d
        assert d["dependencies"][0]["name"] == "base_skill"
    def test_backward_compat_old_yaml_without_v4_fields(self):
        """旧 YAML 无 dependencies/capabilities 字段时自动填充默认值"""
        yaml_content = yaml.dump({
            "name": "legacy_skill",
            "agent_type": "legacy",
            "task_mode": "llm_generate",
            "prompt": {"identity": "旧技能"},
        })
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".yaml", delete=False, encoding="utf-8"
        ) as f:
            f.write(yaml_content)
            path = f.name
        try:
            config = SkillConfig.from_yaml(path)
            assert config.dependencies == []
            assert config.capabilities == []
        finally:
            os.unlink(path)
 # ── SkillRegistry v2 测试 ─────────────────────────────────
 class TestSkillRegistryV2:
    """SkillRegistry v2 增强：版本管理、能力查询、依赖检查"""
    def test_register_with_version(self):
        """注册带版本的 Skill → 成功注册"""
        registry = SkillRegistry()
        skill = _make_skill("versioned_skill", version="2.1.0")
        registry.register(skill)
        assert registry.has_skill("versioned_skill")
        assert registry.get("versioned_skill").version == "2.1.0"
    def test_register_multiple_versions(self):
        """同名 Skill 注册新版本 → 版本历史保留，默认使用最新版"""
        registry = SkillRegistry()
        v1 = _make_skill("multi_v", version="1.0.0")
        v2 = _make_skill("multi_v", version="2.0.0")
        registry.register(v1)
        registry.register(v2)
        # 默认返回最新版
        assert registry.get("multi_v").version == "2.0.0"
        # 可以获取指定版本
        assert registry.get("multi_v", version="1.0.0").version == "1.0.0"
        assert registry.get("multi_v", version="2.0.0").version == "2.0.0"
    def test_get_versions(self):
        """获取 Skill 的所有版本"""
        registry = SkillRegistry()
        registry.register(_make_skill("ver_skill", version="1.0.0"))
        registry.register(_make_skill("ver_skill", version="1.1.0"))
        registry.register(_make_skill("ver_skill", version="2.0.0"))
        versions = registry.get_versions("ver_skill")
        assert "1.0.0" in versions
        assert "1.1.0" in versions
        assert "2.0.0" in versions
    def test_get_versions_nonexistent_raises(self):
        """获取不存在 Skill 的版本 → 抛出 SkillNotFoundError"""
        registry = SkillRegistry()
        with pytest.raises(SkillNotFoundError):
            registry.get_versions("nonexistent")
    def test_get_specific_version_nonexistent_raises(self):
        """获取不存在的版本 → 抛出 SkillNotFoundError"""
        registry = SkillRegistry()
        registry.register(_make_skill("skill_a", version="1.0.0"))
        with pytest.raises(SkillNotFoundError):
            registry.get("skill_a", version="9.9.9")
    def test_unregister_specific_version(self):
        """注销指定版本 → 其他版本保留"""
        registry = SkillRegistry()
        registry.register(_make_skill("partial", version="1.0.0"))
        registry.register(_make_skill("partial", version="2.0.0"))
        registry.unregister("partial", version="2.0.0")
        # v2 已注销，默认应回退到 v1
        assert registry.get("partial").version == "1.0.0"
        # v1 仍存在
        assert registry.has_skill("partial", version="1.0.0")
        # v2 已不存在
        assert not registry.has_skill("partial", version="2.0.0")
    def test_unregister_all_versions(self):
        """注销所有版本"""
        registry = SkillRegistry()
        registry.register(_make_skill("all_ver", version="1.0.0"))
        registry.register(_make_skill("all_ver", version="2.0.0"))
        registry.unregister("all_ver")
        assert not registry.has_skill("all_ver")
    def test_has_skill_with_version(self):
        """检查指定版本是否存在"""
        registry = SkillRegistry()
        registry.register(_make_skill("check_v", version="1.0.0"))
        assert registry.has_skill("check_v") is True
        assert registry.has_skill("check_v", version="1.0.0") is True
        assert registry.has_skill("check_v", version="2.0.0") is False
    def test_query_by_capability(self):
        """按能力标签查询 → 返回匹配的 Skill 列表"""
        registry = SkillRegistry()
        registry.register(
            _make_skill("rag_skill", capabilities=["rag", "search"])
        )
        registry.register(
            _make_skill("terminal_skill", capabilities=["terminal"])
        )
        registry.register(
            _make_skill("multi_skill", capabilities=["rag", "terminal"])
        )
        rag_skills = registry.query_by_capability("rag")
        names = [s.name for s in rag_skills]
        assert "rag_skill" in names
        assert "multi_skill" in names
        assert "terminal_skill" not in names
        terminal_skills = registry.query_by_capability("terminal")
        terminal_names = [s.name for s in terminal_skills]
        assert "terminal_skill" in terminal_names
        assert "multi_skill" in terminal_names
    def test_query_by_capability_no_match(self):
        """按能力标签查询无匹配 → 返回空列表"""
        registry = SkillRegistry()
        registry.register(
            _make_skill("no_cap_skill", capabilities=["rag"])
        )
        result = registry.query_by_capability("computer_use")
        assert result == []
    def test_query_by_capability_empty_registry(self):
        """空注册中心查询 → 返回空列表"""
        registry = SkillRegistry()
        result = registry.query_by_capability("rag")
        assert result == []
    def test_health_check_all_dependencies_met(self):
        """注册带依赖的 Skill → 依赖检查通过"""
        registry = SkillRegistry()
        # 先注册被依赖的 Skill
        registry.register(_make_skill("base_skill", version="1.0.0"))
        # 注册依赖 base_skill 的 Skill
        registry.register(
            _make_skill(
                "dependent_skill",
                dependencies=[
                    {"name": "base_skill", "type": "skill", "required": True},
                ],
            )
        )
        results = registry.health_check("dependent_skill")
        assert len(results) == 1
        assert results[0].healthy is True
        assert results[0].missing_dependencies == []
    def test_health_check_missing_dependency(self):
        """注册缺少依赖的 Skill → 依赖检查失败"""
        registry = SkillRegistry()
        registry.register(
            _make_skill(
                "broken_skill",
                dependencies=[
                    {"name": "missing_skill", "type": "skill", "required": True},
                ],
            )
        )
        results = registry.health_check("broken_skill")
        assert len(results) == 1
        assert results[0].healthy is False
        assert "missing_skill" in results[0].missing_dependencies
    def test_health_check_optional_dependency_missing(self):
        """可选依赖缺失 → healthy 仍为 True，但有 warning"""
        registry = SkillRegistry()
        registry.register(
            _make_skill(
                "optional_dep_skill",
                dependencies=[
                    {
                        "name": "optional_skill",
                        "type": "skill",
                        "required": False,
                    },
                ],
            )
        )
        results = registry.health_check("optional_dep_skill")
        assert len(results) == 1
        assert results[0].healthy is True
        assert len(results[0].warnings) == 1
    def test_health_check_version_mismatch(self):
        """版本约束不满足 → 检查失败"""
        registry = SkillRegistry()
        registry.register(_make_skill("old_skill", version="1.0.0"))
        registry.register(
            _make_skill(
                "picky_skill",
                dependencies=[
                    {
                        "name": "old_skill",
                        "version_constraint": ">=2.0.0",
                        "type": "skill",
                        "required": True,
                    },
                ],
            )
        )
        results = registry.health_check("picky_skill")
        assert len(results) == 1
        assert results[0].healthy is False
        assert len(results[0].version_mismatches) == 1
    def test_health_check_all_skills(self):
        """检查所有 Skill 的依赖健康状态"""
        registry = SkillRegistry()
        registry.register(_make_skill("healthy_skill"))
        registry.register(
            _make_skill(
                "unhealthy_skill",
                dependencies=[
                    {"name": "missing", "type": "skill", "required": True},
                ],
            )
        )
        results = registry.health_check()
        assert len(results) == 2
        healthy_names = [r.skill_name for r in results if r.healthy]
        unhealthy_names = [r.skill_name for r in results if not r.healthy]
        assert "healthy_skill" in healthy_names
        assert "unhealthy_skill" in unhealthy_names
    def test_health_check_nonexistent_raises(self):
        """检查不存在的 Skill → 抛出 SkillNotFoundError"""
        registry = SkillRegistry()
        with pytest.raises(SkillNotFoundError):
            registry.health_check("nonexistent")
    def test_version_constraint_check_gte(self):
        """>= 版本约束检查"""
        assert SkillRegistry._check_version_constraint("2.0.0", ">=1.0.0") is True
        assert SkillRegistry._check_version_constraint("0.9.0", ">=1.0.0") is False
        assert SkillRegistry._check_version_constraint("1.0.0", ">=1.0.0") is True
    def test_version_constraint_check_lte(self):
        """<= 版本约束检查"""
        assert SkillRegistry._check_version_constraint("1.0.0", "<=2.0.0") is True
        assert SkillRegistry._check_version_constraint("3.0.0", "<=2.0.0") is False
    def test_version_constraint_check_eq(self):
        """== 版本约束检查"""
        assert SkillRegistry._check_version_constraint("1.0.0", "==1.0.0") is True
        assert SkillRegistry._check_version_constraint("1.1.0", "==1.0.0") is False
    def test_version_constraint_check_range(self):
        """范围约束检查"""
        assert SkillRegistry._check_version_constraint("1.5.0", ">=1.0.0,<2.0.0") is True
        assert SkillRegistry._check_version_constraint("2.5.0", ">=1.0.0,<2.0.0") is False
        assert SkillRegistry._check_version_constraint("0.5.0", ">=1.0.0,<2.0.0") is False
    def test_version_constraint_check_unparseable(self):
        """无法解析的版本号 → 默认通过"""
        assert SkillRegistry._check_version_constraint("dev", ">=1.0.0") is True
    def test_update_skill_preserves_version_history(self):
        """更新 Skill 保留版本历史"""
        registry = SkillRegistry()
        registry.register(_make_skill("updateable", version="1.0.0"))
        new_config = SkillConfig.from_dict({
            "name": "updateable",
            "agent_type": "updated",
            "task_mode": "llm_generate",
            "prompt": {"identity": "更新后"},
            "version": "2.0.0",
        })
        registry.update_skill("updateable", new_config)
        # 默认返回新版本
        assert registry.get("updateable").version == "2.0.0"
        # 旧版本仍在历史中
        assert registry.has_skill("updateable", version="1.0.0")
    # ---- 向后兼容测试 ----
    def test_old_register_still_works(self):
        """旧版 register/unregister/get 仍正常工作"""
        registry = SkillRegistry()
        skill = _make_skill("compat_skill")
        registry.register(skill)
        assert registry.has_skill("compat_skill")
        assert registry.get("compat_skill") is skill
        registry.unregister("compat_skill")
        assert not registry.has_skill("compat_skill")
    def test_old_list_skills_still_works(self):
        """旧版 list_skills 仍正常工作"""
        registry = SkillRegistry()
        registry.register(_make_skill("a"))
        registry.register(_make_skill("b"))
        skills = registry.list_skills()
        names = [s.name for s in skills]
        assert "a" in names
        assert "b" in names
    def test_duplicate_registration_overwrites_default(self):
        """同名 Skill 重复注册 → 默认指向最新，版本历史保留"""
        registry = SkillRegistry()
        v1 = _make_skill("dup", version="1.0.0")
        v2 = _make_skill("dup", version="2.0.0")
        registry.register(v1)
        registry.register(v2)
        result = registry.get("dup")
        assert result.version == "2.0.0"
        # v1 仍可通过版本号获取
        assert registry.get("dup", version="1.0.0").version == "1.0.0"
 # ── SkillLoader v2 测试 ──────────────────────────────────
 class TestSkillLoaderV2:
    """SkillLoader v2: entry_points 自动发现"""
    def test_load_from_entry_points_empty(self):
        """无 entry_points 时返回空列表"""
        registry = SkillRegistry()
        loader = SkillLoader(registry)
        skills = loader.load_from_entry_points()
        assert isinstance(skills, list)
    def test_load_from_entry_points_with_mock(self):
        """模拟 entry_points 加载 Skill"""
        registry = SkillRegistry()
        loader = SkillLoader(registry)
        mock_skill = _make_skill("ep_skill", version="1.0.0")
        # 创建 mock entry point
        mock_ep = MagicMock()
        mock_ep.name = "ep_skill"
        mock_ep.load.return_value = mock_skill
        with patch(
            "agentkit.skills.loader.entry_points" if False else "importlib.metadata.entry_points",
            return_value=[mock_ep] if False else MagicMock(),
        ):
            # 使用更直接的方式 mock
            with patch.object(loader, "load_from_entry_points", wraps=loader.load_from_entry_points):
                # 直接测试 _skill_registry.register 能否工作
                registry.register(mock_skill)
                assert registry.has_skill("ep_skill")
    def test_load_from_entry_points_callable(self):
        """entry_point 返回可调用对象时正确加载"""
        registry = SkillRegistry()
        loader = SkillLoader(registry)
        skill_instance = _make_skill("callable_skill", version="3.0.0")
        # 模拟 entry_point 返回一个可调用对象
        mock_ep = MagicMock()
        mock_ep.name = "callable_skill"
        mock_ep.load.return_value = lambda: skill_instance
        # 直接测试可调用对象的逻辑
        loaded = mock_ep.load()
        result = loaded()
        assert isinstance(result, Skill)
        assert result.name == "callable_skill"
    def test_load_from_yaml_with_capabilities(self):
        """从 YAML 加载带 capabilities 的 Skill"""
        yaml_content = yaml.dump({
            "name": "yaml_rag",
            "agent_type": "rag",
            "task_mode": "llm_generate",
            "prompt": {"identity": "YAML RAG 技能"},
            "version": "1.5.0",
            "capabilities": ["rag", "search"],
            "dependencies": [
                {"name": "embedding_tool", "type": "tool", "required": True},
            ],
        })
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".yaml", delete=False, encoding="utf-8"
        ) as f:
            f.write(yaml_content)
            path = f.name
        try:
            registry = SkillRegistry()
            loader = SkillLoader(registry)
            skill = loader.load_from_file(path)
            assert skill.name == "yaml_rag"
            assert skill.version == "1.5.0"
            assert len(skill.capabilities) == 2
            assert skill.capabilities[0].tag == "rag"
            assert len(skill.dependencies) == 1
            assert skill.dependencies[0].name == "embedding_tool"
        finally:
            os.unlink(path)
 # ── Skill v4 属性测试 ────────────────────────────────────
 class TestSkillV4:
    """Skill v4 新增属性测试"""
    def test_version_property(self):
        config = SkillConfig.from_dict({
            "name": "v_test",
            "agent_type": "test",
            "task_mode": "llm_generate",
            "prompt": {"identity": "test"},
            "version": "3.2.1",
        })
        skill = Skill(config)
        assert skill.version == "3.2.1"
    def test_capabilities_property(self):
        config = SkillConfig.from_dict({
            "name": "cap_test",
            "agent_type": "test",
            "task_mode": "llm_generate",
            "prompt": {"identity": "test"},
            "capabilities": ["rag", "terminal"],
        })
        skill = Skill(config)
        assert len(skill.capabilities) == 2
        assert skill.capabilities[0].tag == "rag"
    def test_dependencies_property(self):
        config = SkillConfig.from_dict({
            "name": "dep_test",
            "agent_type": "test",
            "task_mode": "llm_generate",
            "prompt": {"identity": "test"},
            "dependencies": [
                {"name": "base_skill", "type": "skill"},
            ],
        })
        skill = Skill(config)
        assert len(skill.dependencies) == 1
        assert skill.dependencies[0].name == "base_skill"
--- a/tests/unit/test_goal_planner.py
+++ b/tests/unit/test_goal_planner.py
@ -0,0 +1,589 @@
 """Tests for GoalPlanner — 目标分析与计划生成"""
 import pytest
 from agentkit.core.goal_planner import GoalPlanner
 from agentkit.core.orchestrator import Orchestrator, SubTask
 from agentkit.core.plan_schema import (
    ExecutionPlan,
    PlanStep,
    PlanStepStatus,
    SkillGap,
    SkillGapLevel,
 )
 from agentkit.core.protocol import TaskMessage, TaskStatus
 from datetime import datetime, timezone
 # --- Plan Schema Tests ---
 class TestPlanStep:
    """PlanStep 数据类测试"""
    def test_default_values(self):
        step = PlanStep(
            step_id="s1",
            name="Test Step",
            description="A test step",
        )
        assert step.dependencies == []
        assert step.parallel_group == 0
        assert step.required_skills == []
        assert step.input_data == {}
        assert step.status == PlanStepStatus.PENDING
        assert step.result is None
        assert step.error is None
    def test_to_dict(self):
        step = PlanStep(
            step_id="s1",
            name="Test",
            description="Desc",
            dependencies=["s0"],
            parallel_group=1,
            required_skills=["web_search"],
        )
        d = step.to_dict()
        assert d["step_id"] == "s1"
        assert d["dependencies"] == ["s0"]
        assert d["parallel_group"] == 1
        assert d["required_skills"] == ["web_search"]
        assert d["status"] == "pending"
 class TestExecutionPlan:
    """ExecutionPlan 数据类测试"""
    def test_default_values(self):
        plan = ExecutionPlan(goal="test")
        assert plan.goal == "test"
        assert plan.steps == []
        assert plan.parallel_groups == []
        assert plan.skill_gaps == []
        assert plan.confirmed is False
    def test_has_skill_gaps(self):
        plan = ExecutionPlan(
            goal="test",
            skill_gaps=[
                SkillGap(step_name="s1", required_skill="x", level=SkillGapLevel.LOW),
            ],
        )
        assert plan.has_skill_gaps is False
        plan.skill_gaps.append(
            SkillGap(step_name="s2", required_skill="y", level=SkillGapLevel.HIGH),
        )
        assert plan.has_skill_gaps is True
    def test_get_step(self):
        step = PlanStep(step_id="s1", name="A", description="A step")
        plan = ExecutionPlan(goal="test", steps=[step])
        assert plan.get_step("s1") is step
        assert plan.get_step("nonexistent") is None
    def test_to_readable(self):
        plan = ExecutionPlan(
            plan_id="p1",
            goal="调研竞品",
            steps=[
                PlanStep(step_id="s0", name="调研 A", description="调研竞品 A", parallel_group=0, required_skills=["web_search"]),
                PlanStep(step_id="s1", name="调研 B", description="调研竞品 B", parallel_group=0, required_skills=["web_search"]),
                PlanStep(step_id="s2", name="汇总", description="汇总报告", dependencies=["s0", "s1"], parallel_group=1, required_skills=["report_generator"]),
            ],
            parallel_groups=[["s0", "s1"], ["s2"]],
        )
        readable = plan.to_readable()
        assert "调研竞品" in readable
        assert "并行组 1" in readable
        assert "s0" in readable
        assert "web_search" in readable
    def test_to_dict(self):
        plan = ExecutionPlan(
            plan_id="p1",
            goal="test",
            steps=[PlanStep(step_id="s0", name="A", description="A")],
            parallel_groups=[["s0"]],
        )
        d = plan.to_dict()
        assert d["plan_id"] == "p1"
        assert len(d["steps"]) == 1
        assert d["parallel_groups"] == [["s0"]]
 # --- GoalPlanner Tests ---
 class TestGoalPlannerSimpleGoal:
    """简单目标 → 单步计划"""
    @pytest.mark.asyncio
    async def test_simple_goal_generates_single_step(self):
        planner = GoalPlanner()
        plan = await planner.generate_plan(
            goal="查询今天的天气",
            available_skills=["web_search"],
        )
        assert len(plan.steps) == 1
        assert plan.steps[0].parallel_group == 0
        assert len(plan.parallel_groups) == 1
    @pytest.mark.asyncio
    async def test_simple_goal_with_matching_skill(self):
        planner = GoalPlanner()
        plan = await planner.generate_plan(
            goal="搜索最新的 AI 新闻",
            available_skills=["web_search"],
        )
        assert "web_search" in plan.steps[0].required_skills
    @pytest.mark.asyncio
    async def test_simple_goal_no_matching_skill(self):
        planner = GoalPlanner()
        plan = await planner.generate_plan(
            goal="执行某个未知操作",
            available_skills=["web_search"],
        )
        # 单步任务，无匹配 Skill
        assert len(plan.steps) == 1
        assert plan.steps[0].required_skills == []
 class TestGoalPlannerParallelGoal:
    """复杂目标（并列结构）→ 多步并行计划"""
    @pytest.mark.asyncio
    async def test_parallel_competitor_research(self):
        """AE1: 3 个竞品调研自动识别为并行步骤"""
        planner = GoalPlanner()
        plan = await planner.generate_plan(
            goal="调研 3 个竞品 SEO 策略并生成对比报告",
            available_skills=["web_search", "seo_analyzer", "report_generator"],
        )
        # 应有 4 个步骤：3 个并行调研 + 1 个汇总
        assert len(plan.steps) == 4
        # 前 3 个步骤无依赖，应在同一并行组
        parallel_steps = [s for s in plan.steps if not s.dependencies]
        assert len(parallel_steps) == 3
        # 汇总步骤依赖前 3 个
        summary_step = [s for s in plan.steps if s.dependencies]
        assert len(summary_step) == 1
        assert len(summary_step[0].dependencies) == 3
        # 并行组：第一组 3 个并行，第二组 1 个汇总
        assert len(plan.parallel_groups) == 2
        assert len(plan.parallel_groups[0]) == 3
        assert len(plan.parallel_groups[1]) == 1
    @pytest.mark.asyncio
    async def test_parallel_items_with_dunhao(self):
        """顿号分隔的并列项"""
        planner = GoalPlanner()
        plan = await planner.generate_plan(
            goal="调研竞品A、竞品B、竞品C的市场策略",
            available_skills=["web_search", "report_generator"],
        )
        assert len(plan.steps) == 4  # 3 个并行 + 1 个汇总
        parallel_steps = [s for s in plan.steps if not s.dependencies]
        assert len(parallel_steps) == 3
    @pytest.mark.asyncio
    async def test_parallel_steps_have_correct_group(self):
        planner = GoalPlanner()
        plan = await planner.generate_plan(
            goal="调研 3 个竞品 SEO 策略并生成对比报告",
            available_skills=["web_search", "seo_analyzer", "report_generator"],
        )
        # 并行步骤的 parallel_group 应为 0
        for step in plan.steps[:3]:
            assert step.parallel_group == 0
        # 汇总步骤的 parallel_group 应为 1
        assert plan.steps[3].parallel_group == 1
 class TestGoalPlannerSequentialGoal:
    """顺序目标 → 顺序步骤"""
    @pytest.mark.asyncio
    async def test_sequential_goal_with_bing(self):
        """'并' 连接的顺序步骤"""
        planner = GoalPlanner()
        plan = await planner.generate_plan(
            goal="调研市场趋势并生成分析报告",
            available_skills=["web_search", "report_generator"],
        )
        assert len(plan.steps) == 2
        # 第二步依赖第一步
        assert plan.steps[1].dependencies == [plan.steps[0].step_id]
    @pytest.mark.asyncio
    async def test_sequential_goal_with_arrow(self):
        """箭头分隔的顺序步骤"""
        planner = GoalPlanner()
        plan = await planner.generate_plan(
            goal="收集数据→分析数据→生成报告",
            available_skills=["web_search", "data_analyzer", "report_generator"],
        )
        assert len(plan.steps) == 3
        assert plan.steps[0].dependencies == []
        assert plan.steps[1].dependencies == [plan.steps[0].step_id]
        assert plan.steps[2].dependencies == [plan.steps[1].step_id]
 class TestGoalPlannerSkillGaps:
    """能力缺口识别"""
    @pytest.mark.asyncio
    async def test_missing_skill_creates_gap(self):
        """无可用 Skill 的目标 → 计划标注能力缺口"""
        planner = GoalPlanner()
        plan = await planner.generate_plan(
            goal="调研 3 个竞品 SEO 策略并生成对比报告",
            available_skills=[],  # 无可用 Skill
        )
        assert plan.has_skill_gaps is True
        assert len(plan.skill_gaps) > 0
    @pytest.mark.asyncio
    async def test_no_gaps_when_skills_available(self):
        """所有 Skill 可用时无缺口"""
        planner = GoalPlanner()
        plan = await planner.generate_plan(
            goal="搜索最新的 AI 新闻",
            available_skills=["web_search"],
        )
        # web_search 匹配，不应有 HIGH 级别缺口
        high_gaps = [g for g in plan.skill_gaps if g.level == SkillGapLevel.HIGH]
        assert len(high_gaps) == 0
    @pytest.mark.asyncio
    async def test_gap_includes_suggestion(self):
        planner = GoalPlanner()
        plan = await planner.generate_plan(
            goal="调研 3 个竞品 SEO 策略并生成对比报告",
            available_skills=[],
        )
        for gap in plan.skill_gaps:
            if gap.level == SkillGapLevel.HIGH:
                assert gap.suggestion != ""
 class TestGoalPlannerUpdatePlan:
    """用户修改计划"""
    def test_remove_step(self):
        planner = GoalPlanner()
        plan = ExecutionPlan(
            goal="test",
            steps=[
                PlanStep(step_id="s0", name="A", description="A"),
                PlanStep(step_id="s1", name="B", description="B", dependencies=["s0"]),
                PlanStep(step_id="s2", name="C", description="C", dependencies=["s1"]),
            ],
            parallel_groups=[["s0"], ["s1"], ["s2"]],
        )
        updated = planner.update_plan_from_feedback(plan, {"remove_steps": ["s1"]})
        assert len(updated.steps) == 2
        # s2 的依赖应被清理
        assert "s1" not in updated.steps[1].dependencies
    def test_update_step(self):
        planner = GoalPlanner()
        plan = ExecutionPlan(
            goal="test",
            steps=[
                PlanStep(step_id="s0", name="A", description="A"),
            ],
            parallel_groups=[["s0"]],
        )
        updated = planner.update_plan_from_feedback(
            plan,
            {"update_steps": {"s0": {"name": "Updated A", "description": "Updated description"}}},
        )
        assert updated.steps[0].name == "Updated A"
        assert updated.steps[0].description == "Updated description"
    def test_add_step(self):
        planner = GoalPlanner()
        plan = ExecutionPlan(
            goal="test",
            steps=[
                PlanStep(step_id="s0", name="A", description="A"),
            ],
            parallel_groups=[["s0"]],
        )
        updated = planner.update_plan_from_feedback(
            plan,
            {"add_steps": [{"name": "New Step", "description": "A new step", "dependencies": ["s0"]}]},
        )
        assert len(updated.steps) == 2
        assert updated.steps[1].name == "New Step"
    def test_update_resets_confirmed(self):
        planner = GoalPlanner()
        plan = ExecutionPlan(
            goal="test",
            confirmed=True,
            steps=[PlanStep(step_id="s0", name="A", description="A")],
            parallel_groups=[["s0"]],
        )
        updated = planner.update_plan_from_feedback(plan, {"update_steps": {"s0": {"name": "B"}}})
        assert updated.confirmed is False
    def test_update_rebuilds_parallel_groups(self):
        planner = GoalPlanner()
        plan = ExecutionPlan(
            goal="test",
            steps=[
                PlanStep(step_id="s0", name="A", description="A"),
                PlanStep(step_id="s1", name="B", description="B", dependencies=["s0"]),
            ],
            parallel_groups=[["s0"], ["s1"]],
        )
        # 添加一个与 s0 并行的步骤
        updated = planner.update_plan_from_feedback(
            plan,
            {"add_steps": [{"name": "C", "description": "Parallel to A", "dependencies": []}]},
        )
        # 新步骤应与 s0 在同一并行组
        assert len(updated.parallel_groups[0]) == 2
 class TestGoalPlannerValidatePlan:
    """计划验证"""
    def test_valid_plan(self):
        planner = GoalPlanner()
        plan = ExecutionPlan(
            goal="test",
            steps=[
                PlanStep(step_id="s0", name="A", description="A"),
                PlanStep(step_id="s1", name="B", description="B", dependencies=["s0"]),
            ],
            parallel_groups=[["s0"], ["s1"]],
        )
        errors = planner.validate_plan(plan)
        assert errors == []
    def test_invalid_dependency(self):
        planner = GoalPlanner()
        plan = ExecutionPlan(
            goal="test",
            steps=[
                PlanStep(step_id="s0", name="A", description="A", dependencies=["nonexistent"]),
            ],
            parallel_groups=[["s0"]],
        )
        errors = planner.validate_plan(plan)
        assert len(errors) > 0
        assert any("nonexistent" in e for e in errors)
    def test_circular_dependency(self):
        planner = GoalPlanner()
        plan = ExecutionPlan(
            goal="test",
            steps=[
                PlanStep(step_id="s0", name="A", description="A", dependencies=["s1"]),
                PlanStep(step_id="s1", name="B", description="B", dependencies=["s0"]),
            ],
            parallel_groups=[["s0", "s1"]],
        )
        errors = planner.validate_plan(plan)
        assert any("循环依赖" in e for e in errors)
    def test_ungrouped_steps(self):
        planner = GoalPlanner()
        plan = ExecutionPlan(
            goal="test",
            steps=[
                PlanStep(step_id="s0", name="A", description="A"),
                PlanStep(step_id="s1", name="B", description="B"),
            ],
            parallel_groups=[["s0"]],  # s1 未分组
        )
        errors = planner.validate_plan(plan)
        assert any("未分配" in e for e in errors)
 class TestGoalPlannerWithOrchestrator:
    """GoalPlanner 与 Orchestrator 集成"""
    @pytest.mark.asyncio
    async def test_orchestrator_with_goal_planner(self):
        """Orchestrator 使用 GoalPlanner 分解任务"""
        planner = GoalPlanner()
        class MockAgent:
            def __init__(self, name):
                self.name = name
                self.agent_type = "mock"
            async def execute(self, task):
                from agentkit.core.protocol import TaskResult
                now = datetime.now(timezone.utc)
                return TaskResult(
                    task_id=task.task_id,
                    agent_name=self.name,
                    status=TaskStatus.COMPLETED,
                    output_data={"result": f"from {self.name}"},
                    error_message=None,
                    started_at=now,
                    completed_at=now,
                )
        class MockPool:
            def get_agent(self, name):
                return MockAgent(name)
            def list_agents(self):
                return [
                    {"name": "web_search", "agent_type": "search", "description": "Web search agent"},
                    {"name": "report_generator", "agent_type": "report", "description": "Report generator"},
                ]
        pool = MockPool()
        orchestrator = Orchestrator(agent_pool=pool, goal_planner=planner)
        task = TaskMessage(
            task_id="t1",
            agent_name="web_search",
            task_type="research",
            priority=1,
            input_data={"query": "调研 3 个竞品 SEO 策略并生成对比报告"},
            callback_url=None,
            created_at=datetime.now(timezone.utc),
        )
        plan = await orchestrator._decompose_task(task)
        # GoalPlanner 应将任务分解为 4 个子任务
        assert len(plan.subtasks) == 4
        # 前 3 个无依赖
        assert len(plan.subtasks[0].depends_on) == 0
        assert len(plan.subtasks[1].depends_on) == 0
        assert len(plan.subtasks[2].depends_on) == 0
        # 第 4 个依赖前 3 个
        assert len(plan.subtasks[3].depends_on) == 3
    @pytest.mark.asyncio
    async def test_orchestrator_without_goal_planner_backward_compat(self):
        """无 GoalPlanner 时 Orchestrator 保持原有行为"""
        class MockPool:
            def get_agent(self, name):
                return None
            def list_agents(self):
                return []
        pool = MockPool()
        orchestrator = Orchestrator(agent_pool=pool)
        task = TaskMessage(
            task_id="t1",
            agent_name="worker1",
            task_type="test",
            priority=1,
            input_data={"query": "test"},
            callback_url=None,
            created_at=datetime.now(timezone.utc),
        )
        plan = await orchestrator._decompose_task(task)
        # Fallback: 单个子任务
        assert len(plan.subtasks) == 1
        assert plan.subtasks[0].task_id.startswith(plan.plan_id)
 class TestGoalPlannerLLMRefinement:
    """LLM 细化计划"""
    @pytest.mark.asyncio
    async def test_llm_refinement_called_when_needed(self):
        """初始方案不够精确时调用 LLM 细化"""
        class MockLLMGateway:
            async def chat(self, messages, model):
                import json
                return type("Response", (), {
                    "content": json.dumps([
                        {"name": "调研竞品 A", "description": "使用搜索引擎调研竞品 A 的 SEO 策略，包括关键词排名和外链分析", "dependencies": [], "required_skills": ["web_search"]},
                        {"name": "调研竞品 B", "description": "使用搜索引擎调研竞品 B 的 SEO 策略，包括关键词排名和外链分析", "dependencies": [], "required_skills": ["web_search"]},
                        {"name": "调研竞品 C", "description": "使用搜索引擎调研竞品 C 的 SEO 策略，包括关键词排名和外链分析", "dependencies": [], "required_skills": ["web_search"]},
                        {"name": "生成对比报告", "description": "汇总三个竞品的 SEO 策略数据，生成结构化对比分析报告", "dependencies": [0, 1, 2], "required_skills": ["report_generator"]},
                    ]),
                })()
        # 使用无可用 Skill 的场景触发 LLM 细化
        planner = GoalPlanner(llm_gateway=MockLLMGateway())
        plan = await planner.generate_plan(
            goal="调研 3 个竞品 SEO 策略并生成对比报告",
            available_skills=[],  # 无可用 Skill，触发 LLM 细化
        )
        # LLM 细化后步骤描述应更详细
        assert len(plan.steps) == 4
        assert plan.metadata.get("refined_by_llm") is True
        for step in plan.steps:
            assert len(step.description) >= 20
    @pytest.mark.asyncio
    async def test_llm_failure_falls_back_to_initial(self):
        """LLM 细化失败时回退到初始方案"""
        class FailingLLMGateway:
            async def chat(self, messages, model):
                raise RuntimeError("LLM unavailable")
        planner = GoalPlanner(llm_gateway=FailingLLMGateway())
        plan = await planner.generate_plan(
            goal="调研 3 个竞品 SEO 策略并生成对比报告",
            available_skills=["web_search"],
        )
        # 应回退到规则生成的初始方案
        assert len(plan.steps) == 4
        assert plan.metadata.get("refined_by_llm") is None
 class TestGoalPlannerBuildParallelGroups:
    """并行组构建"""
    def test_simple_parallel(self):
        planner = GoalPlanner()
        steps = [
            PlanStep(step_id="s0", name="A", description="A"),
            PlanStep(step_id="s1", name="B", description="B"),
            PlanStep(step_id="s2", name="C", description="C", dependencies=["s0", "s1"]),
        ]
        groups = planner._build_parallel_groups(steps)
        assert len(groups) == 2
        assert set(groups[0]) == {"s0", "s1"}
        assert groups[1] == ["s2"]
    def test_sequential_chain(self):
        planner = GoalPlanner()
        steps = [
            PlanStep(step_id="s0", name="A", description="A"),
            PlanStep(step_id="s1", name="B", description="B", dependencies=["s0"]),
            PlanStep(step_id="s2", name="C", description="C", dependencies=["s1"]),
        ]
        groups = planner._build_parallel_groups(steps)
        assert len(groups) == 3
        assert groups[0] == ["s0"]
        assert groups[1] == ["s1"]
        assert groups[2] == ["s2"]
    def test_max_parallel_limit(self):
        planner = GoalPlanner(max_parallel=2)
        steps = [
            PlanStep(step_id="s0", name="A", description="A"),
            PlanStep(step_id="s1", name="B", description="B"),
            PlanStep(step_id="s2", name="C", description="C"),
        ]
        groups = planner._build_parallel_groups(steps)
        # 最多 2 个并行
        assert len(groups[0]) <= 2
    def test_circular_dependency_handling(self):
        planner = GoalPlanner()
        steps = [
            PlanStep(step_id="s0", name="A", description="A", dependencies=["s1"]),
            PlanStep(step_id="s1", name="B", description="B", dependencies=["s0"]),
        ]
        groups = planner._build_parallel_groups(steps)
        # 循环依赖时将剩余步骤放入一组
        assert len(groups) == 1
        assert set(groups[0]) == {"s0", "s1"}