feat: autonomous task execution - connect PlanExecEngine + TeamOrchestrator

U1: TeamOrchestrator._execute_phase real execution (Expert.agent.execute)
U2: LLM-based merge strategies (BEST/VOTE/FUSION) with fallback
U3: ReActStepExecutor replacing _LLMStepAgent for tool-enabled steps
U4: SharedWorkspace integration for cross-phase/cross-execution state
U5: GoalPlanner prompt tuning with few-shot and verb pattern matching
U6: Replan-before-fallback in TeamOrchestrator
U7: End-to-end validation tests for multi-step research tasks
U8: WebSocket progress events (step_event_callback + new event types)

Code review fixes: P0 response.strip fix, P1 competitor status check,
milestone real impl, VOTE self-bias fix, confirmation_handler wiring,
ExpertTeam public API, DRY _build_result_summaries, replan tests

Also: geo_server.py refactor (ServerConfig.from_yaml), delete llm_config.yaml
This commit is contained in:
chiguyong 2026-06-15 12:41:32 +08:00
parent 0c63d813dc
commit 64d62a2b60
15 changed files with 2175 additions and 368 deletions

View File

@ -152,7 +152,7 @@ Config search: `--config` path > `./agentkit.yaml` > `~/.agentkit/agentkit.yaml`
## Conventions ## Conventions
- Skill configs: `configs/skills/*.yaml` (15 presets) - Skill configs: `configs/skills/*.yaml` (15 presets)
- LLM configs: `configs/llm_config.yaml` - LLM configs: `agentkit.yaml` llm section (unified with server config)
- Pipeline configs: `configs/pipelines/*.yaml` - Pipeline configs: `configs/pipelines/*.yaml`
- Expert templates: registered via `ExpertTemplateRegistry` - Expert templates: registered via `ExpertTemplateRegistry`
- All Pydantic models use `model_config = ConfigDict(...)` not `class Config` - All Pydantic models use `model_config = ConfigDict(...)` not `class Config`

View File

@ -1,23 +1,21 @@
"""GEO AgentKit Server 启动入口 """GEO AgentKit Server 启动入口
工厂函数 create_geo_app() 初始化 LLM GatewayTool RegistrySkill Registry 工厂函数 create_geo_app() 使用 agentkit.yaml 统一配置
然后创建 FastAPI 应用 初始化 LLM GatewayTool RegistrySkill Registry然后创建 FastAPI 应用
使用方式 使用方式
uvicorn configs.geo_server:create_geo_app --factory --host 0.0.0.0 --port 8001 uvicorn configs.geo_server:create_geo_app --factory --host 0.0.0.0 --port 8001
""" """
from __future__ import annotations
import logging import logging
import os import os
from agentkit.core.agent_pool import AgentPool from fastapi import FastAPI
from agentkit.llm.config import LLMConfig
from agentkit.llm.gateway import LLMGateway from agentkit.server.app import _build_llm_gateway, create_app
from agentkit.llm.providers.openai import OpenAICompatibleProvider from agentkit.server.config import ServerConfig
from agentkit.quality.gate import QualityGate
from agentkit.quality.output import OutputStandardizer
from agentkit.router.intent import IntentRouter
from agentkit.server.app import create_app
from agentkit.skills.loader import SkillLoader from agentkit.skills.loader import SkillLoader
from agentkit.skills.registry import SkillRegistry from agentkit.skills.registry import SkillRegistry
from agentkit.tools.registry import ToolRegistry from agentkit.tools.registry import ToolRegistry
@ -27,58 +25,25 @@ logger = logging.getLogger(__name__)
# ─── 配置路径 ─── # ─── 配置路径 ───
CONFIGS_DIR = os.path.dirname(os.path.abspath(__file__)) CONFIGS_DIR = os.path.dirname(os.path.abspath(__file__))
LLM_CONFIG_PATH = os.path.join(CONFIGS_DIR, "llm_config.yaml")
SKILLS_DIR = os.path.join(CONFIGS_DIR, "skills") SKILLS_DIR = os.path.join(CONFIGS_DIR, "skills")
# 查找 agentkit.yaml项目根目录 > configs 目录
def _substitute_env_vars(config_path: str) -> dict: _PROJECT_ROOT = os.path.dirname(CONFIGS_DIR)
"""加载 YAML 配置并替换 ${VAR} 环境变量""" _AGENTKIT_YAML = os.path.join(_PROJECT_ROOT, "agentkit.yaml")
import yaml
with open(config_path, encoding="utf-8") as f:
raw = f.read()
# 递归替换 ${VAR_NAME} 和 ${VAR_NAME:-default} 格式
import re
def _replace_env(match):
var_expr = match.group(1)
if ":-" in var_expr:
var_name, default = var_expr.split(":-", 1)
return os.getenv(var_name, default)
return os.getenv(var_expr, match.group(0))
resolved = re.sub(r"\$\{([^}]+)\}", _replace_env, raw)
return yaml.safe_load(resolved)
def _init_llm_gateway() -> LLMGateway: def _load_server_config() -> ServerConfig:
"""初始化 LLM Gateway 并注册 Provider""" """Load ServerConfig from agentkit.yaml with env var resolution."""
config_data = _substitute_env_vars(LLM_CONFIG_PATH) if os.path.isfile(_AGENTKIT_YAML):
config = LLMConfig.from_dict(config_data) return ServerConfig.from_yaml(_AGENTKIT_YAML)
raise FileNotFoundError(f"agentkit.yaml not found at {_AGENTKIT_YAML}")
gateway = LLMGateway(config)
for provider_name, pconf in config.providers.items():
if not pconf.api_key:
logger.warning(f"Skipping provider '{provider_name}': no API key")
continue
models = list(pconf.models.keys()) if pconf.models else []
default_model = models[0] if models else "gpt-4o-mini"
provider = OpenAICompatibleProvider(
api_key=pconf.api_key,
base_url=pconf.base_url,
default_model=default_model,
)
gateway.register_provider(provider_name, provider)
logger.info(f"Provider '{provider_name}' registered with model '{default_model}'")
return gateway
def _init_tool_registry() -> ToolRegistry: def _init_tool_registry() -> ToolRegistry:
"""初始化 Tool Registry 并注册 GEO Tools""" """初始化 Tool Registry 并注册 GEO Tools"""
registry = ToolRegistry() registry = ToolRegistry()
from configs.geo_tools import register_geo_tools from configs.geo_tools import register_geo_tools
register_geo_tools(registry) register_geo_tools(registry)
return registry return registry
@ -92,9 +57,10 @@ def _init_skill_registry(tool_registry: ToolRegistry) -> SkillRegistry:
return registry return registry
def create_geo_app() -> "FastAPI": def create_geo_app() -> FastAPI:
"""GEO AgentKit Server FastAPI 工厂函数""" """GEO AgentKit Server FastAPI 工厂函数"""
llm_gateway = _init_llm_gateway() config = _load_server_config()
llm_gateway = _build_llm_gateway(config)
tool_registry = _init_tool_registry() tool_registry = _init_tool_registry()
skill_registry = _init_skill_registry(tool_registry) skill_registry = _init_skill_registry(tool_registry)
@ -105,7 +71,9 @@ def create_geo_app() -> "FastAPI":
) )
app.title = "GEO AgentKit Server" app.title = "GEO AgentKit Server"
logger.info(f"GEO AgentKit Server initialized: {len(skill_registry.list_skills())} skills, " logger.info(
f"{len(tool_registry.list_tools())} tools") f"GEO AgentKit Server initialized: {len(skill_registry.list_skills())} skills, "
f"{len(tool_registry.list_tools())} tools"
)
return app return app

View File

@ -1,31 +0,0 @@
# LLM Provider 配置 — 仅 Docker/GEO 部署模式使用
# 标准 CLI 模式 (agentkit serve/gui/chat) 使用 agentkit.yaml 的 llm 段
# 环境变量替换:${VAR_NAME} 由 geo_server._substitute_env_vars() 处理
providers:
dashscope:
api_key: "${DASHSCOPE_API_KEY}"
base_url: "${DASHSCOPE_BASE_URL:-https://coding.dashscope.aliyuncs.com/v1}"
models:
qwen3-coder-plus:
max_tokens: 64000
cost_per_1k_input: 0.00014
cost_per_1k_output: 0.00028
qwen-plus:
max_tokens: 128000
cost_per_1k_input: 0.0008
cost_per_1k_output: 0.002
qwen3-max:
max_tokens: 128000
cost_per_1k_input: 0.002
cost_per_1k_output: 0.006
qwen-turbo:
max_tokens: 128000
cost_per_1k_input: 0.0003
cost_per_1k_output: 0.0006
model_aliases:
default: "dashscope/qwen3-coder-plus"
fast: "dashscope/qwen-turbo"
powerful: "dashscope/qwen3-max"
chat: "dashscope/qwen-plus"

View File

@ -0,0 +1,142 @@
---
date: 2026-06-15
topic: autonomous-task-execution
---
## Summary
打通 PlanExecEngine 和 TeamOrchestrator 的执行层，让 AgentKit 能真正自主拆解和执行多步任务。用户一句话描述复杂需求，Agent 自主生成可执行计划、逐步调用工具、返回完整结果。
## Problem Frame
AgentKit 已有完整的任务规划框架骨架——ReAct/PlanExec/ReWOO/Reflexion 四种推理引擎、TeamOrchestrator 多 Agent 协作、PipelineEngine DAG 编排——但执行层未跑通。TeamOrchestrator 的 `_execute_phase` 返回模拟字符串，PlanExecEngine 的步骤执行器功能简单，SharedWorkspace 未集成到执行层。结果是用户提出复杂需求后，Agent 只能简单对话回复，无法真正拆解执行。当前最需要的是把已有框架跑通，而不是新建能力。
---
## Key Decisions
**执行层对接优先于新能力建设。** 现有四种引擎和 TeamOrchestrator 框架已完整,最关键的差距是执行层模拟代码未替换为真实 Agent 调用。先跑通再迭代。
**验证场景选多步研究任务。** "帮我分析竞品并生成报告"这类任务天然需要搜索→分析→生成三步闭环,且不依赖本地开发环境,最能体现自主执行能力。
**步骤间状态通过 SharedWorkspace 传递。** PlanExecEngine 已有 `dependency_results` 机制，但仅通过 prompt 注入，需要结构化状态管理。SharedWorkspace 已存在，但未在执行层集成。
---
## Requirements
**执行层打通**
- R1. TeamOrchestrator._execute_phase 调用 Agent.execute() 执行真实任务,替代当前返回模拟字符串的实现
- R2. PlanExecEngine._LLMStepAgent 升级为完整 ReAct 循环执行器,支持工具调用和多步推理
- R3. SharedWorkspace 集成到 PlanExecEngine 和 TeamOrchestrator 执行层,步骤间可读写结构化状态
- R4. GoalPlanner 的 LLM prompt 调优,确保任务分解质量(子任务可执行、依赖关系正确、无遗漏)
**执行闭环**
- R5. 每个执行步骤完成后,结果自动写入 SharedWorkspace 并通知下游依赖步骤
- R6. 步骤执行失败时，PlanExecEngine 触发重规划（已有 PipelineReflector/PipelineReplanner，需集成）
- R7. TeamOrchestrator 的 COMPETITIVE_PARALLEL 模式下合并策略（BEST/VOTE/FUSION）从真实执行结果中选择
**验证场景**
- R8. 多步研究任务端到端验证：用户输入"分析 X 竞品并生成报告"，Agent 自主拆解为搜索→分析→生成三步，逐步执行并返回完整报告
- R9. 执行过程通过 WebSocket 实时推送进度事件（plan_created / step_started / step_completed / plan_completed）
---
## Key Flows
- F1. 自主任务执行主流程
- **Trigger:** 用户提交复杂任务（复杂度 > 0.7，路由到 TEAM_COLLAB 或 SKILL_REACT）
- **Actors:** 用户, CostAwareRouter, GoalPlanner, PlanExecEngine/TeamOrchestrator, SharedWorkspace
- **Steps:**
1. CostAwareRouter 路由到合适的执行模式
2. GoalPlanner 将目标分解为子任务计划(含依赖关系)
3. PlanExecEngine 按依赖拓扑执行子任务,每个子任务通过 ReAct 循环调用工具
4. 每步结果写入 SharedWorkspace，触发下游就绪检查
5. 全部完成后综合结果返回用户
- **Outcome:** 用户收到完整的任务执行结果,过程可观测
- F2. 执行失败重规划
- **Trigger:** 某步骤执行失败（工具调用异常/LLM 返回无效结果/超时）
- **Actors:** PlanExecEngine, PipelineReflector, PipelineReplanner
- **Steps:**
1. PlanExecEngine 捕获步骤失败
2. PipelineReflector 分析失败原因
3. PipelineReplanner 生成修正后的计划片段
4. PlanExecEngine 用修正计划替换失败步骤,继续执行
- **Outcome:** 任务从失败中恢复,无需用户手动干预
---
## Acceptance Examples
- AE1. **多步研究任务成功执行**
- **Covers R1, R2, R3, R5, R8.**
- **Given:** 用户输入"分析飞书和钉钉的竞品对比并生成报告"
- **When:** Agent 自主执行搜索→分析→生成三步
- **Then:** 返回包含竞品对比分析的完整报告,每步结果可在 SharedWorkspace 中追溯
- AE2. **步骤失败自动重规划**
- **Covers R6.**
- **Given:** Agent 执行"搜索竞品信息"步骤时搜索工具返回空结果
- **When:** PlanExecEngine 触发重规划
- **Then:** Agent 调整策略(如换搜索关键词/换搜索工具),重新执行该步骤
- AE3. **团队竞争模式真实执行**
- **Covers R1, R7.**
- **Given:** 两个 Expert 竞争执行同一分析任务
- **When:** 各自独立执行并返回结果
- **Then:** Lead Expert 根据合并策略选择/融合最佳结果
---
## Success Criteria
- 多步研究任务端到端成功率 > 80%(任务完成且结果包含所有子步骤输出)
- 执行过程通过 WebSocket 实时可观测(每个步骤有 started/completed 事件)
- 步骤失败时自动重规划成功率 > 50%(至少一次重规划后任务完成)
---
## Scope Boundaries
**Deferred for later:**
- 执行持久化与断点恢复（集成 PipelineState 到 PlanExecEngine）
- 自适应执行监控（token 预算控制、耗时趋势、策略动态调整）
- 人机协作规划 UI（用户实时调整计划）
- 计划模板库(复用历史成功计划)
**Outside this scope:**
- 动态工具发现与运行时组合（Agent 自主发现新工具）
- 跨任务长期记忆(任务间经验迁移)
- 多层级嵌套计划(子计划递归分解)
---
## Dependencies / Assumptions
- LLM Gateway 已配置且可用（至少一个 provider 有有效 API key）
- 搜索工具（WebSearchTool/BaiduSearchTool）已注册且可用
- SharedWorkspace 数据结构已定义(需确认与现有 `expert_team.SharedWorkspace` 的兼容性)
- GoalPlanner 的 LLM 分解质量足够支撑多步任务(可能需要 prompt 迭代)
---
## Outstanding Questions
- **Resolve Before Planning:** SharedWorkspace 的现有实现（`expert_team.SharedWorkspace`）是否可直接复用于 PlanExecEngine，还是需要新建？
- **Deferred to Planning:** TeamOrchestrator 的 `_execute_phase` 如何与 Agent.execute() 的异步签名对接（execute 是 async 方法，当前 _execute_phase 也是 async）
---
## Sources / Research
- `src/agentkit/core/react.py` — ReActEngine 完整实现（think-act-observe 循环）
- `src/agentkit/core/plan_exec_engine.py` — PlanExecEngine（含 _LLMStepAgent 和 dependency_results）
- `src/agentkit/experts/orchestrator.py` — TeamOrchestrator（_execute_phase 为模拟代码）
- `src/agentkit/experts/team.py` — ExpertTeam 和 SharedWorkspace 定义
- `src/agentkit/orchestrator/pipeline_engine.py` — PipelineEngine（含反思-重规划闭环）
- `src/agentkit/orchestrator/reflection.py` — PipelineReflector / PipelineReplanner
- `src/agentkit/core/goal_planner.py` — GoalPlanner（规则+LLM混合分解）

View File

@ -0,0 +1,402 @@
---
date: 2026-06-15
status: active
origin: docs/brainstorms/2026-06-15-autonomous-task-execution-requirements.md
---
## Summary
打通 PlanExecEngine 和 TeamOrchestrator 的执行层,将模拟代码替换为真实的 Agent/ReActEngine 调用,集成 SharedWorkspace 实现步骤间状态传递,并添加 WebSocket 进度事件。用多步研究任务端到端验证闭环。
## Problem Frame
AgentKit 的任务规划框架骨架完整（四种推理引擎 + TeamOrchestrator + PipelineEngine），但执行层未跑通：`_execute_phase` 返回模拟字符串，`_LLMStepAgent` 只做单次 LLM 调用、不支持工具，SharedWorkspace 未集成到执行层。用户提出复杂需求后 Agent 无法真正拆解执行。本计划将已有框架跑通，而非新建能力。
---
## Key Technical Decisions
**KTD-1. _LLMStepAgent 替换为 ReActStepExecutor。** 现有 `_LLMStepAgent` 只做单次 `llm_gateway.chat()` 调用,不支持工具。新建 `ReActStepExecutor` 类,内部创建 `ReActEngine` 实例执行步骤,支持工具调用和多步推理。保留 `_LLMStepAgent` 作为无工具场景的轻量回退。
**KTD-2. SharedWorkspace 直接复用。** 现有 `SharedWorkspace`（`core/shared_workspace.py`）是通用 key-value 存储，支持版本控制和分布式锁。PlanExecEngine 直接注入 SharedWorkspace 实例，步骤结果写入 `plan:{plan_id}:step:{step_id}:result`，无需新建状态管理。
**KTD-3. TeamOrchestrator 通过 Expert.agent.execute() 执行。** `Expert.agent` 是 `ConfigDrivenAgent` 实例，其 `execute(TaskMessage)` 是 final 方法，内部根据 execution_mode 选择 ReAct/PlanExec/ReWOO/Reflexion 引擎。直接调用即可，无需手动创建 ReActEngine。
**KTD-4. 进度事件通过 HandoffTransport -> WebSocket 桥接。** TeamOrchestrator 已通过 `_broadcast_event` 向 HandoffTransport 发送事件。在 Chat WebSocket handler 中注册 HandoffTransport handler，将 team 事件转发为 WebSocket 消息。PlanExecEngine 的步骤事件通过回调函数注入。
---
## Requirements Trace
| R-ID | Implementation Units |
|------|---------------------|
| R1 | U1, U2 |
| R2 | U3 |
| R3 | U4 |
| R4 | U5 |
| R5 | U4 |
| R6 | U6 |
| R7 | U2 |
| R8 | U7 |
| R9 | U8 |
---
## High-Level Technical Design
```mermaid
flowchart TB
subgraph User Request
A[用户输入复杂任务] --> B[CostAwareRouter]
end
subgraph Routing
B -->|complexity > 0.7| C[TEAM_COLLAB]
B -->|0.3-0.7| D[SKILL_REACT / REACT]
end
subgraph PlanExecEngine Path
D --> E[GoalPlanner]
E --> F[ExecutionPlan]
F --> G[ReActStepExecutor]
G -->|per step| H[ReActEngine.execute]
H --> I[Tool Calls]
I --> J[SharedWorkspace.write]
J -->|next step| G
end
subgraph TeamOrchestrator Path
C --> K[ExpertTeam.form]
K --> L[CollaborationPlan]
L --> M[_execute_phase]
M -->|real call| N[Expert.agent.execute]
N --> O[TaskResult]
O --> P[_merge_results]
end
subgraph Events
H --> Q[StepEvent callback]
M --> R[HandoffTransport broadcast]
Q --> S[WebSocket emit]
R --> S
end
```
---
## Implementation Units
### U1. TeamOrchestrator._execute_phase 真实执行
**Goal:** 将 `_execute_phase` 从模拟代码改为调用 `Expert.agent.execute(TaskMessage)` 执行真实任务
**Requirements:** R1
**Dependencies:** None
**Files:**
- `src/agentkit/experts/orchestrator.py` — 修改 `_execute_phase` 和 `_run_competitor`
- `tests/unit/experts/test_orchestrator.py` — 新增/更新测试
**Approach:**
1. 在 `_execute_phase` 中,获取 `expert = self._team._experts.get(phase.assigned_expert)`
2. 构建 `TaskMessage`：`task_id=phase.phase_id`, `agent_name=expert.config.name`, `task_type="team_phase"`, `input_data={"phase_name": phase.name, "phase_description": phase.description, "team_id": self.team_id}`
3. 从 SharedWorkspace 读取前置阶段结果,注入 `input_data["dependency_results"]`
4. 调用 `result = await expert.agent.execute(task_msg)`
5. 处理 `TaskResult`:成功则写入 SharedWorkspace 并广播 `phase_completed`,失败则广播 `phase_failed`
6. 同样修改 `_run_competitor`,调用 `expert.agent.execute()` 替代模拟返回
**Patterns to follow:** `BaseAgent.execute()` 的 final 方法模式（`core/base.py`），`TaskMessage`/`TaskResult` 协议（`core/protocol.py`）
**Test scenarios:**
- Happy path: _execute_phase 调用 expert.agent.execute() 并返回 TaskResult
- Expert not found: assigned_expert 不在 _experts 中时回退到 lead_expert
- Execution failure: agent.execute() 返回 FAILED 状态时广播 phase_failed
- Covers AE3: 两个 Expert 竞争执行,各自调用 agent.execute()
**Verification:** 单元测试通过，mock Agent 返回 TaskResult，验证 _execute_phase 正确处理成功/失败
---
### U2. TeamOrchestrator 合并策略从真实结果选择
**Goal:** COMPETITIVE_PARALLEL 模式下合并策略（BEST/VOTE/FUSION）从真实 TaskResult 中选择/融合
**Requirements:** R1, R7
**Dependencies:** U1
**Files:**
- `src/agentkit/experts/orchestrator.py` — 修改 `_merge_results`
- `tests/unit/experts/test_orchestrator.py`
**Approach:**
1. `_merge_results` 当前接收 `list[dict]`,改为接收 `list[tuple[Expert, TaskResult]]`
2. BEST 策略：Lead Expert 的 LLM 评估各 TaskResult.output_data，选择最佳
3. VOTE 策略：每个 Expert 的 LLM 对其他结果评分，最高分胜出
4. FUSION 策略：Lead Expert 的 LLM 融合所有 output_data
5. 无 LLM Gateway 时回退到当前简化逻辑(选择第一个结果)
**Patterns to follow:** `PipelineReflector` 的 LLM 调用模式（`orchestrator/reflection.py`）
**Test scenarios:**
- BEST: 3 个 Expert 结果，Lead Expert 选择最佳
- VOTE: 3 个 Expert 结果，投票选择
- FUSION: 3 个 Expert 结果，Lead Expert 融合
- No LLM Gateway: 回退到选择第一个结果
**Verification:** 单元测试验证三种合并策略从真实 TaskResult 中选择
---
### U3. ReActStepExecutor 替代 _LLMStepAgent
**Goal:** 新建 ReActStepExecutor，内部使用 ReActEngine 执行步骤，支持工具调用和多步推理
**Requirements:** R2
**Dependencies:** None
**Files:**
- `src/agentkit/core/plan_exec_engine.py` — 新增 `ReActStepExecutor` 类,修改 `PlanExecutor` 使用新执行器
- `tests/unit/core/test_plan_exec_engine.py` — 新增测试
**Approach:**
1. 新建 `ReActStepExecutor` 类,构造函数接收 `llm_gateway`, `tools`, `max_steps=5`, `model="default"`, `system_prompt=None`
2. `execute(task_msg: TaskMessage) -> TaskResult` 方法:
- 从 `task_msg.input_data` 提取 `step_name`, `step_description`, `dependency_results`
- 构建 messages：`[{"role": "user", "content": step_description}]`
- 如有 `dependency_results`,追加到 content
- 创建 `ReActEngine(llm_gateway, max_steps=max_steps)`
- 调用 `react_engine.execute(messages, tools, model, system_prompt)`
- 将 `ReActResult.output` 包装为 `TaskResult(output_data={"content": result.output, "steps": result.total_steps, "tokens": result.total_tokens})`
3. `PlanExecutor` 新增 `step_executor_type` 参数：`"react"`（默认）或 `"llm"`（回退到 _LLMStepAgent）
4. `PlanExecutor._execute_step` 根据 `step_executor_type` 选择执行器
**Patterns to follow:** `ReActEngine.execute()` 的签名和返回值（`core/react.py`），`_LLMStepAgent` 的接口（`plan_exec_engine.py`）
**Test scenarios:**
- Happy path: ReActStepExecutor 调用 ReActEngine，返回包含工具调用结果的 TaskResult
- No tools: 无工具时回退到纯 LLM 调用
- Multi-step: ReActEngine 执行 3 步 think-act-observe 循环
- Tool failure: 工具调用异常时 ReActEngine 返回 partial status
**Verification:** 单元测试 mock ReActEngine，验证 ReActStepExecutor 正确调用和包装结果
---
### U4. SharedWorkspace 集成到执行层
**Goal:** PlanExecEngine 和 TeamOrchestrator 通过 SharedWorkspace 传递步骤间状态
**Requirements:** R3, R5
**Dependencies:** U1, U3
**Files:**
- `src/agentkit/core/plan_exec_engine.py` — 注入 SharedWorkspace，步骤结果写入/读取
- `src/agentkit/experts/orchestrator.py` — 阶段结果写入/读取 SharedWorkspace
- `tests/unit/core/test_plan_exec_engine.py`
- `tests/unit/experts/test_orchestrator.py`
**Approach:**
1. `PlanExecutor` 构造函数新增 `workspace: SharedWorkspace | None = None` 参数
2. 步骤完成后:`workspace.write(f"plan:{plan_id}:step:{step_id}:result", result_data, agent_id=step_id)`
3. 步骤执行前:从 workspace 读取依赖步骤结果,注入 `input_data["dependency_results"]`
4. `TeamOrchestrator` 构造函数新增 `workspace: SharedWorkspace | None = None`,默认使用 `team._workspace`
5. 阶段完成后写入 `workspace.write(f"team:{team_id}:phase:{phase_id}:result", ...)`
6. 阶段执行前读取前置阶段结果
**Patterns to follow:** `ExpertTeam._workspace` 的使用模式（`experts/team.py`），`SharedWorkspace.write/read` API（`core/shared_workspace.py`）
**Test scenarios:**
- PlanExecEngine: 步骤 A 完成后结果写入 workspace，步骤 B 执行前从 workspace 读取
- TeamOrchestrator: 阶段 A 结果写入 workspace，阶段 B 读取
- No workspace: workspace=None 时回退到原有 dependency_results 机制
- Concurrent write: 两个并行步骤同时写入 workspace，版本号递增
**Verification:** 单元测试验证 workspace 读写和依赖传递
---
### U5. GoalPlanner prompt 调优
**Goal:** 提升 GoalPlanner 的任务分解质量,确保子任务可执行、依赖关系正确
**Requirements:** R4
**Dependencies:** None
**Files:**
- `src/agentkit/core/goal_planner.py` — 优化 LLM prompt 和规则分解逻辑
- `tests/unit/core/test_goal_planner.py`
**Approach:**
1. 优化 `_llm_decompose` 的 prompt，明确要求输出 JSON 格式，包含 step_id/name/description/dependencies/required_tools 字段
2. 添加 few-shot 示例:展示"分析竞品并生成报告"的标准分解(搜索→分析→生成)
3. 规则分解增强:识别"搜索/查找/分析/生成/报告/对比"等常见任务动词,映射到标准步骤模板
4. 添加分解质量自检：LLM 分解后，用第二次 LLM 调用验证步骤是否完整、依赖是否合理
5. 添加 `required_tools` 字段到 PlanStep，指定步骤需要的工具（如搜索步骤需要 web_search）
**Patterns to follow:** 现有 `_rule_based_decompose` 和 `_llm_decompose` 模式
**Test scenarios:**
- "分析竞品并生成报告" → 3 步分解(搜索→分析→生成),依赖关系正确
- "搜索最新AI论文" → 1 步分解，required_tools=["web_search"]
- "对比A和B的优缺点" → 2 步分解(分别搜索→对比分析)
- LLM 分解失败 → 回退到规则分解
**Verification:** 单元测试验证分解质量和依赖关系
---
### U6. PlanExecEngine 失败重规划集成
**Goal:** 步骤执行失败时,集成 PipelineReflector/PipelineReplanner 触发自动重规划
**Requirements:** R6
**Dependencies:** U3
**Files:**
- `src/agentkit/core/plan_exec_engine.py` — 修改 `_execute_plan` 失败处理逻辑
- `tests/unit/core/test_plan_exec_engine.py`
**Approach:**
1. `PlanExecutor` 已有 `_plan_to_pipeline` / `_pipeline_to_plan` 桥接方法（plan_exec_engine.py 第549-664行）
2. 在 `_execute_plan` 的步骤失败分支中:
- 调用 `reflector.reflect(pipeline, pipeline_result, replan_count)` 获取 ReflectionReport
- 调用 `replanner.replan(pipeline, pipeline_result, reflection_report)` 获取修正后的 Pipeline
- 将修正后的 Pipeline 转回 ExecutionPlan
- 用 `_merge_completed_results` 保留已完成步骤的结果
- 继续执行修正后的计划
3. 添加 `max_replan_attempts` 参数（默认 2），超过后回退到单 Agent 模式
4. 广播 `replanning` 事件,包含失败原因和修正计划
**Patterns to follow:** `PipelineEngine` 的反思-重规划闭环(`orchestrator/pipeline_engine.py`),现有 `_plan_to_pipeline` 桥接
**Test scenarios:**
- Covers AE2: 搜索步骤失败 → Reflector 分析原因 → Replanner 生成修正计划 → 重新执行成功
- Max replan exceeded: 连续 2 次重规划仍失败 → 回退到单 Agent
- Partial completion: 3 步中第 2 步失败,重规划后保留第 1 步结果
**Verification:** 单元测试 mock Reflector/Replanner，验证重规划流程
---
### U7. 多步研究任务端到端验证
**Goal:** 用"分析竞品并生成报告"场景验证完整闭环
**Requirements:** R8
**Dependencies:** U1, U3, U4, U5, U6
**Files:**
- `tests/integration/test_autonomous_research_task.py` — 新增集成测试
- `src/agentkit/core/plan_exec_engine.py` — 确保 ReActStepExecutor 与搜索工具集成
- `configs/skills/research.yaml` — 新增研究任务 Skill 配置
**Approach:**
1. 创建 `research` Skill 配置,绑定 `web_search` + `web_crawl` + `ask_human` 工具
2. 集成测试：mock LLM Gateway 返回预设响应，mock 搜索工具返回预设结果
3. 验证流程:用户输入 → GoalPlanner 分解 → PlanExecEngine 执行 → SharedWorkspace 状态传递 → 最终报告
4. 验证步骤间依赖:搜索步骤结果被分析步骤读取
5. 验证失败重规划:搜索工具返回空结果时触发重规划
**Test scenarios:**
- Covers AE1: "分析飞书和钉钉的竞品对比" → 搜索→分析→生成完整报告
- Tool integration: ReActStepExecutor 调用 web_search 工具
- Dependency chain: 搜索结果传递到分析步骤
- Failure recovery: 搜索失败 → 重规划 → 换关键词重新搜索
**Verification:** 集成测试通过,端到端输出包含搜索结果和分析报告
---
### U8. WebSocket 进度事件
**Goal:** 执行过程通过 WebSocket 实时推送进度事件
**Requirements:** R9
**Dependencies:** U1, U3
**Files:**
- `src/agentkit/server/routes/chat.py` — 注册 HandoffTransport handler，转发 team 事件
- `src/agentkit/core/plan_exec_engine.py` — 添加 step_event_callback 参数
- `src/agentkit/server/routes/portal.py` — 添加 plan/step 事件类型
- `tests/unit/server/test_chat_ws_events.py` — 新增测试
**Approach:**
1. PlanExecEngine 新增 `step_event_callback: Callable[[str, dict], Awaitable[None]] | None` 参数
2. 步骤状态变更时调用 callback：`plan_created`, `step_started`, `step_completed`, `step_failed`, `plan_completed`, `replanning`
3. Chat WebSocket handler 中,当 ExpertTeam 模式激活时,注册 HandoffTransport handler 将 team 事件转发为 WebSocket 消息
4. Portal WebSocket 添加新事件类型:`plan_step`(步骤进度)和 `plan_update`(计划变更)
5. 前端 `WsServerMessage` 类型添加 `plan_step` 和 `plan_update` 事件支持
**Patterns to follow:** 现有 `emit_team_event` 模式（`server/routes/chat.py`），Portal WebSocket 事件格式
**Test scenarios:**
- PlanExecEngine: 步骤开始/完成时 callback 被调用,事件类型正确
- TeamOrchestrator: HandoffTransport 事件转发到 WebSocket
- Portal: plan_step 事件包含 step_id, step_name, status
- No callback: callback=None 时不影响执行
**Verification:** 单元测试验证事件回调被正确调用
---
## Scope Boundaries
**In scope:**
- 打通 PlanExecEngine 和 TeamOrchestrator 执行层
- SharedWorkspace 集成
- GoalPlanner prompt 调优
- 失败重规划集成
- WebSocket 进度事件
- 多步研究任务验证
**Deferred to follow-up work:**
- 执行持久化与断点恢复
- 自适应执行监控（token 预算、耗时趋势）
- 人机协作规划 UI
- 计划模板库
- 前端进度可视化组件
**Outside this scope:**
- 动态工具发现与运行时组合
- 跨任务长期记忆
- 多层级嵌套计划
---
## Risks & Mitigations
| Risk | Impact | Mitigation |
|------|--------|-----------|
| ReActEngine 步骤级执行 token 消耗高 | 每步骤可能消耗大量 token | ReActStepExecutor 默认 max_steps=5，限制循环次数 |
| GoalPlanner 分解质量不稳定 | 复杂任务可能分解不合理 | 添加分解质量自检 + few-shot 示例 |
| SharedWorkspace 并发写入冲突 | 并行步骤同时写入可能冲突 | SharedWorkspace 内置版本控制和分布式锁 |
| HandoffTransport -> WebSocket 桥接延迟 | 事件转发可能增加延迟 | InProcess 模式下延迟极低（asyncio.Queue） |
---
## Open Questions
- **Deferred to implementation:** ReActStepExecutor 的 system_prompt 是否需要根据步骤类型动态生成（如搜索步骤 vs 分析步骤）
- **Deferred to implementation:** 前端 WsServerMessage 类型更新是否需要同步修改 chat store 的事件处理逻辑
---
## Sources & Research
- `src/agentkit/core/react.py` — ReActEngine 完整实现
- `src/agentkit/core/plan_exec_engine.py` — PlanExecEngine 和 _LLMStepAgent
- `src/agentkit/experts/orchestrator.py` — TeamOrchestrator mock 实现
- `src/agentkit/experts/team.py` — ExpertTeam 和 SharedWorkspace
- `src/agentkit/core/shared_workspace.py` — SharedWorkspace API
- `src/agentkit/orchestrator/reflection.py` — PipelineReflector / PipelineReplanner
- `src/agentkit/core/goal_planner.py` — GoalPlanner
- `src/agentkit/core/protocol.py` — TaskMessage / TaskResult 协议
- `src/agentkit/server/routes/chat.py` — Chat WebSocket 和 emit_team_event

View File

@ -44,10 +44,13 @@ dev = [
"pytest-asyncio>=0.23", "pytest-asyncio>=0.23",
"pytest-cov>=5.0", "pytest-cov>=5.0",
"pytest-httpx>=0.30", "pytest-httpx>=0.30",
"pytest-timeout>=2.2",
"pytest-html>=4.1",
"testcontainers[postgres,redis]>=4.0", "testcontainers[postgres,redis]>=4.0",
"ruff>=0.4", "ruff>=0.4",
"fastapi>=0.110", "fastapi>=0.110",
"uvicorn>=0.27", "uvicorn>=0.27",
"websockets>=12.0",
] ]
[tool.setuptools.packages.find] [tool.setuptools.packages.find]
@ -60,6 +63,9 @@ markers = [
"integration: mark test as integration test (requires docker)", "integration: mark test as integration test (requires docker)",
"redis: mark test as requiring Redis", "redis: mark test as requiring Redis",
"postgres: mark test as requiring PostgreSQL", "postgres: mark test as requiring PostgreSQL",
"e2e: end-to-end backtest (requires server)",
"e2e_basic: basic function correctness test",
"e2e_capability: agent intelligence capability test",
] ]
[tool.ruff] [tool.ruff]

File diff suppressed because it is too large Load Diff

View File

@ -21,7 +21,6 @@ from typing import Any
from agentkit.core.plan_schema import ( from agentkit.core.plan_schema import (
ExecutionPlan, ExecutionPlan,
PlanStep, PlanStep,
PlanStepStatus,
SkillGap, SkillGap,
SkillGapLevel, SkillGapLevel,
) )
@ -268,7 +267,62 @@ class GoalPlanner:
goal: str, goal: str,
available_skills: list[str], available_skills: list[str],
) -> list[PlanStep]: ) -> list[PlanStep]:
"""分解简单目标为单步计划""" """分解简单目标
尝试通过常见任务动词模式识别多步骤结构
如果无法识别则回退到单步计划
"""
# 常见多步骤任务模式
task_patterns: list[tuple[list[str], str]] = [
# (verb_patterns, task_type)
(["搜索", "查找", "调研", "search", "find", "research"], "research"),
(["分析", "对比", "比较", "analyze", "compare"], "analysis"),
(["生成", "撰写", "", "报告", "generate", "write", "report"], "generation"),
(["部署", "发布", "deploy", "release"], "deployment"),
(["测试", "验证", "test", "verify"], "testing"),
]
# 检查目标是否包含多个阶段的动词
matched_phases: list[tuple[str, str]] = [] # (verb, phase_type)
for verbs, phase_type in task_patterns:
for verb in verbs:
if verb in goal.lower():
matched_phases.append((verb, phase_type))
break
# 如果匹配到 2+ 个不同阶段,生成多步计划
unique_phases = list(dict.fromkeys(pt for _, pt in matched_phases))
if len(unique_phases) >= 2:
steps: list[PlanStep] = []
for i, phase_type in enumerate(unique_phases):
phase_names = {
"research": "搜索调研",
"analysis": "分析处理",
"generation": "生成输出",
"deployment": "部署执行",
"testing": "测试验证",
}
phase_descs = {
"research": "搜索和收集相关信息",
"analysis": "分析收集到的信息,提取关键洞察",
"generation": "基于分析结果生成最终输出",
"deployment": "执行部署操作",
"testing": "验证执行结果",
}
required_skills = self._infer_required_skills(
phase_descs.get(phase_type, phase_type), available_skills
)
steps.append(PlanStep(
step_id=f"step-{i}",
name=phase_names.get(phase_type, f"Phase {i}"),
description=phase_descs.get(phase_type, f"Execute {phase_type} phase"),
dependencies=[f"step-{i - 1}"] if i > 0 else [],
parallel_group=i,
required_skills=required_skills,
))
return steps
# 回退到单步计划
required_skills = self._infer_required_skills(goal, available_skills) required_skills = self._infer_required_skills(goal, available_skills)
return [ return [
PlanStep( PlanStep(
@ -386,17 +440,31 @@ class GoalPlanner:
skills_str = ", ".join(available_skills) if available_skills else "" skills_str = ", ".join(available_skills) if available_skills else ""
prompt = ( prompt = (
f"Refine the following execution plan for the given goal.\n\n" "You are a task decomposition expert. Break down the given goal into a structured "
"execution plan with clear, actionable steps.\n\n"
f"Goal: {goal}\n\n" f"Goal: {goal}\n\n"
f"Initial Plan (generated by rules):\n{initial_summary}\n\n" f"Initial Plan (generated by rules, may need improvement):\n{initial_summary}\n\n"
f"Available Skills: {skills_str}\n\n" f"Available Skills/Tools: {skills_str}\n\n"
f"Context: {json.dumps(context, ensure_ascii=False) if context else 'None'}\n\n" f"Context: {json.dumps(context, ensure_ascii=False) if context else 'None'}\n\n"
'Respond ONLY with a JSON array of steps: ' "## Requirements for each step:\n"
'[{"name": "...", "description": "...", "dependencies": [], ' "- name: Short descriptive name (5-10 words)\n"
'"required_skills": [...]}]\n' "- description: Detailed description of what to do (at least 20 characters)\n"
"The dependencies field lists step indices (0-based) that must complete first.\n" "- dependencies: List of step indices (0-based) that must complete before this step\n"
"Each step should have a clear, specific description (at least 20 characters).\n" "- required_tools: List of tool/skill names from Available Skills that this step needs\n\n"
"Do not include any other text." "## Example (goal: 'Analyze competitor products and generate report'):\n"
'[\n'
' {"name": "Search competitor info", "description": "Search the web for detailed '
'information about each competitor product, including features, pricing, and reviews", '
'"dependencies": [], "required_tools": ["web_search"]},\n'
' {"name": "Analyze and compare", "description": "Analyze the gathered information, '
'identify key differences, strengths and weaknesses of each competitor", '
'"dependencies": [0], "required_tools": []},\n'
' {"name": "Generate comparison report", "description": "Compile the analysis into '
'a structured comparison report with recommendations", "dependencies": [1], '
'"required_tools": []}\n'
']\n\n'
"Respond ONLY with a JSON array of steps in the same format. "
"Do not include any other text or markdown."
) )
try: try:
@ -412,13 +480,15 @@ class GoalPlanner:
steps: list[PlanStep] = [] steps: list[PlanStep] = []
for i, defn in enumerate(step_defs): for i, defn in enumerate(step_defs):
depends_on = [f"step-{j}" for j in defn.get("dependencies", [])] depends_on = [f"step-{j}" for j in defn.get("dependencies", [])]
# Support both required_tools (new) and required_skills (legacy)
required = defn.get("required_tools", defn.get("required_skills", []))
steps.append(PlanStep( steps.append(PlanStep(
step_id=f"step-{i}", step_id=f"step-{i}",
name=defn.get("name", f"Step {i}"), name=defn.get("name", f"Step {i}"),
description=defn.get("description", ""), description=defn.get("description", ""),
dependencies=depends_on, dependencies=depends_on,
parallel_group=0, # 后续由 _build_parallel_groups 重新计算 parallel_group=0, # 后续由 _build_parallel_groups 重新计算
required_skills=defn.get("required_skills", []), required_skills=required,
)) ))
return ExecutionPlan( return ExecutionPlan(

View File

@ -14,22 +14,22 @@ from __future__ import annotations
import asyncio import asyncio
import json import json
import logging import logging
import time
from dataclasses import dataclass, field from dataclasses import dataclass, field
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import TYPE_CHECKING, Any from typing import TYPE_CHECKING, Any, Awaitable, Callable
from agentkit.core.exceptions import TaskCancelledError, TaskTimeoutError from agentkit.core.exceptions import TaskCancelledError, TaskTimeoutError
from agentkit.core.goal_planner import GoalPlanner from agentkit.core.goal_planner import GoalPlanner
from agentkit.core.plan_executor import PlanExecutor, PlanExecutionResult, StepExecutionResult from agentkit.core.plan_executor import PlanExecutor, PlanExecutionResult
from agentkit.core.plan_schema import ExecutionPlan, PlanStep, PlanStepStatus from agentkit.core.plan_schema import ExecutionPlan, PlanStep, PlanStepStatus
from agentkit.core.protocol import CancellationToken, TaskMessage, TaskResult, TaskStatus from agentkit.core.protocol import CancellationToken, TaskMessage, TaskResult, TaskStatus
from agentkit.core.react import ReActEvent, ReActResult, ReActStep from agentkit.core.react import ReActEvent, ReActResult, ReActStep
from agentkit.core.shared_workspace import SharedWorkspace
from agentkit.orchestrator.reflection import PipelineReflector, PipelineReplanner from agentkit.orchestrator.reflection import PipelineReflector, PipelineReplanner
from agentkit.orchestrator.pipeline_schema import Pipeline, PipelineResult, ReflectionReport, StageResult, StageStatus from agentkit.orchestrator.pipeline_schema import Pipeline, PipelineResult, StageResult, StageStatus
if TYPE_CHECKING: if TYPE_CHECKING:
from agentkit.core.compressor import CompressionStrategy, ContextCompressor from agentkit.core.compressor import CompressionStrategy
from agentkit.core.trace import TraceRecorder from agentkit.core.trace import TraceRecorder
from agentkit.memory.retriever import MemoryRetriever from agentkit.memory.retriever import MemoryRetriever
from agentkit.llm.gateway import LLMGateway from agentkit.llm.gateway import LLMGateway
@ -71,16 +71,23 @@ class PlanExecEngine:
llm_gateway: "LLMGateway | None" = None, llm_gateway: "LLMGateway | None" = None,
max_replans: int = _DEFAULT_MAX_REPLANS, max_replans: int = _DEFAULT_MAX_REPLANS,
default_timeout: float = 300.0, default_timeout: float = 300.0,
workspace: SharedWorkspace | None = None,
step_event_callback: "Callable[[str, dict[str, Any]], Awaitable[None]] | None" = None,
): ):
""" """
Args: Args:
llm_gateway: LLM Gateway传递给 GoalPlanner / PipelineReplanner llm_gateway: LLM Gateway传递给 GoalPlanner / PipelineReplanner
max_replans: 最大重规划次数 max_replans: 最大重规划次数
default_timeout: 默认超时秒数 default_timeout: 默认超时秒数
workspace: SharedWorkspace 实例用于步骤间状态传递
step_event_callback: 步骤事件回调用于非流式执行时推送进度
""" """
self._llm_gateway = llm_gateway self._llm_gateway = llm_gateway
self._max_replans = max_replans self._max_replans = max_replans
self._default_timeout = default_timeout self._default_timeout = default_timeout
self._workspace = workspace
self._step_event_callback = step_event_callback
self._confirmation_handler: Any | None = None
# 组合子组件 # 组合子组件
self._planner = GoalPlanner(llm_gateway=llm_gateway) self._planner = GoalPlanner(llm_gateway=llm_gateway)
@ -106,6 +113,7 @@ class PlanExecEngine:
retrieval_config: dict[str, Any] | None = None, retrieval_config: dict[str, Any] | None = None,
cancellation_token: CancellationToken | None = None, cancellation_token: CancellationToken | None = None,
timeout_seconds: float | None = None, timeout_seconds: float | None = None,
confirmation_handler: Any | None = None,
) -> ReActResult: ) -> ReActResult:
"""执行 Plan-and-Execute 流程 """执行 Plan-and-Execute 流程
@ -113,6 +121,7 @@ class PlanExecEngine:
2. Executor Phase: 逐步执行 2. Executor Phase: 逐步执行
3. Replanner Phase: 失败时重规划 3. Replanner Phase: 失败时重规划
""" """
self._confirmation_handler = confirmation_handler
effective_timeout = timeout_seconds if timeout_seconds is not None else self._default_timeout effective_timeout = timeout_seconds if timeout_seconds is not None else self._default_timeout
try: try:
@ -174,6 +183,7 @@ class PlanExecEngine:
retrieval_config: dict[str, Any] | None = None, retrieval_config: dict[str, Any] | None = None,
cancellation_token: CancellationToken | None = None, cancellation_token: CancellationToken | None = None,
timeout_seconds: float | None = None, timeout_seconds: float | None = None,
confirmation_handler: Any | None = None,
): ):
"""执行 Plan-and-Execute 流程,逐步 yield ReActEvent """执行 Plan-and-Execute 流程,逐步 yield ReActEvent
@ -185,6 +195,7 @@ class PlanExecEngine:
- "replanning": 触发重规划 - "replanning": 触发重规划
- "final_answer": 最终结果 - "final_answer": 最终结果
""" """
self._confirmation_handler = confirmation_handler
# Memory retrieval # Memory retrieval
if memory_retriever: if memory_retriever:
try: try:
@ -274,6 +285,16 @@ class PlanExecEngine:
plan_result = await executor.execute(current_plan, task_msg) plan_result = await executor.execute(current_plan, task_msg)
# Write step results to workspace for cross-execution state sharing
if self._workspace:
for sid, step_result in plan_result.step_results.items():
if step_result.status == PlanStepStatus.COMPLETED and step_result.result:
await self._workspace.write(
f"plan:{current_plan.plan_id}:step:{sid}:result",
step_result.result,
agent_id=agent_name or "plan_exec",
)
# 将步骤结果映射到 trajectory 并 yield 事件 # 将步骤结果映射到 trajectory 并 yield 事件
for sid, step_result in plan_result.step_results.items(): for sid, step_result in plan_result.step_results.items():
plan_step = current_plan.get_step(sid) plan_step = current_plan.get_step(sid)
@ -470,6 +491,17 @@ class PlanExecEngine:
available_skills=available_skills, available_skills=available_skills,
) )
# Emit plan_generated event
if self._step_event_callback:
try:
await self._step_event_callback("plan_generated", {
"plan_id": plan.plan_id,
"goal": plan.goal,
"steps": [s.to_dict() for s in plan.steps],
})
except Exception as e:
logger.warning(f"Step event callback failed: {e}")
trajectory.append(ReActStep( trajectory.append(ReActStep(
step=1, step=1,
action="plan_generated", action="plan_generated",
@ -591,6 +623,16 @@ class PlanExecEngine:
plan_result = await executor.execute(current_plan, task_msg) plan_result = await executor.execute(current_plan, task_msg)
# Write step results to workspace for cross-execution state sharing
if self._workspace:
for sid, step_result in plan_result.step_results.items():
if step_result.status == PlanStepStatus.COMPLETED and step_result.result:
await self._workspace.write(
f"plan:{current_plan.plan_id}:step:{sid}:result",
step_result.result,
agent_id=agent_name or "plan_exec",
)
# 将步骤结果映射到 trajectory # 将步骤结果映射到 trajectory
for sid, step_result in plan_result.step_results.items(): for sid, step_result in plan_result.step_results.items():
plan_step = current_plan.get_step(sid) plan_step = current_plan.get_step(sid)
@ -603,6 +645,20 @@ class PlanExecEngine:
tokens=0, tokens=0,
)) ))
# Emit step event callback
if self._step_event_callback:
event_type = "step_completed" if step_result.status == PlanStepStatus.COMPLETED else "step_failed"
try:
await self._step_event_callback(event_type, {
"step_id": sid,
"step_name": step_name,
"status": step_result.status.value,
"result": step_result.result,
"error": step_result.error,
})
except Exception as e:
logger.warning(f"Step event callback failed: {e}")
if trace_recorder is not None: if trace_recorder is not None:
trace_recorder.record_step( trace_recorder.record_step(
step=len(trajectory), step=len(trajectory),
@ -640,6 +696,17 @@ class PlanExecEngine:
# 保留已完成步骤的结果到新计划 # 保留已完成步骤的结果到新计划
self._merge_completed_results(current_plan, plan_result) self._merge_completed_results(current_plan, plan_result)
# Emit replanning event
if self._step_event_callback:
try:
await self._step_event_callback("replanning", {
"replan_count": replan_count,
"root_cause": reflection_report.root_cause,
"new_plan_id": current_plan.plan_id,
})
except Exception as e:
logger.warning(f"Step event callback failed: {e}")
trajectory.append(ReActStep( trajectory.append(ReActStep(
step=len(trajectory) + 1, step=len(trajectory) + 1,
action="replanning", action="replanning",
@ -712,18 +779,31 @@ class PlanExecEngine:
model: str, model: str,
system_prompt: str | None, system_prompt: str | None,
tools: list["Tool"] | None, tools: list["Tool"] | None,
step_executor_type: str = "react",
) -> PlanExecutor: ) -> PlanExecutor:
"""创建 PlanExecutor 实例 """创建 PlanExecutor 实例
使用 _LLMStepExecutor 作为 agent_pool使每个步骤通过 LLM 直接调用执行 Args:
step_executor_type: "react" 使用 ReActStepExecutor默认支持工具调用
"llm" 使用 _LLMStepExecutor LLM 调用无工具
""" """
step_executor = _LLMStepExecutor( if step_executor_type == "llm":
llm_gateway=self._llm_gateway, step_executor: _LLMStepExecutor | ReActStepExecutor = _LLMStepExecutor(
messages=messages, llm_gateway=self._llm_gateway,
model=model, messages=messages,
system_prompt=system_prompt, model=model,
tools=tools, system_prompt=system_prompt,
) tools=tools,
)
else:
step_executor = ReActStepExecutor(
llm_gateway=self._llm_gateway,
messages=messages,
model=model,
system_prompt=system_prompt,
tools=tools,
confirmation_handler=self._confirmation_handler,
)
return PlanExecutor( return PlanExecutor(
agent_pool=step_executor, agent_pool=step_executor,
max_retries=1, max_retries=1,
@ -845,7 +925,7 @@ class PlanExecEngine:
name = plan_step.name if plan_step else sid name = plan_step.name if plan_step else sid
failed_info.append(f"- {name}: {sr.error if sr else 'unknown error'}") failed_info.append(f"- {name}: {sr.error if sr else 'unknown error'}")
if failed_info: if failed_info:
return f"Plan execution failed.\nFailed steps:\n" + "\n".join(failed_info) return "Plan execution failed.\nFailed steps:\n" + "\n".join(failed_info)
return "Plan execution completed with no output." return "Plan execution completed with no output."
# 简单聚合:将所有成功步骤结果格式化 # 简单聚合:将所有成功步骤结果格式化
@ -909,6 +989,151 @@ class _LLMStepExecutor:
return agent return agent
class ReActStepExecutor:
    """Step executor that runs every plan step through a ReAct loop.

    Serves as the ``agent_pool`` handed to ``PlanExecutor``: it hands out
    ``_ReActStepAgent`` instances that all share this executor's gateway,
    base messages, model, system prompt, tools, step budget and confirmation
    handler.
    """

    def __init__(
        self,
        llm_gateway: "LLMGateway | None" = None,
        messages: list[dict[str, str]] | None = None,
        model: str = "default",
        system_prompt: str | None = None,
        tools: list["Tool"] | None = None,
        max_steps: int = 5,
        confirmation_handler: Any | None = None,
    ):
        """Store the settings that every agent created by this executor shares.

        Args:
            llm_gateway: Gateway passed to each step agent.
            messages: Base conversation copied into every step; defaults to [].
            model: Model name forwarded to each step agent.
            system_prompt: Optional system prompt forwarded to each step agent.
            tools: Tools made available to each step agent; defaults to [].
            max_steps: ReAct step budget forwarded to each step agent.
            confirmation_handler: Handler forwarded to each step agent.
        """
        self._llm_gateway = llm_gateway
        self._messages = messages or []
        self._model = model
        self._system_prompt = system_prompt
        self._tools = tools or []
        self._max_steps = max_steps
        self._confirmation_handler = confirmation_handler
        # Agents already handed out, keyed by skill name / lookup key.
        self._agents: dict[str, _ReActStepAgent] = {}

    def _build_agent(self, name: str) -> "_ReActStepAgent":
        """Create a step agent configured with this executor's shared settings."""
        # Single construction path so both public entry points forward the
        # same arguments (previously get_agent dropped confirmation_handler).
        return _ReActStepAgent(
            name=name,
            llm_gateway=self._llm_gateway,
            messages=self._messages,
            model=self._model,
            system_prompt=self._system_prompt,
            tools=self._tools,
            max_steps=self._max_steps,
            confirmation_handler=self._confirmation_handler,
        )

    async def create_agent_from_skill(self, skill_name: str):
        """Create a fresh step agent for ``skill_name``, cache it and return it.

        An agent already cached under the same name is replaced.
        """
        agent = self._build_agent(skill_name)
        self._agents[skill_name] = agent
        return agent

    def get_agent(self, key: str):
        """Return the cached agent for ``key``, creating and caching one if absent."""
        if key in self._agents:
            return self._agents[key]
        agent = self._build_agent(key)
        self._agents[key] = agent
        return agent
class _ReActStepAgent:
"""ReAct 循环步骤 Agent
PlanStep 的描述作为任务交给 ReActEngine 执行
支持工具调用和多步 think-act-observe 循环
"""
def __init__(
self,
name: str,
llm_gateway: "LLMGateway | None" = None,
messages: list[dict[str, str]] | None = None,
model: str = "default",
system_prompt: str | None = None,
tools: list["Tool"] | None = None,
max_steps: int = 5,
confirmation_handler: Any | None = None,
):
self.name = name
self._llm_gateway = llm_gateway
self._messages = messages or []
self._model = model
self._system_prompt = system_prompt
self._tools = tools or []
self._max_steps = max_steps
self._confirmation_handler = confirmation_handler
async def execute(self, task_msg: TaskMessage) -> "TaskResult":
"""执行步骤:通过 ReActEngine 循环调用"""
if self._llm_gateway is None:
raise RuntimeError(f"No LLM gateway available for step '{task_msg.task_id}'")
from agentkit.core.react import ReActEngine
input_data = task_msg.input_data
step_name = input_data.get("step_name", task_msg.task_id)
step_description = input_data.get("step_description", "")
dep_results = input_data.get("dependency_results", {})
# 构建步骤 prompt
prompt_parts = [f"Execute the following task step:\n\nStep: {step_name}\nDescription: {step_description}"]
if dep_results:
prompt_parts.append(
f"\nResults from previous steps:\n{json.dumps(dep_results, ensure_ascii=False, indent=2)}"
)
prompt_parts.append("\nProvide a clear, structured result for this step.")
# 构建 ReActEngine
engine = ReActEngine(
llm_gateway=self._llm_gateway,
max_steps=self._max_steps,
)
# 构建 messages
step_messages: list[dict[str, str]] = list(self._messages)
step_messages.append({"role": "user", "content": "\n".join(prompt_parts)})
# 执行 ReAct 循环
react_result = await engine.execute(
messages=step_messages,
tools=self._tools if self._tools else None,
model=self._model,
agent_name=self.name,
system_prompt=self._system_prompt,
confirmation_handler=self._confirmation_handler,
)
now = datetime.now(timezone.utc)
status = TaskStatus.COMPLETED.value
if react_result.status in ("timeout", "cancelled"):
status = TaskStatus.FAILED.value
return TaskResult(
task_id=task_msg.task_id,
agent_name=self.name,
status=status,
output_data={
"content": react_result.output,
"steps": react_result.total_steps,
"tokens": react_result.total_tokens,
},
error_message=None if react_result.status == "success" else react_result.status,
started_at=now,
completed_at=now,
)
class _LLMStepAgent: class _LLMStepAgent:
"""LLM 直接调用步骤 Agent """LLM 直接调用步骤 Agent

View File

@ -12,8 +12,12 @@ from __future__ import annotations
import asyncio import asyncio
import logging import logging
from datetime import datetime, timezone
from typing import Any from typing import Any
from agentkit.core.protocol import TaskMessage, TaskResult, TaskStatus
from agentkit.core.shared_workspace import SharedWorkspace
from .expert import Expert from .expert import Expert
from .plan import ( from .plan import (
CollaborationPlan, CollaborationPlan,
@ -33,10 +37,18 @@ class TeamOrchestrator:
MAX_RETRIES = 1 # Retry once on failure before fallback MAX_RETRIES = 1 # Retry once on failure before fallback
MAX_INTERACTION_ROUNDS = 20 # Prevent infinite collaboration loops MAX_INTERACTION_ROUNDS = 20 # Prevent infinite collaboration loops
MAX_REPLANS = 2 # Maximum replanning attempts before fallback
def __init__(self, team: ExpertTeam) -> None: def __init__(
self,
team: ExpertTeam,
workspace: "SharedWorkspace | None" = None,
max_replans: int = 2,
) -> None:
self._team = team self._team = team
self._workspace = workspace or team._workspace
self._interaction_count = 0 self._interaction_count = 0
self._max_replans = max_replans
async def execute_plan(self, plan: CollaborationPlan) -> dict[str, Any]: async def execute_plan(self, plan: CollaborationPlan) -> dict[str, Any]:
"""Execute a CollaborationPlan within the team. """Execute a CollaborationPlan within the team.
@ -58,11 +70,12 @@ class TeamOrchestrator:
} }
plan.status = PlanStatus.EXECUTING plan.status = PlanStatus.EXECUTING
self._team._status = TeamStatus.EXECUTING self._team.set_status(TeamStatus.EXECUTING)
self._interaction_count = 0 # Reset for each plan execution self._interaction_count = 0 # Reset for each plan execution
phase_results: dict[str, dict[str, Any]] = {} phase_results: dict[str, dict[str, Any]] = {}
retry_counts: dict[str, int] = {} # Per-phase retry tracking retry_counts: dict[str, int] = {} # Per-phase retry tracking
replan_count = 0
try: try:
while True: while True:
@ -144,13 +157,33 @@ class TeamOrchestrator:
result = await self._execute_phase(phase, plan, phase_results) result = await self._execute_phase(phase, plan, phase_results)
if result is None: if result is None:
# Still failed after retry — fallback to single agent # Still failed after retry — try replanning before fallback
logger.warning( if replan_count < self._max_replans:
f"Phase {phase.id} failed after retry, falling back to single agent" replan_count += 1
) logger.info(
return await self._fallback_to_single_agent( f"Phase {phase.id} failed after retry, "
plan, phase_results f"attempting replan ({replan_count}/{self._max_replans})"
) )
await self._broadcast_event(
"replanning",
{
"phase_id": phase.id,
"replan_count": replan_count,
"reason": "phase_failed",
},
)
# Reset phase status for replan
plan.update_phase_status(phase.id, PhaseStatus.PENDING)
result = await self._execute_phase(phase, plan, phase_results)
if result is None:
# Still failed after replan — fallback to single agent
logger.warning(
f"Phase {phase.id} failed after replan, falling back to single agent"
)
return await self._fallback_to_single_agent(
plan, phase_results
)
phase_results[phase.id] = result phase_results[phase.id] = result
@ -205,11 +238,11 @@ class TeamOrchestrator:
# Synthesize final result # Synthesize final result
plan.status = PlanStatus.COMPLETED plan.status = PlanStatus.COMPLETED
self._team._status = TeamStatus.SYNTHESIZING self._team.set_status(TeamStatus.SYNTHESIZING)
final_result = await self._synthesize_results(plan, phase_results) final_result = await self._synthesize_results(plan, phase_results)
self._team._status = TeamStatus.COMPLETED self._team.set_status(TeamStatus.COMPLETED)
return { return {
"status": "completed", "status": "completed",
"result": final_result, "result": final_result,
@ -247,18 +280,75 @@ class TeamOrchestrator:
) )
# Get the assigned expert # Get the assigned expert
expert = self._team._experts.get(phase.assigned_expert) expert = self._team.get_expert(phase.assigned_expert)
if not expert or not expert.is_active: if not expert or not expert.is_active:
raise RuntimeError( # Fallback to lead expert or first active expert
f"Expert '{phase.assigned_expert}' not available" expert = self._team.lead_expert
if not expert or not expert.is_active:
active = self._team.active_experts
if not active:
raise RuntimeError(
f"Expert '{phase.assigned_expert}' not available and no active fallback"
)
expert = active[0]
logger.warning(
f"Expert '{phase.assigned_expert}' not available, "
f"falling back to '{expert.config.name}'"
) )
# Execute the task via the expert's agent # Build TaskMessage for real execution
# In a real implementation, this would call expert.agent.execute(task) input_data: dict[str, Any] = {
# For now, we simulate by having the expert process the task "phase_name": phase.name,
result: dict[str, Any] = { "phase_description": phase.task_description or phase.name,
"output": f"Phase '{phase.name}' completed by {phase.assigned_expert}" "team_id": self._team.team_id,
} }
# Inject dependency results from previous phases
if phase.depends_on:
dep_results: dict[str, dict[str, Any]] = {}
for dep_id in phase.depends_on:
# Try workspace first, then fall back to in-memory phase_results
if self._workspace:
ws_data = await self._workspace.read(
f"team:{self._team.team_id}:phase:{dep_id}:result"
)
if ws_data:
dep_results[dep_id] = ws_data.get("value", {})
continue
if dep_id in phase_results:
dep_results[dep_id] = phase_results[dep_id]
if dep_results:
input_data["dependency_results"] = dep_results
task_msg = TaskMessage(
task_id=phase.id,
agent_name=expert.config.name,
task_type="team_phase",
priority=0,
input_data=input_data,
callback_url=None,
created_at=datetime.now(timezone.utc),
)
# Execute the task via the expert's agent
task_result: TaskResult = await expert.agent.execute(task_msg)
if task_result.status != TaskStatus.COMPLETED.value:
raise RuntimeError(
f"Agent execution failed: {task_result.error_message or 'unknown error'}"
)
result = task_result.output_data or {"content": ""}
# Write result to workspace for cross-phase state sharing
if self._workspace:
try:
await self._workspace.write(
f"team:{self._team.team_id}:phase:{phase.id}:result",
result,
agent_id=expert.config.name,
)
except Exception as e:
logger.warning(f"Workspace write failed for phase {phase.id}: {e}")
# Check milestone # Check milestone
if phase.milestone: if phase.milestone:
@ -334,60 +424,268 @@ class TeamOrchestrator:
self, expert: Expert, phase: PlanPhase self, expert: Expert, phase: PlanPhase
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Run a single competitor for a competitive phase.""" """Run a single competitor for a competitive phase."""
# Simulate expert execution # Build TaskMessage for real execution
task_msg = TaskMessage(
task_id=f"{phase.id}_{expert.config.name}",
agent_name=expert.config.name,
task_type="team_competitive",
priority=0,
input_data={
"phase_name": phase.name,
"phase_description": phase.task_description or phase.name,
"team_id": self._team.team_id,
},
callback_url=None,
created_at=datetime.now(timezone.utc),
)
task_result: TaskResult = await expert.agent.execute(task_msg)
if task_result.status != TaskStatus.COMPLETED.value:
raise RuntimeError(
f"Competitor {expert.config.name} failed: {task_result.error_message or 'unknown'}"
)
return { return {
"expert": expert.config.name, "expert": expert.config.name,
"output": f"Competitive result from {expert.config.name}", "output": task_result.output_data or {},
"status": task_result.status,
} }
def _get_llm_gateway(self) -> Any:
    """Return the first usable LLM gateway found on the team's experts.

    The lead expert is checked first, then the active experts in order.
    An expert whose agent has no ``_llm_gateway`` attribute, or whose
    gateway is ``None``, is skipped so the search continues instead of
    stopping on an unset gateway.

    Returns:
        The gateway object, or ``None`` when no expert provides one.
    """

    def _gateway_of(expert: Any) -> Any:
        # getattr with defaults tolerates a missing expert, agent or attribute.
        agent = getattr(expert, "agent", None)
        return getattr(agent, "_llm_gateway", None)

    lead_gateway = _gateway_of(self._team.lead_expert)
    if lead_gateway is not None:
        return lead_gateway

    # Fallback: first active expert that actually carries a gateway.
    for expert in self._team.active_experts:
        gateway = _gateway_of(expert)
        if gateway is not None:
            return gateway
    return None
@staticmethod
def _build_result_summaries(results: list[dict[str, Any]], max_len: int = 500) -> list[str]:
    """Build one text summary per competitor result for LLM evaluation.

    Each summary is labelled ``Result <n> (by <expert>)`` with ``n`` counted
    from 1 in input order, followed by the result text truncated to
    ``max_len`` characters. The text is the ``content`` entry when the
    result's ``output`` is a dict, otherwise ``str(output)``.

    Args:
        results: Competitor results; ``expert`` and ``output`` keys are read.
        max_len: Maximum number of characters of content kept per summary.

    Returns:
        The summaries, in the same order as ``results``.
    """
    summaries: list[str] = []
    for position, entry in enumerate(results, start=1):
        output = entry.get("output", {})
        content = output.get("content", str(output)) if isinstance(output, dict) else str(output)
        # Agents may return a missing or non-text "content"; normalise it so
        # the slice below cannot raise TypeError.
        if content is None:
            content = ""
        elif not isinstance(content, str):
            content = str(content)
        summaries.append(f"Result {position} (by {entry.get('expert', 'unknown')}):\n{content[:max_len]}")
    return summaries
async def _llm_pick_best(
    self, task: str, results: list[dict[str, Any]]
) -> dict[str, Any]:
    """Ask the LLM to pick the best candidate result.

    Args:
        task: Task description shown to the LLM.
        results: Candidate results; must be non-empty.

    Returns:
        The result whose 1-based number is the first in-range number in the
        LLM reply. Falls back to ``results[0]`` when no gateway is available,
        the call fails, or the reply contains no in-range number.
    """
    import re  # local import: the file's import block is not touched by this fix

    gateway = self._get_llm_gateway()
    if not gateway:
        return results[0]

    # Build the evaluation prompt.
    result_summaries = self._build_result_summaries(results)
    prompt = (
        f"Task: {task}\n\n"
        f"Below are {len(results)} candidate results. Pick the BEST one based on "
        f"completeness, accuracy, and relevance to the task.\n\n"
        + "\n---\n".join(result_summaries)
        + "\n\nReply with ONLY the number of the best result (e.g., '1' or '2')."
    )

    try:
        response = await gateway.chat(
            messages=[{"role": "user", "content": prompt}],
            model="default",
        )
        choice = response.content.strip()
        # Parse whole numbers, not single digits, so a reply of "10" selects
        # result 10 rather than result 1.
        for token in re.findall(r"\d+", choice):
            idx = int(token) - 1
            if 0 <= idx < len(results):
                return results[idx]
    except Exception as e:
        # Best effort: any gateway or parsing failure falls back below.
        logger.warning(f"LLM best-pick failed, falling back to first result: {e}")

    return results[0]
async def _llm_vote(
    self, task: str, results: list[dict[str, Any]]
) -> dict[str, Any]:
    """Select a result by LLM voting with the voter's own result excluded.

    One ranking request is issued per result. Each request shows every
    other result under its original ``Result <n>`` label and asks for a
    best-first ranking. A result ranked at position ``p`` (0-based) among
    ``k`` candidates earns ``k - p`` points. When a request fails, each of
    its candidates earns one point.

    Args:
        task: Task description shown to the LLM.
        results: Candidate results; must be non-empty.

    Returns:
        The highest-scoring result, or ``results[0]`` when no gateway is
        available, there is nothing to rank, or no vote could be counted.
    """
    import re  # local import: the file's import block is not touched by this fix

    gateway = self._get_llm_gateway()
    if not gateway:
        return results[0]
    if len(results) < 2:
        # With fewer than two results every ballot would be empty.
        return results[0]

    scores: dict[int, float] = {}
    result_summaries = self._build_result_summaries(results)

    for voter_idx in range(len(results)):
        # Exclude the voter's own result to avoid self-voting bias.
        candidates = [i for i in range(len(results)) if i != voter_idx]
        prompt = (
            f"Task: {task}\n\n"
            f"Below are {len(candidates)} candidate results. Rank them from best to worst.\n\n"
            + "\n---\n".join(result_summaries[i] for i in candidates)
            + "\n\nReply with ONLY a comma-separated list of result numbers, best first (e.g., '2,1,3')."
            + " Use the result numbers exactly as labelled above."
        )

        try:
            response = await gateway.chat(
                messages=[{"role": "user", "content": prompt}],
                model="default",
            )
            # The summaries keep their original labels, so a number in the
            # reply is an original 1-based index, not a position in the
            # filtered candidate list. Ignore the voter's own number, unknown
            # numbers and repeats.
            ranked: list[int] = []
            for token in re.findall(r"\d+", response.content):
                original_idx = int(token) - 1
                if original_idx in candidates and original_idx not in ranked:
                    ranked.append(original_idx)
            for rank_pos, original_idx in enumerate(ranked):
                scores[original_idx] = scores.get(original_idx, 0) + (
                    len(candidates) - rank_pos
                )
        except Exception as e:
            logger.warning(f"Voter {voter_idx} vote failed: {e}")
            # On failure, give one point to each of the other results.
            for original_idx in candidates:
                scores[original_idx] = scores.get(original_idx, 0) + 1

    if not scores:
        return results[0]

    best_idx = max(scores, key=scores.get)  # type: ignore[arg-type]
    return results[best_idx]
async def _llm_fuse(
    self, task: str, results: list[dict[str, Any]]
) -> dict[str, Any]:
    """Fuse several expert results into one via the LLM.

    Args:
        task: Task description shown to the LLM.
        results: Results to fuse; each entry's ``output`` is read.

    Returns:
        On success, a dict with ``content`` (the fused text), ``fused_from``
        and ``strategy`` set to "fusion". When no gateway is available, the
        call fails, or the reply is empty, a dict with ``content`` set to
        the stringified outputs joined by blank lines, plus ``fused_from``.
    """

    def _concatenated() -> dict[str, Any]:
        # Shared fallback: join the stringified outputs of all results.
        combined = "\n\n".join(
            str(r.get("output", {})) for r in results
        )
        return {"content": combined, "fused_from": len(results)}

    gateway = self._get_llm_gateway()
    if not gateway:
        return _concatenated()

    result_summaries = self._build_result_summaries(results, max_len=800)
    prompt = (
        f"Task: {task}\n\n"
        f"Below are {len(results)} results from different experts working on the same task. "
        f"Fuse them into a single comprehensive result that combines the best elements.\n\n"
        + "\n---\n".join(result_summaries)
        + "\n\nProvide the fused result directly."
    )

    try:
        response = await gateway.chat(
            messages=[{"role": "user", "content": prompt}],
            model="default",
        )
        fused_text = response.content.strip()
    except Exception as e:
        logger.warning(f"LLM fusion failed, falling back to concatenation: {e}")
        return _concatenated()

    if not fused_text:
        # An empty reply must not replace the experts' real output.
        logger.warning("LLM fusion returned empty content, falling back to concatenation")
        return _concatenated()

    return {
        "content": fused_text,
        "fused_from": len(results),
        "strategy": "fusion",
    }
async def _merge_results( async def _merge_results(
self, phase: PlanPhase, results: list[dict[str, Any]] self, phase: PlanPhase, results: list[dict[str, Any]]
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Merge competitive parallel results based on merge strategy.""" """Merge competitive parallel results based on merge strategy."""
if not results:
return {}
strategy = phase.merge_strategy or MergeStrategy.BEST strategy = phase.merge_strategy or MergeStrategy.BEST
task_desc = phase.task_description or phase.name
if strategy == MergeStrategy.BEST: if strategy == MergeStrategy.BEST:
# Lead Expert picks the best result selected = await self._llm_pick_best(task_desc, results)
lead = self._team.lead_expert return {
if lead: "merged": True,
return { "strategy": "best",
"merged": True, "selected": selected,
"strategy": "best", "all_results": results,
"selected": results[0], }
"all_results": results,
}
return results[0]
elif strategy == MergeStrategy.VOTE: elif strategy == MergeStrategy.VOTE:
# All experts vote — for now, simple majority with Lead Expert tie-breaking selected = await self._llm_vote(task_desc, results)
return { return {
"merged": True, "merged": True,
"strategy": "vote", "strategy": "vote",
"selected": results[0], "selected": selected,
"all_results": results, "all_results": results,
} }
elif strategy == MergeStrategy.FUSION: elif strategy == MergeStrategy.FUSION:
# Lead Expert fuses all results fused = await self._llm_fuse(task_desc, results)
lead = self._team.lead_expert return {
if lead: "merged": True,
return { "strategy": "fusion",
"merged": True, "fused_from": len(results),
"strategy": "fusion", "selected": fused,
"fused_from": len(results), "all_results": results,
"all_results": results, }
}
return results[0]
return results[0] return results[0]
async def _check_milestone( async def _check_milestone(
self, phase: PlanPhase, result: dict[str, Any] self, phase: PlanPhase, result: dict[str, Any]
) -> bool: ) -> bool:
"""Check if a phase result passes its milestone checkpoint.""" """Check if a phase result passes its milestone checkpoint.
# In a real implementation, this would use LLM evaluation
# For now, always pass if there's a result Uses LLM evaluation when available, falls back to basic content check.
return result is not None """
milestone = phase.milestone
if not milestone:
return True
# Basic check: result must have non-empty content
output = result.get("output", result) if isinstance(result, dict) else result
content = output.get("content", str(output)) if isinstance(output, dict) else str(output)
if not content or content.strip() == "":
return False
# LLM-based milestone evaluation
gateway = self._get_llm_gateway()
if not gateway:
# Without LLM, do basic keyword matching
milestone_lower = milestone.lower()
content_lower = content.lower()
# Check if milestone keywords appear in content
keywords = [w for w in milestone_lower.split() if len(w) > 2]
if keywords and not any(kw in content_lower for kw in keywords):
return False
return True
prompt = (
f"Task: {phase.task_description or phase.name}\n"
f"Milestone requirement: {milestone}\n"
f"Result:\n{content[:500]}\n\n"
f"Does this result meet the milestone requirement? "
f"Reply with ONLY 'yes' or 'no'."
)
try:
response = await gateway.chat(
messages=[{"role": "user", "content": prompt}],
model="default",
)
answer = response.content.strip().lower()
return answer.startswith("yes")
except Exception as e:
logger.warning(f"Milestone LLM check failed for phase {phase.id}: {e}")
# On LLM failure, pass the milestone (conservative — don't block on infra issues)
return True
async def _synthesize_results( async def _synthesize_results(
self, plan: CollaborationPlan, phase_results: dict[str, dict[str, Any]] self, plan: CollaborationPlan, phase_results: dict[str, dict[str, Any]]
@ -431,10 +729,23 @@ class TeamOrchestrator:
fallback_result = None fallback_result = None
if expert: if expert:
try: try:
# Execute the original task with a single expert # Execute the original task with a single expert via real agent
fallback_result = { task_msg = TaskMessage(
"output": f"Task completed by {expert.config.name} (fallback mode)", task_id=f"fallback_{plan.id}",
"task": plan.task, agent_name=expert.config.name,
task_type="fallback",
priority=0,
input_data={
"task": plan.task,
"phase_results": phase_results,
"team_id": self._team.team_id,
},
callback_url=None,
created_at=datetime.now(timezone.utc),
)
task_result: TaskResult = await expert.agent.execute(task_msg)
fallback_result = task_result.output_data or {
"content": f"Task completed by {expert.config.name} (fallback mode)"
} }
except Exception as e: except Exception as e:
logger.error(f"Fallback agent execution failed: {e}") logger.error(f"Fallback agent execution failed: {e}")
@ -452,7 +763,7 @@ class TeamOrchestrator:
self, event_type: str, data: dict[str, Any] self, event_type: str, data: dict[str, Any]
) -> None: ) -> None:
"""Broadcast an orchestration event to the team channel.""" """Broadcast an orchestration event to the team channel."""
if self._team._handoff_transport: if self._team.handoff_transport:
await self._team._handoff_transport.send( await self._team.handoff_transport.send(
self._team._team_channel, {"type": event_type, **data} self._team.team_channel, {"type": event_type, **data}
) )

View File

@ -3,9 +3,8 @@
import logging import logging
import re import re
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Any
from .config import ExpertConfig, ExpertTemplate from .config import ExpertConfig
from .registry import ExpertTemplateRegistry from .registry import ExpertTemplateRegistry
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -67,7 +66,9 @@ class ExpertTeamRouter:
result.matched = True result.matched = True
result.team_mode = True result.team_mode = True
result.task_content = task if task else content # Fall back to full content when no task after prefix result.task_content = (
task if task else content
) # Fall back to full content when no task after prefix
result.match_method = "explicit_team" result.match_method = "explicit_team"
if expert_list_str: if expert_list_str:
@ -84,7 +85,9 @@ class ExpertTeamRouter:
for name in result.specified_experts: for name in result.specified_experts:
template = self._registry.get(name) template = self._registry.get(name)
if template is None: if template is None:
logger.warning(f"ExpertTemplate '{name}' not found, will be dynamically generated") logger.warning(
f"ExpertTemplate '{name}' not found, will be dynamically generated"
)
else: else:
# No specific experts — auto-compose # No specific experts — auto-compose
result.auto_compose = True result.auto_compose = True
@ -108,6 +111,28 @@ class ExpertTeamRouter:
result.complexity = complexity result.complexity = complexity
return result return result
def can_handle(self, content: str) -> bool:
    """Report whether an expert team can be formed for the given content.

    Used by CostAwareRouter to decide whether to upgrade REACT to TEAM_COLLAB.

    Auto-compose can form a team from any available templates, so the
    answer is True exactly when the registry holds at least one template.

    Args:
        content: The user request. Kept for interface compatibility; it
            does not influence the result (see the note in the body).

    Returns:
        True if the registry exists and contains at least one template,
        False otherwise.
    """
    # NOTE: an earlier revision scanned every template's name and
    # description for overlap with ``content`` before reaching the
    # auto-compose rule. Since every path after the empty-registry guard
    # returned True, that scan could never change the outcome; it only cost
    # O(templates x words) per call and could raise on a template with a
    # non-string name. The check below is the equivalent, direct form.
    registry = self._registry
    if not registry:
        return False
    return bool(registry._templates)
def resolve_expert_configs(self, specified_experts: list[str]) -> list[ExpertConfig]: def resolve_expert_configs(self, specified_experts: list[str]) -> list[ExpertConfig]:
"""Resolve expert names to ExpertConfig instances. """Resolve expert names to ExpertConfig instances.

View File

@ -78,6 +78,29 @@ class ExpertTeam:
def active_experts(self) -> list[Expert]: def active_experts(self) -> list[Expert]:
return [e for e in self._experts.values() if e.is_active] return [e for e in self._experts.values() if e.is_active]
@property
def workspace(self) -> SharedWorkspace:
    """Return the shared workspace held by this team (read-only accessor)."""
    shared = self._workspace
    return shared
@property
def handoff_transport(self):
    """Return the handoff transport held by this team (read-only accessor)."""
    transport = self._handoff_transport
    return transport
@property
def team_channel(self) -> str:
    """Return the name of the channel this team communicates on (read-only)."""
    channel = self._team_channel
    return channel
def get_expert(self, name: str) -> Expert | None:
    """Look up a team member by name.

    Args:
        name: Key of the expert in the team's expert mapping.

    Returns:
        The matching expert, or None when no expert is stored under ``name``.
    """
    roster = self._experts
    return roster.get(name)
def set_status(self, status: TeamStatus) -> None:
    """Replace the team's current status with ``status``.

    Args:
        status: The new status value to store on the team.
    """
    self._status = status
async def create_team( async def create_team(
self, self,
lead_config: ExpertConfig, lead_config: ExpertConfig,

View File

@ -99,6 +99,8 @@ chat_manager = ChatConnectionManager()
_VALID_TEAM_EVENT_TYPES = frozenset({ _VALID_TEAM_EVENT_TYPES = frozenset({
"team_formed", "expert_step", "expert_result", "team_formed", "expert_step", "expert_result",
"plan_update", "team_synthesis", "team_dissolved", "plan_update", "team_synthesis", "team_dissolved",
"plan_step", "phase_started", "phase_completed", "phase_failed",
"replanning",
}) })

View File

@ -0,0 +1,250 @@
"""U7: 多步研究任务端到端验证
mock LLM Gateway 验证 PlanExecEngine + SharedWorkspace + ReActStepExecutor 的完整闭环
场景"分析竞品并生成报告" 搜索分析生成
"""
from __future__ import annotations
import json
from unittest.mock import AsyncMock, MagicMock
import pytest
from agentkit.core.plan_exec_engine import PlanExecEngine
from agentkit.core.shared_workspace import SharedWorkspace
from agentkit.llm.protocol import LLMResponse, TokenUsage
def _make_mock_gateway(responses: list[str]) -> MagicMock:
    """Build a mock LLM gateway whose ``chat`` replays ``responses`` in order.

    Every reply is wrapped in an ``LLMResponse`` so it has the same shape the
    tests expect from the real gateway's ``chat()``. Once the scripted replies
    are used up, each further call returns a JSON error payload with zero
    token usage.

    Args:
        responses: Reply texts to hand out, one per ``chat`` call.

    Returns:
        A ``MagicMock`` whose ``chat`` attribute is an ``AsyncMock``.
    """
    served = 0

    def _wrap(text, model, prompt_tokens, completion_tokens):
        # Single place that shapes a reply like a gateway response.
        return LLMResponse(
            content=text,
            model=model,
            usage=TokenUsage(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
            ),
        )

    async def _chat(messages, model="default", **kwargs):
        nonlocal served
        if served >= len(responses):
            # Script exhausted: answer with the sentinel error payload.
            return _wrap('{"error": "no more responses"}', model, 0, 0)
        text = responses[served]
        served += 1
        return _wrap(text, model, 10, len(text) // 4)

    gateway = MagicMock()
    gateway.chat = AsyncMock(side_effect=_chat)
    return gateway
class TestMultiStepResearchTask:
    """End-to-end checks for multi-step research tasks run by PlanExecEngine."""

    @staticmethod
    def _step(step_id, name, description, dependencies=(), required_tools=()):
        """Return one planner step dict with the keys used by these tests."""
        return {
            "step_id": step_id,
            "name": name,
            "description": description,
            "dependencies": list(dependencies),
            "required_tools": list(required_tools),
        }

    @staticmethod
    def _plan_json(goal, steps):
        """Serialize a scripted planner reply (goal plus steps) to JSON."""
        return json.dumps({"goal": goal, "steps": steps})

    @pytest.mark.asyncio
    async def test_plan_exec_research_task_with_workspace(self):
        """A three-step research plan runs and leaves plan keys in the workspace."""
        shared = SharedWorkspace()

        # Scripted LLM replies: the first is the planner's decomposition of
        # the goal into three steps, the rest are one reply per step.
        plan_reply = self._plan_json(
            "分析竞品并生成报告",
            [
                self._step(
                    "search",
                    "搜索竞品信息",
                    "搜索飞书和钉钉的竞品信息",
                    required_tools=["web_search"],
                ),
                self._step(
                    "analyze",
                    "分析竞品对比",
                    "基于搜索结果分析飞书和钉钉的优缺点",
                    dependencies=["search"],
                ),
                self._step(
                    "report",
                    "生成报告",
                    "基于分析结果生成竞品对比报告",
                    dependencies=["analyze"],
                ),
            ],
        )
        scripted = [
            plan_reply,
            "搜索完成:飞书是字节跳动的企业协作平台,钉钉是阿里巴巴的企业通讯工具",
            "分析完成:飞书优势在于文档协作,钉钉优势在于即时通讯和考勤管理",
            "报告:飞书与钉钉竞品对比报告\n1. 文档协作:飞书领先\n2. 即时通讯:钉钉领先\n3. 考勤管理:钉钉领先",
        ]

        engine = PlanExecEngine(
            llm_gateway=_make_mock_gateway(scripted),
            workspace=shared,
            max_replans=0,
        )
        result = await engine.execute(
            messages=[{"role": "user", "content": "分析竞品并生成报告"}],
            tools=[],
            model="default",
            agent_name="research_agent",
        )

        # The run finished and produced some output.
        assert result.status in ("success", "partial")
        assert result.output is not None
        assert len(result.output) > 0

        # The workspace holds at least one key under the "plan:" prefix.
        keys = await shared.list_keys()
        plan_keys = [key for key in keys if key.startswith("plan:")]
        assert len(plan_keys) > 0, f"Expected workspace keys for plan steps, got: {keys}"

    @pytest.mark.asyncio
    async def test_workspace_dependency_chain(self):
        """A value written to the workspace reads back, and rewrites bump the version."""
        shared = SharedWorkspace()
        result_key = "plan:test_plan:step:search:result"
        first_text = "搜索结果:飞书和钉钉的信息"

        # First write, then read it back.
        await shared.write(
            result_key,
            {"content": first_text},
            agent_id="search_agent",
        )
        entry = await shared.read(result_key)
        assert entry is not None
        assert entry["value"]["content"] == first_text
        assert entry["version"] == 1

        # Second write to the same key reports version 2.
        version = await shared.write(
            result_key,
            {"content": "更新后的搜索结果"},
            agent_id="search_agent",
        )
        assert version == 2

        # Reading again reflects the incremented version.
        entry = await shared.read(result_key)
        assert entry["version"] == 2

    @pytest.mark.asyncio
    async def test_step_event_callback(self):
        """The step_event_callback receives plan and step events during a run."""
        events: list[tuple[str, dict]] = []

        async def on_step_event(event_type: str, data: dict):
            events.append((event_type, data))

        plan_reply = self._plan_json(
            "简单任务",
            [self._step("step1", "执行步骤1", "执行一个简单步骤")],
        )
        engine = PlanExecEngine(
            llm_gateway=_make_mock_gateway([plan_reply, "步骤1完成"]),
            step_event_callback=on_step_event,
            max_replans=0,
        )

        await engine.execute(
            messages=[{"role": "user", "content": "简单任务"}],
            tools=[],
        )

        # The callback saw the plan being generated.
        event_types = [kind for kind, _ in events]
        assert "plan_generated" in event_types, f"Expected plan_generated event, got: {event_types}"

        # At least one step-level event (completed or failed) was reported.
        step_events = [
            item for item in events if item[0] in ("step_completed", "step_failed")
        ]
        assert len(step_events) > 0, f"Expected step events, got: {events}"

    @pytest.mark.asyncio
    async def test_no_workspace_fallback(self):
        """Execution still finishes when the engine is given workspace=None."""
        plan_reply = self._plan_json(
            "无workspace任务",
            [self._step("step1", "步骤1", "执行步骤")],
        )
        engine = PlanExecEngine(
            llm_gateway=_make_mock_gateway([plan_reply, "完成"]),
            workspace=None,
            max_replans=0,
        )

        result = await engine.execute(
            messages=[{"role": "user", "content": "无workspace任务"}],
            tools=[],
        )

        assert result.status in ("success", "partial")

    @pytest.mark.asyncio
    async def test_no_callback_fallback(self):
        """Execution still finishes when step_event_callback is None."""
        plan_reply = self._plan_json(
            "无callback任务",
            [self._step("step1", "步骤1", "执行步骤")],
        )
        engine = PlanExecEngine(
            llm_gateway=_make_mock_gateway([plan_reply, "完成"]),
            step_event_callback=None,
            max_replans=0,
        )

        result = await engine.execute(
            messages=[{"role": "user", "content": "无callback任务"}],
            tools=[],
        )

        assert result.status in ("success", "partial")

View File

@ -7,6 +7,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
import pytest import pytest
from agentkit.core.handoff_transport import InProcessHandoffTransport from agentkit.core.handoff_transport import InProcessHandoffTransport
from agentkit.core.protocol import TaskResult, TaskStatus
from agentkit.experts.config import ExpertConfig from agentkit.experts.config import ExpertConfig
from agentkit.experts.expert import Expert from agentkit.experts.expert import Expert
from agentkit.experts.orchestrator import TeamOrchestrator from agentkit.experts.orchestrator import TeamOrchestrator
@ -59,6 +60,18 @@ def _make_mock_expert(
"bound_skills": config.bound_skills, "bound_skills": config.bound_skills,
"is_lead": is_lead, "is_lead": is_lead,
} }
# Mock agent.execute() to return a successful TaskResult
mock_agent = MagicMock()
mock_agent.execute = AsyncMock(return_value=TaskResult(
task_id="test",
agent_name=name,
status=TaskStatus.COMPLETED.value,
output_data={"content": f"Result from {name}"},
error_message=None,
started_at=None,
completed_at=None,
))
expert.agent = mock_agent
return expert return expert
@ -189,7 +202,7 @@ class TestSerialPhaseExecution:
await orchestrator.execute_plan(plan) await orchestrator.execute_plan(plan)
assert plan.status == PlanStatus.COMPLETED assert plan.status == PlanStatus.COMPLETED
assert team._status == TeamStatus.COMPLETED assert team.status == TeamStatus.COMPLETED
# ── 子任务并行阶段执行测试 ──────────────────────────────── # ── 子任务并行阶段执行测试 ────────────────────────────────
@ -227,7 +240,7 @@ class TestSubtaskParallelExecution:
call_count += 1 call_count += 1
if phase.id == "phase_1": if phase.id == "phase_1":
raise RuntimeError("Simulated failure") raise RuntimeError("Simulated failure")
return await original_execute_phase(phase, p, pr) return await original_execute(phase, p, pr)
with patch.object( with patch.object(
orchestrator, "_execute_phase", side_effect=mock_execute_phase orchestrator, "_execute_phase", side_effect=mock_execute_phase
@ -421,6 +434,55 @@ class TestRetryAndFallback:
assert result["status"] == "fallback" assert result["status"] == "fallback"
assert plan.status == PlanStatus.FALLBACK assert plan.status == PlanStatus.FALLBACK
@pytest.mark.asyncio
async def test_replan_before_fallback_on_failure(self):
    """After the retry also fails a replan is tried; if it succeeds there is no fallback."""
    orchestrator = TeamOrchestrator(_make_team_with_experts(), max_replans=1)
    plan = _make_serial_plan(num_phases=1)
    call_count = 0

    async def mock_execute_phase(phase, p, pr):
        nonlocal call_count
        call_count += 1
        if call_count > 2:
            # Third call is the replan attempt: let it succeed.
            outcome = {"output": "replan ok"}
            p.update_phase_status(phase.id, PhaseStatus.COMPLETED, {"output": "replan ok"})
            return outcome
        # Initial attempt and its retry both fail.
        p.update_phase_status(phase.id, PhaseStatus.FAILED)
        return None

    with patch.object(orchestrator, "_execute_phase", side_effect=mock_execute_phase):
        result = await orchestrator.execute_plan(plan)

    # Expected sequence: 1 initial + 1 retry + 1 replan, ending completed.
    assert call_count == 3
    assert result["status"] == "completed"
@pytest.mark.asyncio
async def test_replan_exhausted_then_fallback(self):
    """When every attempt fails, the plan ends in fallback once replans run out."""
    orchestrator = TeamOrchestrator(_make_team_with_experts(), max_replans=2)
    plan = _make_serial_plan(num_phases=1)

    async def mock_execute_phase(phase, p, pr):
        # Unconditional failure for every attempt.
        p.update_phase_status(phase.id, PhaseStatus.FAILED)
        return None

    with patch.object(orchestrator, "_execute_phase", side_effect=mock_execute_phase):
        result = await orchestrator.execute_plan(plan)

    # Retries and replans are all used up, so the result is a fallback.
    assert result["status"] == "fallback"
# ── 最大交互轮次测试 ────────────────────────────────────── # ── 最大交互轮次测试 ──────────────────────────────────────
@ -438,7 +500,7 @@ class TestMaxInteractionRounds:
# Create a plan with many phases that would take many rounds # Create a plan with many phases that would take many rounds
plan = _make_serial_plan(num_phases=5) plan = _make_serial_plan(num_phases=5)
result = await orchestrator.execute_plan(plan) await orchestrator.execute_plan(plan)
# Should stop after 1 round, not completing all phases # Should stop after 1 round, not completing all phases
# Only the first phase should complete (1 interaction round) # Only the first phase should complete (1 interaction round)
@ -629,8 +691,8 @@ class TestExpertUnavailable:
"""Expert 不可用测试""" """Expert 不可用测试"""
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_inactive_expert_causes_phase_failure(self): async def test_inactive_expert_falls_back_to_active(self):
"""分配的 Expert 不活跃导致阶段失败""" """分配的 Expert 不活跃时自动降级到其他可用 Expert"""
team = _make_team_with_experts() team = _make_team_with_experts()
# Mark the lead expert as inactive # Mark the lead expert as inactive
team._experts["lead"].is_active = False team._experts["lead"].is_active = False
@ -639,12 +701,12 @@ class TestExpertUnavailable:
result = await orchestrator.execute_plan(plan) result = await orchestrator.execute_plan(plan)
# Phase should fail because expert is not active → retry → still fail → fallback # Phase should complete via fallback expert (member1)
assert result["status"] == "fallback" assert result["status"] == "completed"
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_nonexistent_expert_causes_phase_failure(self): async def test_nonexistent_expert_falls_back_to_lead(self):
"""分配的 Expert 不存在导致阶段失败""" """分配的 Expert 不存在时自动降级到 lead expert"""
team = _make_team_with_experts() team = _make_team_with_experts()
orchestrator = TeamOrchestrator(team) orchestrator = TeamOrchestrator(team)
@ -664,5 +726,20 @@ class TestExpertUnavailable:
result = await orchestrator.execute_plan(plan) result = await orchestrator.execute_plan(plan)
# Expert doesn't exist → phase fails → retry → still fails → fallback # Phase should complete via fallback to lead expert
assert result["status"] == "completed"
@pytest.mark.asyncio
async def test_all_experts_unavailable_causes_failure(self):
    """With every expert marked inactive, the single-phase plan ends in fallback."""
    team = _make_team_with_experts()
    # Deactivate every expert on the team.
    for member in team._experts.values():
        member.is_active = False

    plan = _make_serial_plan(num_phases=1)
    result = await TeamOrchestrator(team).execute_plan(plan)

    # No expert is available, so the phase fails and the plan falls back.
    assert result["status"] == "fallback"