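"""TeamOrchestrator - pipeline-mode execution engine for expert teams.

Drives an ExpertTeam through a task in pipeline mode:

1. The Lead Expert receives the task and decomposes it into phases (PlanPhase);
   phases are linked by dependencies (depends_on).
2. Phases are topologically sorted by dependency: phases in the same layer that
   do not depend on each other run in parallel (asyncio.gather), layers run
   sequentially.
3. Each phase creates its own ConfigDrivenAgent instance (context isolation, KTD3).
4. Data is passed between phases through SharedWorkspace
   ({task_id}/phase/{phase_id}/output).
5. The Lead Expert synthesizes the results of all phases (BEST strategy).
6. The final result is returned.

Lifecycle: FORMING → PLANNING → EXECUTING → SYNTHESIZING → COMPLETED

Design rationale:
- KTD2: the Lead decomposes into phases rather than sub-tasks, which supports
  serial pipeline stages.
- KTD3: context isolation through independent ConfigDrivenAgent instances.
- KTD6: the PLANNING status is set during the decomposition stage.
"""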
|
||
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import copy
|
||
import json
|
||
import logging
|
||
import re
|
||
from datetime import datetime, timezone
|
||
from typing import Any
|
||
|
||
from agentkit.core.config_driven import ConfigDrivenAgent
|
||
from agentkit.core.protocol import TaskMessage, TaskResult, TaskStatus
|
||
from agentkit.llm.gateway import LLMGateway
|
||
|
||
from .expert import Expert
|
||
from .plan import PhaseStatus, PlanPhase, PlanStatus, TeamPlan
|
||
from .team import ExpertTeam, TeamStatus
|
||
|
||
# Module-level logger, named after this module, used by every orchestration step below.
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class TeamOrchestrator:
    """Pipeline orchestration engine.

    The Lead Expert decomposes the task into phases with dependencies
    (``depends_on``). Phases are executed in topological order: phases in the
    same layer run in parallel (``asyncio.gather``), layers run sequentially.
    Each phase gets an independent ConfigDrivenAgent instance for context
    isolation (KTD3) whenever the team exposes an agent pool.
    """

    MAX_PHASES = 10  # Maximum phases Lead Expert can decompose
    MAX_RETRIES = 1  # Retry once on phase failure before marking failed

    def __init__(self, team: ExpertTeam) -> None:
        """Bind the orchestrator to the team it drives.

        Args:
            team: The expert team whose experts, workspace, pool and transport
                are used during execution.
        """
        self._team = team
        # Temporary agents created for context isolation (KTD3), kept so they
        # can be removed from the pool afterwards: phase_id -> temp agent name.
        self._temp_agents: dict[str, str] = {}

    async def execute(self, task: str) -> dict[str, Any]:
        """Execute a task in pipeline mode.

        Flow:
            1. Emit ``team_formed``.
            2. Set PLANNING status; the Lead Expert decomposes the task into phases.
            3. Emit ``plan_update`` with the phase list.
            4. Set EXECUTING status, topologically sort the phases and run the
               layers one after another (phases of one layer in parallel).
            5. If no phase completed, fall back to a single agent.
            6. Set SYNTHESIZING status; the Lead synthesizes the results (BEST strategy).
            7. Set COMPLETED status, emit ``team_synthesis`` and ``team_dissolved``.

        Args:
            task: The task description handed to the team.

        Returns:
            A dict with:
            - "status": "completed" | "failed" | "fallback"
            - "result": final synthesized (or fallback) result
            - "phase_results": dict of phase_id -> result
            - "plan": the TeamPlan instance (``None`` when no expert was
              available and no plan could be created)
            - "error": only present on the "failed" status
        """
        lead = self._team.lead_expert
        if not lead or not lead.is_active:
            active = self._team.active_experts
            if not active:
                return {
                    "status": "failed",
                    "result": None,
                    "phase_results": {},
                    # Keep the result shape uniform with the other return paths.
                    "plan": None,
                    "error": "No active expert available",
                }
            lead = active[0]
            logger.warning(f"Lead expert not available, falling back to '{lead.config.name}'")

        lead_name = lead.config.name
        plan = TeamPlan(task=task, lead_expert=lead_name, status=PlanStatus.EXECUTING)

        # 1. Emit team_formed event.
        # Experts are sent as IExpertInfo-compatible dicts together with an empty
        # plan_phases list to match the frontend contract.
        expert_infos = [
            {
                "id": e.config.name,
                "name": e.config.name,
                "persona": e.config.persona,
                "avatar": e.config.avatar,
                "color": e.config.color,
                "is_lead": e.config.name == lead_name,
                "bound_skills": list(e.config.bound_skills),
                "status": "active",
            }
            for e in self._team.active_experts
        ]
        await self._broadcast_event(
            "team_formed",
            {
                "team_id": self._team.team_id,
                "status": self._team.status.value,
                "lead_expert": lead_name,
                "experts": expert_infos,
                "plan_phases": [],
            },
        )

        # 2. Set PLANNING status, Lead decomposes task into phases.
        self._team.set_status(TeamStatus.PLANNING)
        phases = await self._decompose_task(lead, task)
        if not phases:
            logger.warning("Task decomposition returned no phases, executing as single phase")
            phases = [self._single_phase(lead, task)]
        plan.phases = self._limit_phases(phases)

        # 3. Emit plan_update with phase list.
        await self._broadcast_event(
            "plan_update",
            {
                "plan_id": plan.id,
                "plan_phases": [ph.to_dict() for ph in plan.phases],
            },
        )

        # 4. Set EXECUTING status, execute phases.
        self._team.set_status(TeamStatus.EXECUTING)
        phase_results: dict[str, dict[str, Any]] = {}

        try:
            # Execute layers sequentially, phases within a layer in parallel.
            for layer in plan.topological_sort():
                # Phases already failed through a dependency are no longer PENDING.
                ready = [ph for ph in layer if ph.status == PhaseStatus.PENDING]
                if not ready:
                    continue

                outcomes = await asyncio.gather(
                    *(self._execute_phase(ph, plan) for ph in ready),
                    return_exceptions=True,
                )
                for ph, outcome in zip(ready, outcomes):
                    if isinstance(outcome, (Exception, asyncio.CancelledError)):
                        await self._record_phase_failure(ph, outcome, plan, phase_results)
                    else:
                        phase_results[ph.id] = outcome

            # 5. Check if all phases failed.
            completed = plan.completed_phases
            if not completed:
                # NOTE(review): unlike the exception path below, no team_dissolved
                # event is emitted here - confirm whether the frontend expects one.
                logger.warning("All phases failed, falling back to single agent")
                return await self._fallback_to_single_agent(task, plan, phase_results)

            # 6. Lead Expert synthesizes results (BEST strategy).
            self._team.set_status(TeamStatus.SYNTHESIZING)
            plan.status = PlanStatus.COMPLETED
            final_result = await self._synthesize_results(lead, task, completed)
            self._team.set_status(TeamStatus.COMPLETED)

            # 7. Emit team_synthesis event.
            await self._broadcast_event(
                "team_synthesis",
                {
                    "content": final_result.get("content", ""),
                    "phases_completed": len(completed),
                    "phases_total": len(plan.phases),
                },
            )

            # 8. Emit team_dissolved event.
            await self._broadcast_event("team_dissolved", {"team_id": self._team.team_id})

            return {
                "status": "completed",
                "result": final_result,
                "phase_results": phase_results,
                "plan": plan,
            }

        except Exception as e:
            if isinstance(e, ValueError):
                # Circular dependency or invalid reference from topological_sort.
                logger.error(f"Pipeline execution failed (invalid plan): {e}")
            else:
                logger.error(f"Pipeline execution failed: {e}")
            plan.status = PlanStatus.FAILED
            await self._broadcast_event("team_dissolved", {"team_id": self._team.team_id})
            return await self._fallback_to_single_agent(task, plan, phase_results)

    @staticmethod
    def _single_phase(lead: Expert, task: str) -> PlanPhase:
        """Build the one-phase plan entry used whenever decomposition is unavailable."""
        return PlanPhase(name="执行", assigned_expert=lead.config.name, task_description=task)

    def _limit_phases(self, phases: list[PlanPhase]) -> list[PlanPhase]:
        """Cap the plan at MAX_PHASES and drop dependencies on phases that were cut.

        Without the pruning, a kept phase could still reference the id of a
        truncated phase. Dependencies are only touched when truncation happens.
        """
        kept = phases[: self.MAX_PHASES]
        if len(kept) < len(phases):
            logger.warning(
                f"Plan has {len(phases)} phases, keeping the first {self.MAX_PHASES}"
            )
            kept_ids = {ph.id for ph in kept}
            for ph in kept:
                # presumably topological_sort rejects unknown ids - verify in TeamPlan
                ph.depends_on = [dep for dep in ph.depends_on if dep in kept_ids]
        return kept

    async def _record_phase_failure(
        self,
        phase: PlanPhase,
        error: BaseException,
        plan: TeamPlan,
        phase_results: dict[str, dict[str, Any]],
    ) -> None:
        """Record a failed phase, announce it and cascade the failure to its dependents."""
        message = str(error)
        logger.error(f"Phase {phase.id} ({phase.name}) failed: {message}")
        plan.update_phase_status(phase.id, PhaseStatus.FAILED, {"error": message})
        phase_results[phase.id] = {"error": message}
        await self._broadcast_event(
            "phase_failed",
            {
                "phase_id": phase.id,
                "phase_name": phase.name,
                "error": message,
            },
        )
        await self._mark_dependents_failed(phase.id, plan, phase_results)

    async def _decompose_task(self, lead: Expert, task: str) -> list[PlanPhase]:
        """Let the Lead Expert decompose the task into phases using the LLM.

        Args:
            lead: Expert acting as lead; its gateway and model are used.
            task: The original task description.

        Returns:
            The parsed PlanPhase list. When no gateway is available, the LLM call
            fails, or nothing valid can be parsed, a single phase carrying the
            original task and assigned to the lead is returned instead.
        """
        gateway = self._get_llm_gateway(lead)
        if not gateway:
            logger.warning("No LLM gateway available, treating task as single phase")
            return [self._single_phase(lead, task)]

        lead_name = lead.config.name
        member_names = [
            e.config.name for e in self._team.active_experts if e.config.name != lead_name
        ]
        # Phases go to team members; the lead only does the work when it is alone.
        available_experts = member_names if member_names else [lead_name]
        expert_list = ", ".join(available_experts)

        prompt = (
            f"You are the Lead Expert in a pipeline team. Decompose the following task into "
            f"at most {self.MAX_PHASES} phases with dependencies.\n\n"
            f"Task: {task}\n\n"
            f"Available experts: {expert_list}\n\n"
            f"Return a JSON array of phase objects, each with:\n"
            f'- "name": phase name (e.g., "规划", "前端", "后端", "QA", "评审")\n'
            f'- "assigned_expert": name of the expert to assign '
            f"(must be one of: {expert_list})\n"
            f'- "task_description": clear phase task description\n'
            f'- "depends_on": array of phase names this phase depends on (empty array if none)\n\n'
            f"Example:\n"
            f'[{{"name":"规划","assigned_expert":"tech_lead",'
            f'"task_description":"设计架构","depends_on":[]}},'
            f'{{"name":"前端","assigned_expert":"frontend",'
            f'"task_description":"实现UI","depends_on":["规划"]}}]\n\n'
            f"Return ONLY the JSON array, no other text."
        )

        try:
            response = await gateway.chat(
                messages=[{"role": "user", "content": prompt}],
                model=self._get_model(lead),
            )
            phases = self._parse_phases(response.content, available_experts, lead_name)
            if phases:
                return phases
            logger.warning("LLM decomposition returned no valid phases")
        except Exception as e:
            # Best effort: any gateway/parsing problem degrades to a single phase.
            logger.warning(f"LLM task decomposition failed: {e}")

        return [self._single_phase(lead, task)]

    @staticmethod
    def _extract_json_array(content: Any) -> list[Any]:
        """Pull the JSON array out of an LLM reply.

        The widest ``[...]`` span is tried first. If that span is not valid JSON
        (for example because bracketed prose follows the array), every ``[`` is
        tried as a start position and the first decodable list that contains at
        least one object is used.

        Returns:
            The decoded list, or ``[]`` when ``content`` is not a string or holds
            no usable array.
        """
        if not isinstance(content, str):
            return []
        widest = re.search(r"\[.*\]", content, re.DOTALL)
        if not widest:
            return []
        try:
            parsed = json.loads(widest.group(0))
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, list):
            return parsed

        decoder = json.JSONDecoder()
        start = content.find("[")
        while start != -1:
            try:
                candidate, _ = decoder.raw_decode(content, start)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, list) and any(isinstance(x, dict) for x in candidate):
                return candidate
            start = content.find("[", start + 1)
        return []

    @staticmethod
    def _parse_phases(
        content: str, available_experts: list[str], lead_name: str
    ) -> list[PlanPhase]:
        """Parse the LLM response into a PlanPhase list.

        Malformed entries are skipped rather than aborting the whole plan:
        non-dict items, items without a non-empty string ``name``, and
        non-string field values are tolerated.

        Args:
            content: Raw LLM reply expected to contain a JSON array of phases.
            available_experts: Names a phase may be assigned to.
            lead_name: Expert used when ``assigned_expert`` is missing or invalid.

        Returns:
            Phases in reply order, with ``depends_on`` resolved from phase names
            to phase ids. Unknown, self-referencing and duplicate dependencies
            are dropped. Empty when nothing usable is found.
        """

        def _text(value: Any) -> str:
            # LLM output may carry null/number fields; treat those as missing.
            return value.strip() if isinstance(value, str) else ""

        # First pass: create phases (which assigns ids) and build name -> id.
        name_to_id: dict[str, str] = {}
        drafts: list[tuple[PlanPhase, list[Any]]] = []
        for item in TeamOrchestrator._extract_json_array(content):
            if not isinstance(item, dict):
                continue
            name = _text(item.get("name"))
            if not name:
                continue
            assigned = _text(item.get("assigned_expert"))
            if assigned not in available_experts:
                assigned = lead_name
            task_desc = _text(item.get("task_description")) or name
            dep_names = item.get("depends_on", [])
            if not isinstance(dep_names, list):
                dep_names = []

            phase = PlanPhase(
                name=name,
                assigned_expert=assigned,
                task_description=task_desc,
                depends_on=[],  # Resolved to ids in the second pass
            )
            drafts.append((phase, dep_names))
            name_to_id[name] = phase.id

        # Second pass: resolve depends_on from names to ids.
        phases: list[PlanPhase] = []
        for phase, dep_names in drafts:
            for dep_name in dep_names:
                dep_id = name_to_id.get(dep_name.strip()) if isinstance(dep_name, str) else None
                if not dep_id:
                    logger.warning(
                        f"Phase '{phase.name}' depends on unknown phase '{dep_name}', ignoring"
                    )
                elif dep_id == phase.id:
                    # A phase waiting on itself would make the plan unsortable.
                    logger.warning(f"Phase '{phase.name}' depends on itself, ignoring")
                elif dep_id not in phase.depends_on:
                    phase.depends_on.append(dep_id)
            phases.append(phase)
        return phases

    async def _execute_phase(self, phase: PlanPhase, plan: TeamPlan) -> dict[str, Any]:
        """Execute a single phase using the assigned expert.

        Resolves the expert (falling back to the lead, then to the first active
        expert), gathers the results of completed dependency phases from the
        plan, runs the phase on an isolated agent (KTD3) with up to MAX_RETRIES
        retries, then writes the output to the team workspace and emits the
        phase events.

        Args:
            phase: The phase to run; its status, result and assigned expert are
                updated in place.
            plan: The plan the phase belongs to.

        Returns:
            The agent's output data (``{"content": ""}`` when it is empty).

        Raises:
            RuntimeError: No active expert is available, or the agent reported a
                non-completed status on the last attempt.
            Exception: Whatever the last attempt raised.
        """
        # Resolve the assigned expert.
        expert = self._team.get_expert(phase.assigned_expert)
        if not expert or not expert.is_active:
            expert = self._team.lead_expert
            if not expert or not expert.is_active:
                active = self._team.active_experts
                if not active:
                    raise RuntimeError(
                        f"Expert '{phase.assigned_expert}' not available and no active fallback"
                    )
                expert = active[0]
            logger.warning(
                f"Expert '{phase.assigned_expert}' not available, "
                f"falling back to '{expert.config.name}'"
            )
            phase.assigned_expert = expert.config.name

        expert_name = expert.config.name
        phase.status = PhaseStatus.RUNNING

        await self._broadcast_event(
            "phase_started",
            {
                "phase_id": phase.id,
                "phase_name": phase.name,
                "assigned_expert": phase.assigned_expert,
                "depends_on": list(phase.depends_on),
            },
        )

        # Read dependency outputs from the in-memory plan rather than the workspace.
        dependency_outputs: dict[str, Any] = {}
        for dep_id in phase.depends_on:
            dep_phase = plan.get_phase(dep_id)
            if dep_phase and dep_phase.status == PhaseStatus.COMPLETED and dep_phase.result:
                dependency_outputs[dep_phase.name] = dep_phase.result.get(
                    "content", str(dep_phase.result)
                )

        await self._broadcast_event(
            "expert_step",
            {
                "expert_id": expert_name,
                "expert_name": expert_name,
                "expert_color": expert.config.color,
                "content": phase.task_description,
                "step": phase.id,
                "phase_id": phase.id,
                "phase_name": phase.name,
            },
        )

        # The agent only sees the phase task plus the outputs of its dependencies.
        input_data: dict[str, Any] = {
            "task": phase.task_description,
            "team_id": self._team.team_id,
            "phase_id": phase.id,
            "phase_name": phase.name,
            "is_phase": True,
            "dependency_outputs": dependency_outputs,
        }
        if dependency_outputs:
            sections = []
            for dep_name, output in dependency_outputs.items():
                text = output if isinstance(output, str) else str(output)
                # Each dependency output is capped at 500 characters in the context.
                sections.append(f"[{dep_name}]:\n{text[:500]}")
            input_data["context"] = "前置阶段输出:\n" + "\n---\n".join(sections)

        task_msg = TaskMessage(
            task_id=phase.id,
            agent_name=expert_name,
            task_type="team_phase",
            priority=0,
            input_data=input_data,
            callback_url=None,
            created_at=datetime.now(timezone.utc),
        )

        agent = await self._get_isolated_agent(expert, phase)
        last_error: str | None = None

        try:
            for attempt in range(self.MAX_RETRIES + 1):
                # NOTE(review): the retry also covers the workspace write below, so
                # a write failure re-runs the agent - confirm this is intended.
                try:
                    task_result: TaskResult = await agent.execute(task_msg)

                    if task_result.status != TaskStatus.COMPLETED.value:
                        last_error = task_result.error_message or "unknown error"
                        if attempt < self.MAX_RETRIES:
                            logger.info(f"Retrying phase {phase.id} (attempt {attempt + 1})")
                            continue
                        raise RuntimeError(f"Agent execution failed: {last_error}")

                    result: dict[str, Any] = task_result.output_data or {"content": ""}
                    phase.status = PhaseStatus.COMPLETED
                    phase.result = result
                    content = result.get("content", str(result))

                    # Publish the phase output for other consumers of the workspace.
                    await self._team.workspace.write(
                        f"{plan.id}/phase/{phase.id}/output",
                        content,
                        expert_name,
                    )

                    await self._broadcast_event(
                        "expert_result",
                        {
                            "expert_id": expert_name,
                            "expert_name": expert_name,
                            "expert_color": expert.config.color,
                            "content": content,
                            "phase_id": phase.id,
                        },
                    )

                    result_summary = content
                    if isinstance(result_summary, str) and len(result_summary) > 200:
                        result_summary = result_summary[:200] + "..."
                    await self._broadcast_event(
                        "phase_completed",
                        {
                            "phase_id": phase.id,
                            "phase_name": phase.name,
                            "result_summary": result_summary,
                        },
                    )
                    return result

                except Exception as e:
                    last_error = str(e)
                    if attempt < self.MAX_RETRIES:
                        logger.info(f"Retrying phase {phase.id} (attempt {attempt + 1})")
                        continue
                    # Last attempt: the caller marks the phase FAILED and emits the event.
                    raise
        finally:
            # Remove the temporary isolated agent whether the phase succeeded or not.
            await self._cleanup_isolated_agent(phase)

        # Only reachable when the retry loop performs no attempt at all
        # (MAX_RETRIES overridden with a negative value).
        phase.status = PhaseStatus.FAILED
        await self._broadcast_event(
            "phase_failed",
            {
                "phase_id": phase.id,
                "phase_name": phase.name,
                "error": last_error or "unknown error",
            },
        )
        raise RuntimeError(f"Phase {phase.id} ({phase.name}) failed: {last_error}")

    async def _get_isolated_agent(self, expert: Expert, phase: PlanPhase) -> ConfigDrivenAgent:
        """Get an isolated ConfigDrivenAgent instance for the phase.

        With an agent pool, a temporary agent is created from a deep copy of the
        expert's config under a phase-specific name (KTD3) and remembered for
        cleanup. Without a pool, or when creation fails, the expert's own agent
        is returned.
        """
        pool = self._team.pool
        if pool is None:
            # No pool available (e.g., in tests), use the expert's existing agent.
            return expert.agent

        temp_config = copy.deepcopy(expert.config)
        # NOTE(review): only the first 8 characters of the phase id are used;
        # confirm phase ids cannot share that prefix within one plan.
        temp_config.name = f"{expert.config.name}__phase_{phase.id[:8]}"

        try:
            agent = await pool.create_agent(temp_config)
        except Exception as e:
            logger.warning(
                f"Failed to create isolated agent for phase {phase.id}, "
                f"using expert's existing agent: {e}"
            )
            return expert.agent

        self._temp_agents[phase.id] = temp_config.name
        return agent

    async def _cleanup_isolated_agent(self, phase: PlanPhase) -> None:
        """Remove the temporary isolated agent of a phase, if one was created.

        Removal failures are logged and otherwise ignored.
        """
        pool = self._team.pool
        if pool is None:
            return

        temp_name = self._temp_agents.pop(phase.id, None)
        if not temp_name:
            return
        try:
            await pool.remove_agent(temp_name)
        except Exception as e:
            logger.warning(f"Failed to clean up isolated agent '{temp_name}': {e}")

    async def _mark_dependents_failed(
        self, failed_phase_id: str, plan: TeamPlan, phase_results: dict[str, dict[str, Any]]
    ) -> None:
        """Mark every still-pending phase that depends on the failed phase as FAILED.

        The failure cascades: dependents of a newly failed phase are marked too.
        Each marked phase gets an error result, an entry in ``phase_results`` and
        a ``phase_failed`` event.
        """
        for ph in plan.phases:
            # Only PENDING phases are touched, which also ends the recursion.
            if ph.status != PhaseStatus.PENDING or failed_phase_id not in ph.depends_on:
                continue

            ph.status = PhaseStatus.FAILED
            ph.result = {"error": f"Dependency phase '{failed_phase_id}' failed"}
            phase_results[ph.id] = {"error": f"Dependency '{failed_phase_id}' failed"}
            await self._broadcast_event(
                "phase_failed",
                {
                    "phase_id": ph.id,
                    "phase_name": ph.name,
                    "error": f"Dependency phase '{failed_phase_id}' failed",
                },
            )
            await self._mark_dependents_failed(ph.id, plan, phase_results)

    @staticmethod
    def _result_text(result: Any) -> str:
        """Return a phase result's content as text.

        Uses the ``content`` entry of a dict result (the whole dict rendered with
        ``str`` when the key is missing) and converts anything that is not
        already a string, so callers can slice and join safely.
        """
        content = result.get("content", str(result)) if isinstance(result, dict) else str(result)
        return content if isinstance(content, str) else str(content)

    async def _synthesize_results(
        self, lead: Expert, task: str, completed_phases: list[PlanPhase]
    ) -> dict[str, Any]:
        """Let the Lead Expert synthesize the phase results (BEST strategy).

        Args:
            lead: Expert whose gateway and model perform the synthesis.
            task: The original task description.
            completed_phases: Phases whose results are merged.

        Returns:
            ``{"content": ""}`` when there is nothing to merge. Otherwise a dict
            with "content", "strategy" ("best") and "phases_completed". A single
            result is passed through unchanged; several results are merged by
            the LLM, or joined with blank lines when no gateway is available or
            the LLM call fails.
        """
        results = [ph.result or {} for ph in completed_phases]
        if not results:
            return {"content": ""}

        def _packaged(content: Any) -> dict[str, Any]:
            return {"content": content, "strategy": "best", "phases_completed": len(results)}

        # A single result needs no synthesis.
        if len(results) == 1:
            only = results[0]
            return _packaged(only.get("content", str(only)) if isinstance(only, dict) else str(only))

        def _concatenated() -> dict[str, Any]:
            return _packaged("\n\n".join(self._result_text(r) for r in results))

        gateway = self._get_llm_gateway(lead)
        if not gateway:
            # Without LLM, concatenate all results.
            return _concatenated()

        # Build truncated per-phase summaries for the LLM.
        summaries = [
            f"Phase {idx}: {ph.name} (by {ph.assigned_expert}, task: {ph.task_description[:100]}):\n"
            f"{self._result_text(ph.result or {})[:500]}"
            for idx, ph in enumerate(completed_phases, start=1)
        ]
        prompt = (
            f"Original task: {task}\n\n"
            f"Below are {len(results)} phase results from your team members. "
            f"Synthesize them into a single comprehensive final result that "
            f"best addresses the original task.\n\n"
            + "\n---\n".join(summaries)
            + "\n\nProvide the synthesized result directly."
        )

        try:
            response = await gateway.chat(
                messages=[{"role": "user", "content": prompt}],
                model=self._get_model(lead),
            )
            return _packaged(response.content.strip())
        except Exception as e:
            logger.warning(f"LLM synthesis failed, falling back to concatenation: {e}")
            return _concatenated()

    async def _fallback_to_single_agent(
        self,
        task: str,
        plan: TeamPlan,
        phase_results: dict[str, dict[str, Any]],
    ) -> dict[str, Any]:
        """Fall back to single-agent mode when pipeline execution fails.

        The lead expert (or the first active expert) is asked to complete the
        original task, with the phase results gathered so far as input.

        Returns:
            A dict with "status" set to "fallback", the agent's output (or an
            ``{"error": ...}`` dict) under "result", plus "phase_results" and
            "plan". An agent that ends in a non-completed status without any
            output is reported as an error.
        """
        plan.status = PlanStatus.FALLBACK
        logger.warning("Falling back to single agent mode")

        expert = self._team.lead_expert
        if not expert or not expert.is_active:
            active = self._team.active_experts
            expert = active[0] if active else None

        fallback_result: dict[str, Any]
        if expert is None:
            fallback_result = {"error": "No active expert available for fallback"}
        else:
            try:
                task_msg = TaskMessage(
                    task_id=f"fallback_{plan.id}",
                    agent_name=expert.config.name,
                    task_type="fallback",
                    priority=0,
                    input_data={
                        "task": task,
                        "phase_results": phase_results,
                        "team_id": self._team.team_id,
                    },
                    callback_url=None,
                    created_at=datetime.now(timezone.utc),
                )
                task_result: TaskResult = await expert.agent.execute(task_msg)
                if task_result.output_data:
                    fallback_result = task_result.output_data
                elif task_result.status != TaskStatus.COMPLETED.value:
                    # Same status check as _execute_phase: do not claim success
                    # for an agent that failed and produced nothing.
                    reason = task_result.error_message or "unknown error"
                    fallback_result = {"error": f"Fallback execution failed: {reason}"}
                else:
                    fallback_result = {
                        "content": f"Task completed by {expert.config.name} (fallback mode)"
                    }
            except Exception as e:
                logger.error(f"Fallback agent execution failed: {e}")
                fallback_result = {"error": f"Fallback execution failed: {e}"}

        return {
            "status": "fallback",
            "result": fallback_result,
            "phase_results": phase_results,
            "plan": plan,
        }

    def _get_model(self, expert: Expert | None = None) -> str:
        """Get the LLM model name from an expert's config.

        Args:
            expert: Expert to read from; the team's lead expert when omitted.

        Returns:
            The "model" entry of ``expert.config.llm``, or "default" when there
            is no expert, no ``llm`` config, or no "model" entry.
        """
        target = expert or self._team.lead_expert
        llm_config = target.config.llm if target else None
        if not llm_config:
            return "default"
        return llm_config.get("model", "default")

    def _get_llm_gateway(self, expert: Expert | None = None) -> LLMGateway | None:
        """Get the LLM gateway from the given expert or the lead expert's agent.

        Falls back to the first active expert whose agent has a gateway.

        Returns:
            The gateway, or ``None`` when no expert provides one.
        """

        def _gateway_of(candidate: Any) -> LLMGateway | None:
            # The gateway lives on the agent's private attribute; be lenient
            # about experts/agents that do not carry one.
            return getattr(getattr(candidate, "agent", None), "_llm_gateway", None)

        target = expert or self._team.lead_expert
        if target:
            gateway = _gateway_of(target)
            if gateway is not None:
                return gateway

        for candidate in self._team.active_experts:
            gateway = _gateway_of(candidate)
            if gateway is not None:
                return gateway
        return None

    async def _broadcast_event(self, event_type: str, data: dict[str, Any]) -> None:
        """Broadcast an orchestration event to the team channel.

        Events are sent through the team's ``handoff_transport`` when one is
        configured; send failures are logged and never propagate.

        Event types emitted by this class: team_formed, expert_step,
        expert_result, plan_update, phase_started, phase_completed,
        phase_failed, team_synthesis, team_dissolved.

        Args:
            event_type: Value of the "type" field of the message.
            data: Remaining fields of the message.
        """
        transport = self._team.handoff_transport
        if not transport:
            return
        try:
            await transport.send(self._team.team_channel, {"type": event_type, **data})
        except Exception as e:
            logger.warning(f"Failed to broadcast event '{event_type}': {e}")