feat: complex-task-quality-loop (R1-R12) #22
|
|
@ -72,6 +72,11 @@ async def drain_pending_evolution_tasks() -> None:
|
||||||
await asyncio.gather(*_pending_evolution_tasks, return_exceptions=True)
|
await asyncio.gather(*_pending_evolution_tasks, return_exceptions=True)
|
||||||
|
|
||||||
|
|
||||||
|
def get_evolution_dropped_count() -> int:
    """Report how many evolution tasks have been dropped due to backpressure.

    Returns:
        The current value of the module-level ``_evolution_dropped_count``.
    """
    dropped_so_far = _evolution_dropped_count
    return dropped_so_far
|
||||||
|
|
||||||
|
|
||||||
class AgentConfig:
|
class AgentConfig:
|
||||||
"""Agent 配置模型,从 YAML 或 Dict 构建"""
|
"""Agent 配置模型,从 YAML 或 Dict 构建"""
|
||||||
|
|
||||||
|
|
@ -739,7 +744,20 @@ class ConfigDrivenAgent(BaseAgent, EvolutionMixin):
|
||||||
|
|
||||||
Shared by all _handle_*_stream methods to avoid duplicating the
|
Shared by all _handle_*_stream methods to avoid duplicating the
|
||||||
message-rendering logic that mirrors the sync _handle_* methods.
|
message-rendering logic that mirrors the sync _handle_* methods.
|
||||||
|
|
||||||
|
Portal path: if ``task.input_data["messages"]`` is present (a list of
|
||||||
|
``{role, content}`` dicts), use those pre-built messages directly
|
||||||
|
instead of rendering the prompt template. This lets the portal route
|
||||||
|
through ``execute_stream`` (inheriting evolution hooks + trace_outcome
|
||||||
|
propagation) while keeping its external message-building logic.
|
||||||
"""
|
"""
|
||||||
|
prebuilt = task.input_data.get("messages")
|
||||||
|
if prebuilt is not None:
|
||||||
|
system_prompt = task.input_data.get("system_prompt")
|
||||||
|
user_messages = [m for m in prebuilt if m.get("role") != "system"]
|
||||||
|
if not user_messages:
|
||||||
|
user_messages = [{"role": "user", "content": str(task.input_data)}]
|
||||||
|
return system_prompt, user_messages
|
||||||
variables = task.input_data.copy()
|
variables = task.input_data.copy()
|
||||||
variables["task_type"] = task.task_type
|
variables["task_type"] = task.task_type
|
||||||
if self._prompt_template:
|
if self._prompt_template:
|
||||||
|
|
@ -774,22 +792,35 @@ class ConfigDrivenAgent(BaseAgent, EvolutionMixin):
|
||||||
token = CancellationToken()
|
token = CancellationToken()
|
||||||
self._active_tokens[task.task_id] = token
|
self._active_tokens[task.task_id] = token
|
||||||
_stream_output: dict = {}
|
_stream_output: dict = {}
|
||||||
|
_stream_trace_outcome: str = "success"
|
||||||
_stream_error: BaseException | None = None
|
_stream_error: BaseException | None = None
|
||||||
_stream_completed = False
|
_stream_completed = False
|
||||||
|
_stream_started_at = datetime.now(timezone.utc)
|
||||||
try:
|
try:
|
||||||
await self._register_mcp_tools()
|
await self._register_mcp_tools()
|
||||||
async for event in self.handle_task_stream(task):
|
async for event in self.handle_task_stream(task):
|
||||||
if event.event_type == "final_answer":
|
if event.event_type == "final_answer":
|
||||||
_raw = event.data.get("output", "")
|
_raw = event.data.get("output", "")
|
||||||
_stream_output = {"content": _raw} if isinstance(_raw, str) else _raw
|
_stream_output = {"content": _raw} if isinstance(_raw, str) else _raw
|
||||||
|
# PLAN_EXEC path may embed trace_outcome in final_answer.
|
||||||
|
_to = event.data.get("trace_outcome")
|
||||||
|
if _to:
|
||||||
|
_stream_trace_outcome = _to
|
||||||
|
elif event.event_type == "final_result":
|
||||||
|
# REACT path: final_result carries ReActResult.status.
|
||||||
|
_result = event.data.get("result")
|
||||||
|
if _result is not None:
|
||||||
|
_stream_trace_outcome = getattr(_result, "status", "success")
|
||||||
yield event
|
yield event
|
||||||
_stream_completed = True
|
_stream_completed = True
|
||||||
except asyncio.CancelledError as ce:
|
except asyncio.CancelledError as ce:
|
||||||
# Cancellation must propagate, but hooks still fire (U2 edge case).
|
# Cancellation must propagate, but hooks still fire (U2 edge case).
|
||||||
_stream_error = ce
|
_stream_error = ce
|
||||||
|
_stream_trace_outcome = "cancelled"
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
_stream_error = e
|
_stream_error = e
|
||||||
|
_stream_trace_outcome = "error"
|
||||||
raise
|
raise
|
||||||
finally:
|
finally:
|
||||||
# async generator 的 finally 在 generator 关闭时执行(GC/aclose/正常结束)
|
# async generator 的 finally 在 generator 关闭时执行(GC/aclose/正常结束)
|
||||||
|
|
@ -797,6 +828,12 @@ class ConfigDrivenAgent(BaseAgent, EvolutionMixin):
|
||||||
# KTD-4: lifecycle parity — fire evolution hooks fire-and-forget.
|
# KTD-4: lifecycle parity — fire evolution hooks fire-and-forget.
|
||||||
try:
|
try:
|
||||||
now = datetime.now(timezone.utc)
|
now = datetime.now(timezone.utc)
|
||||||
|
# KTD-8: propagate trace_outcome into output_data so
|
||||||
|
# lifecycle._is_failure_path() can detect non-success outcomes.
|
||||||
|
if _stream_output:
|
||||||
|
_stream_output["trace_outcome"] = _stream_trace_outcome
|
||||||
|
else:
|
||||||
|
_stream_output = {"trace_outcome": _stream_trace_outcome}
|
||||||
if _stream_error is not None:
|
if _stream_error is not None:
|
||||||
if isinstance(_stream_error, (asyncio.CancelledError, TaskCancelledError)):
|
if isinstance(_stream_error, (asyncio.CancelledError, TaskCancelledError)):
|
||||||
status = TaskStatus.CANCELLED
|
status = TaskStatus.CANCELLED
|
||||||
|
|
@ -810,17 +847,29 @@ class ConfigDrivenAgent(BaseAgent, EvolutionMixin):
|
||||||
status=status,
|
status=status,
|
||||||
output_data=None,
|
output_data=None,
|
||||||
error_message=err_msg,
|
error_message=err_msg,
|
||||||
started_at=now,
|
started_at=_stream_started_at,
|
||||||
completed_at=now,
|
completed_at=now,
|
||||||
)
|
)
|
||||||
elif _stream_completed:
|
elif _stream_completed:
|
||||||
|
# KTD-8: map non-success trace_outcomes to FAILED.
|
||||||
|
if _stream_trace_outcome in (
|
||||||
|
"gave_up_after_reflections",
|
||||||
|
"verify_failed",
|
||||||
|
"verify_quota_exhausted",
|
||||||
|
"failed",
|
||||||
|
):
|
||||||
|
status = TaskStatus.FAILED
|
||||||
|
err_msg = _stream_trace_outcome
|
||||||
|
else:
|
||||||
|
status = TaskStatus.COMPLETED
|
||||||
|
err_msg = None
|
||||||
result = TaskResult(
|
result = TaskResult(
|
||||||
task_id=task.task_id,
|
task_id=task.task_id,
|
||||||
agent_name=self.name,
|
agent_name=self.name,
|
||||||
status=TaskStatus.COMPLETED,
|
status=status,
|
||||||
output_data=_stream_output,
|
output_data=_stream_output,
|
||||||
error_message=None,
|
error_message=err_msg,
|
||||||
started_at=now,
|
started_at=_stream_started_at,
|
||||||
completed_at=now,
|
completed_at=now,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
|
@ -831,7 +880,7 @@ class ConfigDrivenAgent(BaseAgent, EvolutionMixin):
|
||||||
status=TaskStatus.CANCELLED,
|
status=TaskStatus.CANCELLED,
|
||||||
output_data=None,
|
output_data=None,
|
||||||
error_message="stream closed before completion",
|
error_message="stream closed before completion",
|
||||||
started_at=now,
|
started_at=_stream_started_at,
|
||||||
completed_at=now,
|
completed_at=now,
|
||||||
)
|
)
|
||||||
self._trigger_evolution_hooks(task, result)
|
self._trigger_evolution_hooks(task, result)
|
||||||
|
|
|
||||||
|
|
@ -121,6 +121,10 @@ class PlanExecEngine:
|
||||||
# user's decision. None = skip the gate (backward compat — the engine
|
# user's decision. None = skip the gate (backward compat — the engine
|
||||||
# proceeds directly to execution after Spec persistence).
|
# proceeds directly to execution after Spec persistence).
|
||||||
spec_review_handler: SpecReviewHandler | None = None,
|
spec_review_handler: SpecReviewHandler | None = None,
|
||||||
|
# KTD-2/R4: max reflections for ReActEngine reinjection→reflection
|
||||||
|
# escalation. Threaded through to each step's ReActEngine so the
|
||||||
|
# verify-failed path can escalate from reinjection to full reflection.
|
||||||
|
max_reflections: int = 2,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
|
|
@ -159,6 +163,8 @@ class PlanExecEngine:
|
||||||
self._pitfall_detector = pitfall_detector
|
self._pitfall_detector = pitfall_detector
|
||||||
# U8/R8: spec review gate handler. None = skip gate (backward compat).
|
# U8/R8: spec review gate handler. None = skip gate (backward compat).
|
||||||
self._spec_review_handler = spec_review_handler
|
self._spec_review_handler = spec_review_handler
|
||||||
|
# KTD-2/R4: max reflections threaded to each step's ReActEngine.
|
||||||
|
self._max_reflections = max_reflections
|
||||||
# U4/R11: copy the default to avoid mutating the module-level dict.
|
# U4/R11: copy the default to avoid mutating the module-level dict.
|
||||||
self._phase_budgets = (
|
self._phase_budgets = (
|
||||||
dict(phase_budgets) if phase_budgets is not None else dict(_DEFAULT_PHASE_BUDGETS)
|
dict(phase_budgets) if phase_budgets is not None else dict(_DEFAULT_PHASE_BUDGETS)
|
||||||
|
|
@ -605,9 +611,10 @@ class PlanExecEngine:
|
||||||
"output": output,
|
"output": output,
|
||||||
"total_steps": len(state.trajectory),
|
"total_steps": len(state.trajectory),
|
||||||
"total_tokens": state.total_tokens,
|
"total_tokens": state.total_tokens,
|
||||||
"plan_id": plan.plan_id,
|
"plan_id": current_plan.plan_id,
|
||||||
"plan_status": plan_result.status.value,
|
"plan_status": plan_result.status.value,
|
||||||
"replanned": state.replanned,
|
"replanned": state.replanned,
|
||||||
|
"trace_outcome": trace_outcome,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -637,7 +644,7 @@ class PlanExecEngine:
|
||||||
async def _inject_pitfall_warnings(
|
async def _inject_pitfall_warnings(
|
||||||
self,
|
self,
|
||||||
goal: str,
|
goal: str,
|
||||||
plan_steps: list[Any],
|
plan_steps: list[PlanStep],
|
||||||
task_type: str,
|
task_type: str,
|
||||||
actor: str,
|
actor: str,
|
||||||
system_prompt: str | None,
|
system_prompt: str | None,
|
||||||
|
|
@ -1432,6 +1439,7 @@ class PlanExecEngine:
|
||||||
verification_enabled=self._verification_enabled,
|
verification_enabled=self._verification_enabled,
|
||||||
verification_commands=self._verification_commands,
|
verification_commands=self._verification_commands,
|
||||||
phase_budgets=self._phase_budgets,
|
phase_budgets=self._phase_budgets,
|
||||||
|
max_reflections=self._max_reflections,
|
||||||
)
|
)
|
||||||
return PlanExecutor(
|
return PlanExecutor(
|
||||||
agent_pool=step_executor,
|
agent_pool=step_executor,
|
||||||
|
|
@ -1590,11 +1598,13 @@ class ReActStepExecutor:
|
||||||
model: str = "default",
|
model: str = "default",
|
||||||
system_prompt: str | None = None,
|
system_prompt: str | None = None,
|
||||||
tools: list["Tool"] | None = None,
|
tools: list["Tool"] | None = None,
|
||||||
max_steps: int = 5,
|
max_steps: int = 10,
|
||||||
confirmation_handler: Any | None = None,
|
confirmation_handler: Any | None = None,
|
||||||
verification_enabled: bool = False,
|
verification_enabled: bool = False,
|
||||||
verification_commands: list[str] | None = None,
|
verification_commands: list[str] | None = None,
|
||||||
phase_budgets: dict[str, int] | None = None,
|
phase_budgets: dict[str, int] | None = None,
|
||||||
|
# KTD-2/R4: threaded through to each step's ReActEngine.
|
||||||
|
max_reflections: int = 2,
|
||||||
):
|
):
|
||||||
self._llm_gateway = llm_gateway
|
self._llm_gateway = llm_gateway
|
||||||
self._messages = messages or []
|
self._messages = messages or []
|
||||||
|
|
@ -1607,6 +1617,8 @@ class ReActStepExecutor:
|
||||||
self._verification_commands = verification_commands
|
self._verification_commands = verification_commands
|
||||||
# U4/R11: thread through to each step's ReActEngine.
|
# U4/R11: thread through to each step's ReActEngine.
|
||||||
self._phase_budgets = phase_budgets
|
self._phase_budgets = phase_budgets
|
||||||
|
# KTD-2/R4: thread through to each step's ReActEngine.
|
||||||
|
self._max_reflections = max_reflections
|
||||||
self._agents: dict[str, _ReActStepAgent] = {}
|
self._agents: dict[str, _ReActStepAgent] = {}
|
||||||
|
|
||||||
async def create_agent_from_skill(self, skill_name: str):
|
async def create_agent_from_skill(self, skill_name: str):
|
||||||
|
|
@ -1623,6 +1635,7 @@ class ReActStepExecutor:
|
||||||
verification_enabled=self._verification_enabled,
|
verification_enabled=self._verification_enabled,
|
||||||
verification_commands=self._verification_commands,
|
verification_commands=self._verification_commands,
|
||||||
phase_budgets=self._phase_budgets,
|
phase_budgets=self._phase_budgets,
|
||||||
|
max_reflections=self._max_reflections,
|
||||||
)
|
)
|
||||||
self._agents[skill_name] = agent
|
self._agents[skill_name] = agent
|
||||||
return agent
|
return agent
|
||||||
|
|
@ -1642,6 +1655,7 @@ class ReActStepExecutor:
|
||||||
verification_enabled=self._verification_enabled,
|
verification_enabled=self._verification_enabled,
|
||||||
verification_commands=self._verification_commands,
|
verification_commands=self._verification_commands,
|
||||||
phase_budgets=self._phase_budgets,
|
phase_budgets=self._phase_budgets,
|
||||||
|
max_reflections=self._max_reflections,
|
||||||
)
|
)
|
||||||
self._agents[key] = agent
|
self._agents[key] = agent
|
||||||
return agent
|
return agent
|
||||||
|
|
@ -1662,11 +1676,12 @@ class _ReActStepAgent:
|
||||||
model: str = "default",
|
model: str = "default",
|
||||||
system_prompt: str | None = None,
|
system_prompt: str | None = None,
|
||||||
tools: list["Tool"] | None = None,
|
tools: list["Tool"] | None = None,
|
||||||
max_steps: int = 5,
|
max_steps: int = 10,
|
||||||
confirmation_handler: Any | None = None,
|
confirmation_handler: Any | None = None,
|
||||||
verification_enabled: bool = False,
|
verification_enabled: bool = False,
|
||||||
verification_commands: list[str] | None = None,
|
verification_commands: list[str] | None = None,
|
||||||
phase_budgets: dict[str, int] | None = None,
|
phase_budgets: dict[str, int] | None = None,
|
||||||
|
max_reflections: int = 2,
|
||||||
):
|
):
|
||||||
self.name = name
|
self.name = name
|
||||||
self._llm_gateway = llm_gateway
|
self._llm_gateway = llm_gateway
|
||||||
|
|
@ -1680,6 +1695,7 @@ class _ReActStepAgent:
|
||||||
self._verification_commands = verification_commands
|
self._verification_commands = verification_commands
|
||||||
# U4/R11: per-phase step quotas, passed to ReActEngine.
|
# U4/R11: per-phase step quotas, passed to ReActEngine.
|
||||||
self._phase_budgets = phase_budgets
|
self._phase_budgets = phase_budgets
|
||||||
|
self._max_reflections = max_reflections
|
||||||
|
|
||||||
async def execute(self, task_msg: TaskMessage) -> "TaskResult":
|
async def execute(self, task_msg: TaskMessage) -> "TaskResult":
|
||||||
"""执行步骤:通过 ReActEngine 循环调用"""
|
"""执行步骤:通过 ReActEngine 循环调用"""
|
||||||
|
|
@ -1710,6 +1726,7 @@ class _ReActStepAgent:
|
||||||
verification_enabled=self._verification_enabled,
|
verification_enabled=self._verification_enabled,
|
||||||
verification_commands=self._verification_commands,
|
verification_commands=self._verification_commands,
|
||||||
phase_budgets=self._phase_budgets,
|
phase_budgets=self._phase_budgets,
|
||||||
|
max_reflections=self._max_reflections,
|
||||||
)
|
)
|
||||||
|
|
||||||
# 构建 messages
|
# 构建 messages
|
||||||
|
|
@ -1728,7 +1745,13 @@ class _ReActStepAgent:
|
||||||
|
|
||||||
now = datetime.now(timezone.utc)
|
now = datetime.now(timezone.utc)
|
||||||
status = TaskStatus.COMPLETED.value
|
status = TaskStatus.COMPLETED.value
|
||||||
if react_result.status in ("timeout", "cancelled"):
|
if react_result.status in (
|
||||||
|
"timeout",
|
||||||
|
"cancelled",
|
||||||
|
"verify_failed",
|
||||||
|
"gave_up_after_reflections",
|
||||||
|
"failed",
|
||||||
|
):
|
||||||
status = TaskStatus.FAILED.value
|
status = TaskStatus.FAILED.value
|
||||||
|
|
||||||
return TaskResult(
|
return TaskResult(
|
||||||
|
|
|
||||||
|
|
@ -33,10 +33,12 @@ from agentkit.telemetry.metrics import (
|
||||||
agent_duration_histogram,
|
agent_duration_histogram,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from agentkit.core.phase import PhaseState
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from agentkit.core.compressor import CompressionStrategy
|
from agentkit.core.compressor import CompressionStrategy
|
||||||
from agentkit.core.middleware import MiddlewareChain
|
from agentkit.core.middleware import MiddlewareChain
|
||||||
from agentkit.core.phase import PhasePolicy, PhaseState
|
from agentkit.core.phase import PhasePolicy
|
||||||
from agentkit.core.sandbox import WorkspaceSandbox
|
from agentkit.core.sandbox import WorkspaceSandbox
|
||||||
from agentkit.core.trace import TraceRecorder
|
from agentkit.core.trace import TraceRecorder
|
||||||
from agentkit.evolution.pitfall_detector import PitfallWarning
|
from agentkit.evolution.pitfall_detector import PitfallWarning
|
||||||
|
|
@ -420,8 +422,6 @@ class ReActEngine:
|
||||||
"""
|
"""
|
||||||
if self._phase_policy is None or self._current_phase is None:
|
if self._phase_policy is None or self._current_phase is None:
|
||||||
return
|
return
|
||||||
from agentkit.core.phase import PhaseState
|
|
||||||
|
|
||||||
while self._current_phase not in (PhaseState.VERIFICATION, PhaseState.DELIVERY):
|
while self._current_phase not in (PhaseState.VERIFICATION, PhaseState.DELIVERY):
|
||||||
nxt = self.advance_phase()
|
nxt = self.advance_phase()
|
||||||
if nxt is None:
|
if nxt is None:
|
||||||
|
|
@ -446,8 +446,6 @@ class ReActEngine:
|
||||||
"""
|
"""
|
||||||
if self._phase_policy is None or self._current_phase is None:
|
if self._phase_policy is None or self._current_phase is None:
|
||||||
return None
|
return None
|
||||||
from agentkit.core.phase import PhaseState
|
|
||||||
|
|
||||||
nxt = PhaseState.next_of(self._current_phase)
|
nxt = PhaseState.next_of(self._current_phase)
|
||||||
if nxt is None:
|
if nxt is None:
|
||||||
# Already at DELIVERY — return None to signal no transition.
|
# Already at DELIVERY — return None to signal no transition.
|
||||||
|
|
@ -890,8 +888,8 @@ class ReActEngine:
|
||||||
|
|
||||||
trace_outcome = "success"
|
trace_outcome = "success"
|
||||||
# U4/G1: verify 失败回灌计数器。受 max_steps 上限约束(不无限循环)。
|
# U4/G1: verify 失败回灌计数器。受 max_steps 上限约束(不无限循环)。
|
||||||
# U4/KTD-7: initialize from restored budget state (checkpoint resume).
|
# U4/KTD-7: _reflect_count is initialized from restored budget state
|
||||||
reinjections = self._reflect_count
|
# (checkpoint resume) and used directly — no redundant local copy.
|
||||||
_loop_start = time.monotonic()
|
_loop_start = time.monotonic()
|
||||||
|
|
||||||
while step < self._max_steps:
|
while step < self._max_steps:
|
||||||
|
|
@ -913,9 +911,7 @@ class ReActEngine:
|
||||||
and self._phase_policy is not None
|
and self._phase_policy is not None
|
||||||
and self._current_phase is not None
|
and self._current_phase is not None
|
||||||
):
|
):
|
||||||
from agentkit.core.phase import PhaseState as _PS
|
if self._current_phase in (PhaseState.PLANNING, PhaseState.BUILDING):
|
||||||
|
|
||||||
if self._current_phase in (_PS.PLANNING, _PS.BUILDING):
|
|
||||||
self._think_count += 1
|
self._think_count += 1
|
||||||
think_quota = self._phase_budgets.get("think")
|
think_quota = self._phase_budgets.get("think")
|
||||||
if think_quota is not None and self._think_count >= think_quota:
|
if think_quota is not None and self._think_count >= think_quota:
|
||||||
|
|
@ -1547,7 +1543,7 @@ class ReActEngine:
|
||||||
vresult = await vloop.verify()
|
vresult = await vloop.verify()
|
||||||
if not vresult.passed:
|
if not vresult.passed:
|
||||||
if (
|
if (
|
||||||
reinjections < self._max_reinjections
|
self._reflect_count < self._max_reinjections
|
||||||
and step < self._max_steps
|
and step < self._max_steps
|
||||||
):
|
):
|
||||||
errors_text = "\n".join(vresult.errors)
|
errors_text = "\n".join(vresult.errors)
|
||||||
|
|
@ -1557,7 +1553,6 @@ class ReActEngine:
|
||||||
"content": (f"验证失败,错误如下:\n{errors_text}"),
|
"content": (f"验证失败,错误如下:\n{errors_text}"),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
reinjections += 1
|
|
||||||
# U4/R10: track reflect count for
|
# U4/R10: track reflect count for
|
||||||
# checkpoint reconstruction (KTD-7).
|
# checkpoint reconstruction (KTD-7).
|
||||||
self._reflect_count += 1
|
self._reflect_count += 1
|
||||||
|
|
@ -1574,7 +1569,7 @@ class ReActEngine:
|
||||||
data={
|
data={
|
||||||
"message": (
|
"message": (
|
||||||
f"验证失败,已注入错误信息让 LLM 自纠正 "
|
f"验证失败,已注入错误信息让 LLM 自纠正 "
|
||||||
f"(reinjection {reinjections}/{self._max_reinjections})"
|
f"(reinjection {self._reflect_count}/{self._max_reinjections})"
|
||||||
),
|
),
|
||||||
"verify_errors": vresult.errors,
|
"verify_errors": vresult.errors,
|
||||||
},
|
},
|
||||||
|
|
@ -1681,7 +1676,7 @@ class ReActEngine:
|
||||||
logger.info(
|
logger.info(
|
||||||
"Verification failed after %d reinjections, "
|
"Verification failed after %d reinjections, "
|
||||||
"%d reflections, interrupting with verify log",
|
"%d reflections, interrupting with verify log",
|
||||||
reinjections,
|
self._reflect_count,
|
||||||
self._reflection_count,
|
self._reflection_count,
|
||||||
)
|
)
|
||||||
break
|
break
|
||||||
|
|
@ -2136,7 +2131,7 @@ class ReActEngine:
|
||||||
in_verification = (
|
in_verification = (
|
||||||
self._sandbox is not None
|
self._sandbox is not None
|
||||||
and self._current_phase is not None
|
and self._current_phase is not None
|
||||||
and self._current_phase.value == "verification"
|
and self._current_phase == PhaseState.VERIFICATION
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -28,10 +28,24 @@ import contextlib
|
||||||
import errno
|
import errno
|
||||||
import logging
|
import logging
|
||||||
import socket
|
import socket
|
||||||
|
import threading
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Reentrancy counter for ``network_block``. Concurrent VERIFICATION phases
|
||||||
|
# (parallel PLAN_EXEC steps) each enter the context manager; only the first
|
||||||
|
# entry (0 -> 1) patches ``socket.socket.connect``, and only the last exit
|
||||||
|
# (1 -> 0) restores it. Naive save/restore would unpatch on the first exit
|
||||||
|
# while other phases are still expecting the block to be in effect, breaking
|
||||||
|
# sandboxing for any phase that started later.
|
||||||
|
# ponytail: process-wide counter — not subprocess-safe (inherited fork state
|
||||||
|
# is irrelevant because the monkey-patch lives in the parent's socket module).
|
||||||
|
_network_block_count: int = 0
|
||||||
|
_network_block_lock = threading.Lock()
|
||||||
|
_original_socket_connect = socket.socket.connect
|
||||||
|
_original_socket_connect_ex = socket.socket.connect_ex
|
||||||
|
|
||||||
|
|
||||||
class SandboxNetworkBlockedError(RuntimeError):
|
class SandboxNetworkBlockedError(RuntimeError):
|
||||||
"""Raised when a tool attempts an outbound network call under sandbox."""
|
"""Raised when a tool attempts an outbound network call under sandbox."""
|
||||||
|
|
@ -115,17 +129,23 @@ class WorkspaceSandbox:
|
||||||
"""Block outbound network connections within the async context.
|
"""Block outbound network connections within the async context.
|
||||||
|
|
||||||
Patches ``socket.socket.connect`` and ``connect_ex`` to raise /
|
Patches ``socket.socket.connect`` and ``connect_ex`` to raise /
|
||||||
return ``ECONNREFUSED`` respectively. Restores the originals on exit,
|
return ``ECONNREFUSED`` respectively. Restores the originals on the
|
||||||
even if the wrapped code raises.
|
last concurrent exit, even if the wrapped code raises.
|
||||||
|
|
||||||
Already-connected sockets (e.g. an LLM gateway keep-alive pool) are
|
Already-connected sockets (e.g. an LLM gateway keep-alive pool) are
|
||||||
unaffected — only *new* ``connect()`` calls are blocked. This is the
|
unaffected — only *new* ``connect()`` calls are blocked. This is the
|
||||||
correct granularity: the LLM gateway talks over its existing
|
correct granularity: the LLM gateway talks over its existing
|
||||||
connection, while a tool trying to ``requests.get(...)`` makes a new
|
connection, while a tool trying to ``requests.get(...)`` makes a new
|
||||||
connect and is rejected.
|
connect and is rejected.
|
||||||
|
|
||||||
|
Reentrancy: a module-level counter guards the patch. Concurrent
|
||||||
|
VERIFICATION phases (parallel PLAN_EXEC steps) each enter/exit; the
|
||||||
|
patch is engaged on count 0->1 and released on count 1->0. Without
|
||||||
|
this, the first exit would restore the original connect while later
|
||||||
|
phases are still expecting the block, terminating new LLM gateway /
|
||||||
|
Redis / PostgreSQL connections in those phases.
|
||||||
"""
|
"""
|
||||||
original_connect = socket.socket.connect
|
global _network_block_count # noqa: PLW0603
|
||||||
original_connect_ex = socket.socket.connect_ex
|
|
||||||
|
|
||||||
def _blocked_connect(self_sock, *args, **kwargs): # noqa: ANN001
|
def _blocked_connect(self_sock, *args, **kwargs): # noqa: ANN001
|
||||||
raise SandboxNetworkBlockedError(
|
raise SandboxNetworkBlockedError(
|
||||||
|
|
@ -136,15 +156,26 @@ class WorkspaceSandbox:
|
||||||
# connect_ex returns an errno instead of raising (POSIX contract).
|
# connect_ex returns an errno instead of raising (POSIX contract).
|
||||||
return errno.ECONNREFUSED
|
return errno.ECONNREFUSED
|
||||||
|
|
||||||
socket.socket.connect = _blocked_connect # type: ignore[method-assign]
|
with _network_block_lock:
|
||||||
socket.socket.connect_ex = _blocked_connect_ex # type: ignore[method-assign]
|
_network_block_count += 1
|
||||||
logger.debug("sandbox: network block engaged")
|
if _network_block_count == 1:
|
||||||
|
socket.socket.connect = _blocked_connect # type: ignore[method-assign]
|
||||||
|
socket.socket.connect_ex = _blocked_connect_ex # type: ignore[method-assign]
|
||||||
|
logger.debug("sandbox: network block engaged (count=1)")
|
||||||
try:
|
try:
|
||||||
yield
|
yield
|
||||||
finally:
|
finally:
|
||||||
socket.socket.connect = original_connect # type: ignore[method-assign]
|
with _network_block_lock:
|
||||||
socket.socket.connect_ex = original_connect_ex # type: ignore[method-assign]
|
_network_block_count -= 1
|
||||||
logger.debug("sandbox: network block released")
|
if _network_block_count == 0:
|
||||||
|
socket.socket.connect = _original_socket_connect # type: ignore[method-assign]
|
||||||
|
socket.socket.connect_ex = _original_socket_connect_ex # type: ignore[method-assign]
|
||||||
|
logger.debug("sandbox: network block released (count=0)")
|
||||||
|
else:
|
||||||
|
logger.debug(
|
||||||
|
"sandbox: network block still held (count=%d)",
|
||||||
|
_network_block_count,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def detect_verification_commands(workspace_root: str | Path | None) -> list[str]:
|
def detect_verification_commands(workspace_root: str | Path | None) -> list[str]:
|
||||||
|
|
|
||||||
|
|
@ -805,7 +805,14 @@ async def lifespan(app: FastAPI):
|
||||||
try:
|
try:
|
||||||
from agentkit.core.config_driven import drain_pending_evolution_tasks
|
from agentkit.core.config_driven import drain_pending_evolution_tasks
|
||||||
|
|
||||||
await drain_pending_evolution_tasks()
|
await asyncio.wait_for(drain_pending_evolution_tasks(), timeout=10.0)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
from agentkit.core.config_driven import _pending_evolution_tasks
|
||||||
|
|
||||||
|
logger.warning(
|
||||||
|
"drain_pending_evolution_tasks 超时 10s, %d 个任务被放弃",
|
||||||
|
len(_pending_evolution_tasks),
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.debug("drain_pending_evolution_tasks 异常已忽略", exc_info=True)
|
logger.debug("drain_pending_evolution_tasks 异常已忽略", exc_info=True)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1494,6 +1494,23 @@ async def _handle_chat_message(
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
# U8/R8: persist the spec_review_request so it survives a page reload.
|
||||||
|
# The frontend reconstructs the pending review card from the restored
|
||||||
|
# message metadata (spec_review_id + goal + steps).
|
||||||
|
try:
|
||||||
|
await sm.append_message(
|
||||||
|
session_id=session_id,
|
||||||
|
role=MessageRole.ASSISTANT,
|
||||||
|
content=f"[Spec Review] {goal}",
|
||||||
|
metadata={
|
||||||
|
"message_type": "spec_review_request",
|
||||||
|
"spec_review_id": spec_review_id,
|
||||||
|
"spec_review_goal": goal,
|
||||||
|
"spec_review_steps": steps,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
logger.debug("Failed to persist spec_review_request", exc_info=True)
|
||||||
|
|
||||||
loop = asyncio.get_running_loop()
|
loop = asyncio.get_running_loop()
|
||||||
future: asyncio.Future[tuple[str, str]] = loop.create_future()
|
future: asyncio.Future[tuple[str, str]] = loop.create_future()
|
||||||
|
|
@ -1506,19 +1523,58 @@ async def _handle_chat_message(
|
||||||
# "failed") so the user can resume on return.
|
# "failed") so the user can resume on return.
|
||||||
decision, feedback = await asyncio.wait_for(future, timeout=1800.0)
|
decision, feedback = await asyncio.wait_for(future, timeout=1800.0)
|
||||||
logger.info(f"Spec review {spec_review_id} resolved: decision={decision!r}")
|
logger.info(f"Spec review {spec_review_id} resolved: decision={decision!r}")
|
||||||
|
# Persist the decision so the frontend can show the outcome after
|
||||||
|
# a reload (e.g. timeout→parked transition the user never saw).
|
||||||
|
try:
|
||||||
|
await sm.append_message(
|
||||||
|
session_id=session_id,
|
||||||
|
role=MessageRole.ASSISTANT,
|
||||||
|
content=f"[Spec Review Decision] {decision}: {feedback}",
|
||||||
|
metadata={
|
||||||
|
"message_type": "spec_review_reply",
|
||||||
|
"spec_review_id": spec_review_id,
|
||||||
|
"spec_review_decision": decision,
|
||||||
|
"spec_review_feedback": feedback,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
logger.debug("Failed to persist spec_review_reply", exc_info=True)
|
||||||
return decision, feedback
|
return decision, feedback
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
logger.warning(f"Spec review {spec_review_id} timed out (30 min)")
|
logger.warning(f"Spec review {spec_review_id} timed out (30 min)")
|
||||||
|
# Persist the timeout→parked transition so the frontend can show
|
||||||
|
# the parked state after a reload.
|
||||||
|
try:
|
||||||
|
await sm.append_message(
|
||||||
|
session_id=session_id,
|
||||||
|
role=MessageRole.ASSISTANT,
|
||||||
|
content=f"[Spec Review Timed Out] {spec_review_id}",
|
||||||
|
metadata={
|
||||||
|
"message_type": "spec_review_reply",
|
||||||
|
"spec_review_id": spec_review_id,
|
||||||
|
"spec_review_decision": "parked",
|
||||||
|
"spec_review_feedback": "timed out (30 min)",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
logger.debug("Failed to persist spec_review timeout", exc_info=True)
|
||||||
raise
|
raise
|
||||||
finally:
|
finally:
|
||||||
_pending_spec_reviews.pop(spec_review_id, None)
|
_pending_spec_reviews.pop(spec_review_id, None)
|
||||||
|
|
||||||
# Wire the handler onto a PlanExecEngine only (the WS PLAN_EXEC path uses
|
# U8/R8: spec review gate wiring. The WS PLAN_EXEC path uses
|
||||||
# a ReActEngine + phase_policy, where this is a no-op). Local import to
|
# ``_build_phase_engine`` which returns a ``ReActEngine`` with
|
||||||
# avoid a top-level dependency that the WS path doesn't need.
|
# ``phase_policy`` (NOT a ``PlanExecEngine``), so the gate cannot be
|
||||||
from agentkit.core.plan_exec_engine import PlanExecEngine as _PlanExecEngine
|
# wired here — ``ReActEngine`` does not read ``_spec_review_handler``.
|
||||||
|
# The gate only fires when ``ConfigDrivenAgent.execute_stream`` →
|
||||||
if isinstance(react_engine, _PlanExecEngine):
|
# ``_handle_plan_exec_stream`` → ``PlanExecEngine.execute_stream`` runs,
|
||||||
|
# which is the portal/task path (not the WS chat path).
|
||||||
|
# ponytail: known ceiling — WS chat PLAN_EXEC (phase_policy mechanism)
|
||||||
|
# does not support spec review. Upgrade path: route WS PLAN_EXEC through
|
||||||
|
# ``ConfigDrivenAgent.execute_stream`` to unify with the portal path and
|
||||||
|
# inherit the gate. The ``_spec_review_handler`` closure + event handlers
|
||||||
|
# below are kept so the upgrade is a routing change, not a rewrite.
|
||||||
|
if hasattr(react_engine, "_spec_review_handler"):
|
||||||
react_engine._spec_review_handler = _spec_review_handler
|
react_engine._spec_review_handler = _spec_review_handler
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ from pydantic import BaseModel
|
||||||
|
|
||||||
from agentkit.core.config_driven import ConfigDrivenAgent
|
from agentkit.core.config_driven import ConfigDrivenAgent
|
||||||
from agentkit.core.event_queue import EventQueue
|
from agentkit.core.event_queue import EventQueue
|
||||||
from agentkit.core.protocol import Event, TaskEventType, TaskStatus, TurnEventType
|
from agentkit.core.protocol import Event, TaskEventType, TaskMessage, TaskStatus, TurnEventType
|
||||||
from agentkit.core.react import ReActEngine
|
from agentkit.core.react import ReActEngine
|
||||||
from agentkit.chat.skill_routing import ExecutionMode, SkillRoutingResult
|
from agentkit.chat.skill_routing import ExecutionMode, SkillRoutingResult
|
||||||
from agentkit.chat.request_preprocessor import RequestPreprocessor
|
from agentkit.chat.request_preprocessor import RequestPreprocessor
|
||||||
|
|
@ -73,6 +73,42 @@ def _ensure_non_empty(text: str | None) -> str:
|
||||||
return EMPTY_LLM_RESPONSE
|
return EMPTY_LLM_RESPONSE
|
||||||
|
|
||||||
|
|
||||||
|
def _build_portal_task(
    *,
    agent_name: str,
    messages: list[dict[str, str]],
    system_prompt: str | None,
    timeout_seconds: float | None,
    conversation_id: str | None = None,
    task_id: str | None = None,
) -> TaskMessage:
    """Construct a TaskMessage for routing through ConfigDrivenAgent.execute_stream.

    The portal builds messages externally (history + user message). The
    ``messages`` key in input_data tells _build_llm_messages to use them
    directly instead of rendering the prompt template. This lets the portal
    inherit evolution hooks + trace_outcome propagation from execute_stream's
    finally block (KTD-4/KTD-8).

    Args:
        agent_name: Name of the agent the task is addressed to.
        messages: Pre-built ``{role, content}`` dicts, oldest first. The list
            is stored by reference in ``input_data["messages"]``.
        system_prompt: System prompt stored under ``input_data["system_prompt"]``;
            may be ``None``.
        timeout_seconds: Requested timeout. ``None`` or ``0`` selects the
            300-second default; any other value is truncated to whole seconds
            and clamped to at least 1.
        conversation_id: Optional conversation identifier passed through.
        task_id: Optional task identifier; a random UUID4 string is generated
            when it is missing or empty.

    Returns:
        A ``TaskMessage`` with ``task_type="chat"`` and ``priority=0``.
    """
    from datetime import datetime, timezone

    if timeout_seconds:
        # int() truncates toward zero, so a sub-second request such as 0.5
        # would otherwise become a 0-second timeout. Clamp to one second so
        # a short (or negative) request never turns into a zero/negative one.
        task_timeout = max(1, int(timeout_seconds))
    else:
        task_timeout = 300

    # "content" mirrors the most recent message so consumers that read a
    # single content field still see the latest turn; empty when no messages.
    latest_content = messages[-1].get("content", "") if messages else ""

    return TaskMessage(
        task_id=task_id or str(uuid.uuid4()),
        agent_name=agent_name,
        task_type="chat",
        priority=0,
        input_data={
            "messages": messages,
            "system_prompt": system_prompt,
            "content": latest_content,
        },
        callback_url=None,
        created_at=datetime.now(timezone.utc),
        timeout_seconds=task_timeout,
        conversation_id=conversation_id,
    )
|
||||||
|
|
||||||
|
|
||||||
async def _emit_event_safe(
|
async def _emit_event_safe(
|
||||||
event_queue: EventQueue | None,
|
event_queue: EventQueue | None,
|
||||||
event_type: str,
|
event_type: str,
|
||||||
|
|
@ -556,38 +592,39 @@ async def chat(request: ChatRequest, req: Request, _auth: None = Depends(_verify
|
||||||
)
|
)
|
||||||
|
|
||||||
react_config = agent.get_react_config()
|
react_config = agent.get_react_config()
|
||||||
react_engine = getattr(agent, "_react_engine", None)
|
# KTD-4/KTD-8: route through ConfigDrivenAgent.execute_stream so the
|
||||||
if react_engine is None:
|
# finally block fires evolution hooks + propagates trace_outcome. The
|
||||||
react_engine = ReActEngine(
|
# portal builds messages externally; _build_portal_task packages them
|
||||||
|
# into a TaskMessage whose input_data["messages"] is used directly by
|
||||||
|
# _build_llm_messages (bypassing the prompt template).
|
||||||
|
_react_engine = getattr(agent, "_react_engine", None)
|
||||||
|
if _react_engine is None:
|
||||||
|
_react_engine = ReActEngine(
|
||||||
llm_gateway=llm_gateway,
|
llm_gateway=llm_gateway,
|
||||||
max_steps=react_config["max_steps"],
|
max_steps=react_config["max_steps"],
|
||||||
)
|
)
|
||||||
|
agent._react_engine = _react_engine
|
||||||
else:
|
else:
|
||||||
react_engine.reset()
|
_react_engine.reset()
|
||||||
|
|
||||||
messages = [{"role": "user", "content": request.message}]
|
messages = [{"role": "user", "content": request.message}]
|
||||||
# Inject conversation history
|
# Inject conversation history
|
||||||
history_msgs = await _build_history_messages(conv.id)
|
history_msgs = await _build_history_messages(conv.id)
|
||||||
for hm in reversed(history_msgs):
|
for hm in reversed(history_msgs):
|
||||||
messages.insert(0, hm)
|
messages.insert(0, hm)
|
||||||
tools = agent.get_tools()
|
|
||||||
model = agent.get_model()
|
|
||||||
system_prompt = getattr(agent, "_system_prompt", None) or agent.get_system_prompt()
|
system_prompt = getattr(agent, "_system_prompt", None) or agent.get_system_prompt()
|
||||||
timeout_seconds = react_config["timeout_seconds"]
|
timeout_seconds = react_config["timeout_seconds"]
|
||||||
|
|
||||||
|
portal_task = _build_portal_task(
|
||||||
|
agent_name=agent.name,
|
||||||
|
messages=messages,
|
||||||
|
system_prompt=system_prompt,
|
||||||
|
timeout_seconds=timeout_seconds,
|
||||||
|
conversation_id=conv.id,
|
||||||
|
)
|
||||||
collected_output: list[str] = []
|
collected_output: list[str] = []
|
||||||
try:
|
try:
|
||||||
# U2 verify: calls react_engine.execute_stream directly, bypassing
|
async for event in agent.execute_stream(portal_task):
|
||||||
# ConfigDrivenAgent.execute_stream — evolution hooks NOT propagated
|
|
||||||
# here. Routing through agent.execute_stream is tracked separately.
|
|
||||||
async for event in react_engine.execute_stream(
|
|
||||||
messages=messages,
|
|
||||||
tools=tools,
|
|
||||||
model=model,
|
|
||||||
agent_name=agent.name,
|
|
||||||
system_prompt=system_prompt,
|
|
||||||
timeout_seconds=timeout_seconds,
|
|
||||||
):
|
|
||||||
if event.event_type == "final_answer":
|
if event.event_type == "final_answer":
|
||||||
collected_output.append(event.data.get("output", ""))
|
collected_output.append(event.data.get("output", ""))
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
|
|
@ -684,34 +721,32 @@ async def chat_stream(request: ChatRequest, req: Request, _auth: None = Depends(
|
||||||
)
|
)
|
||||||
|
|
||||||
react_config = agent.get_react_config()
|
react_config = agent.get_react_config()
|
||||||
react_engine = getattr(agent, "_react_engine", None)
|
# KTD-4/KTD-8: route through ConfigDrivenAgent.execute_stream
|
||||||
if react_engine is None:
|
# (evolution hooks + trace_outcome propagation in finally block).
|
||||||
react_engine = ReActEngine(
|
_react_engine = getattr(agent, "_react_engine", None)
|
||||||
|
if _react_engine is None:
|
||||||
|
_react_engine = ReActEngine(
|
||||||
llm_gateway=llm_gateway,
|
llm_gateway=llm_gateway,
|
||||||
max_steps=react_config["max_steps"],
|
max_steps=react_config["max_steps"],
|
||||||
)
|
)
|
||||||
|
agent._react_engine = _react_engine
|
||||||
else:
|
else:
|
||||||
react_engine.reset()
|
_react_engine.reset()
|
||||||
|
|
||||||
messages = [{"role": "user", "content": request.message}]
|
messages = [{"role": "user", "content": request.message}]
|
||||||
tools = agent.get_tools()
|
|
||||||
model = agent.get_model()
|
|
||||||
system_prompt = getattr(agent, "_system_prompt", None) or agent.get_system_prompt()
|
system_prompt = getattr(agent, "_system_prompt", None) or agent.get_system_prompt()
|
||||||
timeout_seconds = react_config["timeout_seconds"]
|
timeout_seconds = react_config["timeout_seconds"]
|
||||||
|
|
||||||
|
portal_task = _build_portal_task(
|
||||||
|
agent_name=agent.name,
|
||||||
|
messages=messages,
|
||||||
|
system_prompt=system_prompt,
|
||||||
|
timeout_seconds=timeout_seconds,
|
||||||
|
conversation_id=conv.id,
|
||||||
|
)
|
||||||
collected_output: list[str] = []
|
collected_output: list[str] = []
|
||||||
try:
|
try:
|
||||||
# U2 verify: calls react_engine.execute_stream directly, bypassing
|
async for event in agent.execute_stream(portal_task):
|
||||||
# ConfigDrivenAgent.execute_stream — evolution hooks NOT propagated
|
|
||||||
# here. Routing through agent.execute_stream is tracked separately.
|
|
||||||
async for event in react_engine.execute_stream(
|
|
||||||
messages=messages,
|
|
||||||
tools=tools,
|
|
||||||
model=model,
|
|
||||||
agent_name=agent.name,
|
|
||||||
system_prompt=system_prompt,
|
|
||||||
timeout_seconds=timeout_seconds,
|
|
||||||
):
|
|
||||||
if event.event_type == "final_answer":
|
if event.event_type == "final_answer":
|
||||||
collected_output.append(event.data.get("output", ""))
|
collected_output.append(event.data.get("output", ""))
|
||||||
yield {
|
yield {
|
||||||
|
|
@ -967,11 +1002,8 @@ def _derive_title_from_messages(messages: list) -> str:
|
||||||
|
|
||||||
|
|
||||||
async def _execute_react_background(
|
async def _execute_react_background(
|
||||||
react_engine: ReActEngine,
|
agent: ConfigDrivenAgent,
|
||||||
messages: list[dict],
|
messages: list[dict],
|
||||||
tools: list,
|
|
||||||
model: str,
|
|
||||||
agent_name: str,
|
|
||||||
system_prompt: str | None,
|
system_prompt: str | None,
|
||||||
timeout_seconds: float | None,
|
timeout_seconds: float | None,
|
||||||
conv_id: str,
|
conv_id: str,
|
||||||
|
|
@ -987,6 +1019,10 @@ async def _execute_react_background(
|
||||||
Results are always persisted to the conversation store, regardless of
|
Results are always persisted to the conversation store, regardless of
|
||||||
whether a WebSocket subscriber is active.
|
whether a WebSocket subscriber is active.
|
||||||
Task status is tracked in TaskStore when provided.
|
Task status is tracked in TaskStore when provided.
|
||||||
|
|
||||||
|
KTD-4/KTD-8: routes through ``agent.execute_stream`` (not
|
||||||
|
``react_engine.execute_stream`` directly) so the finally block fires
|
||||||
|
evolution hooks and propagates trace_outcome.
|
||||||
"""
|
"""
|
||||||
collected_output: list[str] = []
|
collected_output: list[str] = []
|
||||||
try:
|
try:
|
||||||
|
|
@ -1005,17 +1041,15 @@ async def _execute_react_background(
|
||||||
):
|
):
|
||||||
logger.warning("Failed to update TaskStore RUNNING", exc_info=True)
|
logger.warning("Failed to update TaskStore RUNNING", exc_info=True)
|
||||||
|
|
||||||
# U2 verify: calls react_engine.execute_stream directly, bypassing
|
portal_task = _build_portal_task(
|
||||||
# ConfigDrivenAgent.execute_stream — evolution hooks NOT propagated
|
agent_name=agent.name,
|
||||||
# here. Routing through agent.execute_stream is tracked separately.
|
|
||||||
async for event in react_engine.execute_stream(
|
|
||||||
messages=messages,
|
messages=messages,
|
||||||
tools=tools,
|
|
||||||
model=model,
|
|
||||||
agent_name=agent_name,
|
|
||||||
system_prompt=system_prompt,
|
system_prompt=system_prompt,
|
||||||
timeout_seconds=timeout_seconds,
|
timeout_seconds=timeout_seconds,
|
||||||
):
|
conversation_id=conv_id,
|
||||||
|
task_id=task_id,
|
||||||
|
)
|
||||||
|
async for event in agent.execute_stream(portal_task):
|
||||||
if event.event_type == "final_answer":
|
if event.event_type == "final_answer":
|
||||||
collected_output.append(event.data.get("output", ""))
|
collected_output.append(event.data.get("output", ""))
|
||||||
|
|
||||||
|
|
@ -1219,6 +1253,14 @@ async def portal_websocket(websocket: WebSocket):
|
||||||
task_id: str | None = None
|
task_id: str | None = None
|
||||||
# Track the active background task so cancel can propagate to it.
|
# Track the active background task so cancel can propagate to it.
|
||||||
active_bg_task: asyncio.Task | None = None
|
active_bg_task: asyncio.Task | None = None
|
||||||
|
# U8/R8: pending spec review futures. The portal WS path doesn't wire
|
||||||
|
# _spec_review_handler on the agent (the background task architecture
|
||||||
|
# makes EventQueue-based request/reply non-trivial), so this dict is
|
||||||
|
# typically empty. It exists so stale spec_review_reply messages from
|
||||||
|
# the frontend are handled gracefully instead of silently ignored.
|
||||||
|
# ponytail: upgrade path — wire _spec_review_handler via EventQueue +
|
||||||
|
# future, mirroring chat.py's _spec_review_handler closure.
|
||||||
|
pending_spec_reviews: dict[str, asyncio.Future[tuple[str, str]]] = {}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
|
|
@ -1256,6 +1298,32 @@ async def portal_websocket(websocket: WebSocket):
|
||||||
await websocket.send_json({"type": "pong"})
|
await websocket.send_json({"type": "pong"})
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if msg_type == "spec_review_reply":
|
||||||
|
# U8/R8: mirror chat.py:1126 — resolve a pending spec review
|
||||||
|
# future. Typically a no-op in the portal WS path (the
|
||||||
|
# _spec_review_handler isn't wired), but handles stale replies
|
||||||
|
# gracefully.
|
||||||
|
spec_review_id = msg.get("spec_review_id")
|
||||||
|
decision = msg.get("decision", "rejected")
|
||||||
|
feedback = msg.get("feedback", "")
|
||||||
|
logger.info(
|
||||||
|
f"Received spec_review_reply: id={spec_review_id!r}, decision={decision!r}"
|
||||||
|
)
|
||||||
|
if spec_review_id and spec_review_id in pending_spec_reviews:
|
||||||
|
fut = pending_spec_reviews[spec_review_id]
|
||||||
|
if not fut.done():
|
||||||
|
fut.set_result((decision, feedback))
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
f"spec_review_reply {spec_review_id!r} already resolved"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
f"spec_review_reply {spec_review_id!r} not found in "
|
||||||
|
f"pending_spec_reviews — ignoring"
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
if msg_type == "resume":
|
if msg_type == "resume":
|
||||||
# Frontend reconnected and wants to resume a running task
|
# Frontend reconnected and wants to resume a running task
|
||||||
resume_task_id = msg.get("task_id", "")
|
resume_task_id = msg.get("task_id", "")
|
||||||
|
|
@ -1800,15 +1868,17 @@ async def portal_websocket(websocket: WebSocket):
|
||||||
|
|
||||||
# Execute via ReAct stream
|
# Execute via ReAct stream
|
||||||
react_config = agent.get_react_config()
|
react_config = agent.get_react_config()
|
||||||
# Reuse agent's ReActEngine if available (aligned with chat.py pattern)
|
# KTD-4/KTD-8: route through ConfigDrivenAgent.execute_stream
|
||||||
react_engine = getattr(agent, "_react_engine", None)
|
# (evolution hooks + trace_outcome propagation in finally block).
|
||||||
if react_engine is None:
|
_react_engine = getattr(agent, "_react_engine", None)
|
||||||
react_engine = ReActEngine(
|
if _react_engine is None:
|
||||||
|
_react_engine = ReActEngine(
|
||||||
llm_gateway=llm_gateway,
|
llm_gateway=llm_gateway,
|
||||||
max_steps=react_config["max_steps"],
|
max_steps=react_config["max_steps"],
|
||||||
)
|
)
|
||||||
|
agent._react_engine = _react_engine
|
||||||
else:
|
else:
|
||||||
react_engine.reset()
|
_react_engine.reset()
|
||||||
|
|
||||||
messages = [{"role": "user", "content": message_text}]
|
messages = [{"role": "user", "content": message_text}]
|
||||||
# Inject conversation history for context continuity
|
# Inject conversation history for context continuity
|
||||||
|
|
@ -1829,11 +1899,8 @@ async def portal_websocket(websocket: WebSocket):
|
||||||
# background task continues running and persists the result.
|
# background task continues running and persists the result.
|
||||||
bg_task = asyncio.create_task(
|
bg_task = asyncio.create_task(
|
||||||
_execute_react_background(
|
_execute_react_background(
|
||||||
react_engine=react_engine,
|
agent=agent,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
tools=tools,
|
|
||||||
model=model,
|
|
||||||
agent_name=agent.name,
|
|
||||||
system_prompt=system_prompt,
|
system_prompt=system_prompt,
|
||||||
timeout_seconds=timeout_seconds,
|
timeout_seconds=timeout_seconds,
|
||||||
conv_id=conv.id,
|
conv_id=conv.id,
|
||||||
|
|
|
||||||
|
|
@ -38,10 +38,12 @@ class FakeConversationStore:
|
||||||
class FakeReactEngine:
|
class FakeReactEngine:
|
||||||
"""Fake ReAct engine that yields events from a predefined list."""
|
"""Fake ReAct engine that yields events from a predefined list."""
|
||||||
|
|
||||||
|
name = "test-agent"
|
||||||
|
|
||||||
def __init__(self, events: list[Event]) -> None:
|
def __init__(self, events: list[Event]) -> None:
|
||||||
self._events = events
|
self._events = events
|
||||||
|
|
||||||
async def execute_stream(self, **kwargs):
|
async def execute_stream(self, task):
|
||||||
for event in self._events:
|
for event in self._events:
|
||||||
yield event
|
yield event
|
||||||
|
|
||||||
|
|
@ -49,11 +51,13 @@ class FakeReactEngine:
|
||||||
class FailingReactEngine:
|
class FailingReactEngine:
|
||||||
"""Fake ReAct engine that raises an exception after yielding some events."""
|
"""Fake ReAct engine that raises an exception after yielding some events."""
|
||||||
|
|
||||||
|
name = "test-agent"
|
||||||
|
|
||||||
def __init__(self, events: list[Event], error: Exception) -> None:
|
def __init__(self, events: list[Event], error: Exception) -> None:
|
||||||
self._events = events
|
self._events = events
|
||||||
self._error = error
|
self._error = error
|
||||||
|
|
||||||
async def execute_stream(self, **kwargs):
|
async def execute_stream(self, task):
|
||||||
for event in self._events:
|
for event in self._events:
|
||||||
yield event
|
yield event
|
||||||
raise self._error
|
raise self._error
|
||||||
|
|
@ -76,11 +80,13 @@ def _make_event(
|
||||||
class SlowFakeReactEngine:
|
class SlowFakeReactEngine:
|
||||||
"""Fake ReAct engine with a delay to allow status checks during execution."""
|
"""Fake ReAct engine with a delay to allow status checks during execution."""
|
||||||
|
|
||||||
|
name = "test-agent"
|
||||||
|
|
||||||
def __init__(self, events: list[Event], delay: float = 0.1) -> None:
|
def __init__(self, events: list[Event], delay: float = 0.1) -> None:
|
||||||
self._events = events
|
self._events = events
|
||||||
self._delay = delay
|
self._delay = delay
|
||||||
|
|
||||||
async def execute_stream(self, **kwargs):
|
async def execute_stream(self, task):
|
||||||
for event in self._events:
|
for event in self._events:
|
||||||
await asyncio.sleep(self._delay)
|
await asyncio.sleep(self._delay)
|
||||||
yield event
|
yield event
|
||||||
|
|
@ -93,11 +99,13 @@ class CancellableReactEngine:
|
||||||
Event so the test can cancel the task and verify CancelledError cleanup.
|
Event so the test can cancel the task and verify CancelledError cleanup.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
name = "test-agent"
|
||||||
|
|
||||||
def __init__(self, first_event: Event) -> None:
|
def __init__(self, first_event: Event) -> None:
|
||||||
self._first_event = first_event
|
self._first_event = first_event
|
||||||
self.started = asyncio.Event()
|
self.started = asyncio.Event()
|
||||||
|
|
||||||
async def execute_stream(self, **kwargs):
|
async def execute_stream(self, task):
|
||||||
yield self._first_event
|
yield self._first_event
|
||||||
self.started.set()
|
self.started.set()
|
||||||
# Block forever until cancelled
|
# Block forever until cancelled
|
||||||
|
|
@ -130,11 +138,8 @@ class TestExecuteReactBackground:
|
||||||
eq = EventQueue()
|
eq = EventQueue()
|
||||||
|
|
||||||
await _execute_react_background(
|
await _execute_react_background(
|
||||||
react_engine=engine,
|
agent=engine,
|
||||||
messages=[],
|
messages=[],
|
||||||
tools=[],
|
|
||||||
model="test-model",
|
|
||||||
agent_name="test-agent",
|
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
timeout_seconds=None,
|
timeout_seconds=None,
|
||||||
conv_id="test-conv",
|
conv_id="test-conv",
|
||||||
|
|
@ -162,11 +167,8 @@ class TestExecuteReactBackground:
|
||||||
eq = EventQueue()
|
eq = EventQueue()
|
||||||
|
|
||||||
await _execute_react_background(
|
await _execute_react_background(
|
||||||
react_engine=engine,
|
agent=engine,
|
||||||
messages=[],
|
messages=[],
|
||||||
tools=[],
|
|
||||||
model="test-model",
|
|
||||||
agent_name="test-agent",
|
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
timeout_seconds=None,
|
timeout_seconds=None,
|
||||||
conv_id="test-conv",
|
conv_id="test-conv",
|
||||||
|
|
@ -190,11 +192,8 @@ class TestExecuteReactBackground:
|
||||||
eq = EventQueue()
|
eq = EventQueue()
|
||||||
|
|
||||||
await _execute_react_background(
|
await _execute_react_background(
|
||||||
react_engine=engine,
|
agent=engine,
|
||||||
messages=[],
|
messages=[],
|
||||||
tools=[],
|
|
||||||
model="test-model",
|
|
||||||
agent_name="test-agent",
|
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
timeout_seconds=None,
|
timeout_seconds=None,
|
||||||
conv_id="test-conv",
|
conv_id="test-conv",
|
||||||
|
|
@ -228,11 +227,8 @@ class TestExecuteReactBackground:
|
||||||
await asyncio.sleep(0.05)
|
await asyncio.sleep(0.05)
|
||||||
|
|
||||||
await _execute_react_background(
|
await _execute_react_background(
|
||||||
react_engine=engine,
|
agent=engine,
|
||||||
messages=[],
|
messages=[],
|
||||||
tools=[],
|
|
||||||
model="test-model",
|
|
||||||
agent_name="test-agent",
|
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
timeout_seconds=None,
|
timeout_seconds=None,
|
||||||
conv_id="test-conv",
|
conv_id="test-conv",
|
||||||
|
|
@ -270,11 +266,8 @@ class TestExecuteReactBackground:
|
||||||
await asyncio.sleep(0.05)
|
await asyncio.sleep(0.05)
|
||||||
|
|
||||||
await _execute_react_background(
|
await _execute_react_background(
|
||||||
react_engine=engine,
|
agent=engine,
|
||||||
messages=[],
|
messages=[],
|
||||||
tools=[],
|
|
||||||
model="test-model",
|
|
||||||
agent_name="test-agent",
|
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
timeout_seconds=None,
|
timeout_seconds=None,
|
||||||
conv_id="test-conv",
|
conv_id="test-conv",
|
||||||
|
|
@ -318,11 +311,8 @@ class TestTaskStoreIntegration:
|
||||||
# Start background task
|
# Start background task
|
||||||
bg_task = asyncio.create_task(
|
bg_task = asyncio.create_task(
|
||||||
_execute_react_background(
|
_execute_react_background(
|
||||||
react_engine=engine,
|
agent=engine,
|
||||||
messages=[],
|
messages=[],
|
||||||
tools=[],
|
|
||||||
model="test-model",
|
|
||||||
agent_name="test-agent",
|
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
timeout_seconds=None,
|
timeout_seconds=None,
|
||||||
conv_id="test-conv",
|
conv_id="test-conv",
|
||||||
|
|
@ -365,11 +355,8 @@ class TestTaskStoreIntegration:
|
||||||
)
|
)
|
||||||
|
|
||||||
await _execute_react_background(
|
await _execute_react_background(
|
||||||
react_engine=engine,
|
agent=engine,
|
||||||
messages=[],
|
messages=[],
|
||||||
tools=[],
|
|
||||||
model="test-model",
|
|
||||||
agent_name="test-agent",
|
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
timeout_seconds=None,
|
timeout_seconds=None,
|
||||||
conv_id="test-conv",
|
conv_id="test-conv",
|
||||||
|
|
@ -394,11 +381,8 @@ class TestTaskStoreIntegration:
|
||||||
|
|
||||||
# Should not raise
|
# Should not raise
|
||||||
await _execute_react_background(
|
await _execute_react_background(
|
||||||
react_engine=engine,
|
agent=engine,
|
||||||
messages=[],
|
messages=[],
|
||||||
tools=[],
|
|
||||||
model="test-model",
|
|
||||||
agent_name="test-agent",
|
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
timeout_seconds=None,
|
timeout_seconds=None,
|
||||||
conv_id="test-conv",
|
conv_id="test-conv",
|
||||||
|
|
@ -552,11 +536,8 @@ class TestCancelledErrorPath:
|
||||||
|
|
||||||
bg_task = asyncio.create_task(
|
bg_task = asyncio.create_task(
|
||||||
_execute_react_background(
|
_execute_react_background(
|
||||||
react_engine=engine,
|
agent=engine,
|
||||||
messages=[],
|
messages=[],
|
||||||
tools=[],
|
|
||||||
model="test-model",
|
|
||||||
agent_name="test-agent",
|
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
timeout_seconds=None,
|
timeout_seconds=None,
|
||||||
conv_id="test-conv",
|
conv_id="test-conv",
|
||||||
|
|
@ -590,11 +571,8 @@ class TestCancelledErrorPath:
|
||||||
|
|
||||||
bg_task = asyncio.create_task(
|
bg_task = asyncio.create_task(
|
||||||
_execute_react_background(
|
_execute_react_background(
|
||||||
react_engine=engine,
|
agent=engine,
|
||||||
messages=[],
|
messages=[],
|
||||||
tools=[],
|
|
||||||
model="test-model",
|
|
||||||
agent_name="test-agent",
|
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
timeout_seconds=None,
|
timeout_seconds=None,
|
||||||
conv_id="test-conv",
|
conv_id="test-conv",
|
||||||
|
|
@ -636,11 +614,8 @@ class TestCancelledErrorPath:
|
||||||
|
|
||||||
bg_task = asyncio.create_task(
|
bg_task = asyncio.create_task(
|
||||||
_execute_react_background(
|
_execute_react_background(
|
||||||
react_engine=engine,
|
agent=engine,
|
||||||
messages=[],
|
messages=[],
|
||||||
tools=[],
|
|
||||||
model="test-model",
|
|
||||||
agent_name="test-agent",
|
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
timeout_seconds=None,
|
timeout_seconds=None,
|
||||||
conv_id="test-conv",
|
conv_id="test-conv",
|
||||||
|
|
@ -769,11 +744,8 @@ class TestCancelPropagation:
|
||||||
# Simulate the background task as portal.py would create it
|
# Simulate the background task as portal.py would create it
|
||||||
active_bg_task: asyncio.Task | None = asyncio.create_task(
|
active_bg_task: asyncio.Task | None = asyncio.create_task(
|
||||||
_execute_react_background(
|
_execute_react_background(
|
||||||
react_engine=engine,
|
agent=engine,
|
||||||
messages=[],
|
messages=[],
|
||||||
tools=[],
|
|
||||||
model="test-model",
|
|
||||||
agent_name="test-agent",
|
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
timeout_seconds=None,
|
timeout_seconds=None,
|
||||||
conv_id="cancel-conv",
|
conv_id="cancel-conv",
|
||||||
|
|
@ -814,11 +786,8 @@ class TestCancelPropagation:
|
||||||
|
|
||||||
bg_task = asyncio.create_task(
|
bg_task = asyncio.create_task(
|
||||||
_execute_react_background(
|
_execute_react_background(
|
||||||
react_engine=engine,
|
agent=engine,
|
||||||
messages=[],
|
messages=[],
|
||||||
tools=[],
|
|
||||||
model="test-model",
|
|
||||||
agent_name="test-agent",
|
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
timeout_seconds=None,
|
timeout_seconds=None,
|
||||||
conv_id="test-conv",
|
conv_id="test-conv",
|
||||||
|
|
@ -865,11 +834,8 @@ class TestWebSocketDisconnectNoCancel:
|
||||||
# Start the background task (as portal.py would)
|
# Start the background task (as portal.py would)
|
||||||
bg_task = asyncio.create_task(
|
bg_task = asyncio.create_task(
|
||||||
_execute_react_background(
|
_execute_react_background(
|
||||||
react_engine=engine,
|
agent=engine,
|
||||||
messages=[],
|
messages=[],
|
||||||
tools=[],
|
|
||||||
model="test-model",
|
|
||||||
agent_name="test-agent",
|
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
timeout_seconds=None,
|
timeout_seconds=None,
|
||||||
conv_id="test-conv",
|
conv_id="test-conv",
|
||||||
|
|
@ -912,11 +878,8 @@ class TestWebSocketDisconnectNoCancel:
|
||||||
|
|
||||||
bg_task = asyncio.create_task(
|
bg_task = asyncio.create_task(
|
||||||
_execute_react_background(
|
_execute_react_background(
|
||||||
react_engine=engine,
|
agent=engine,
|
||||||
messages=[],
|
messages=[],
|
||||||
tools=[],
|
|
||||||
model="test-model",
|
|
||||||
agent_name="test-agent",
|
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
timeout_seconds=None,
|
timeout_seconds=None,
|
||||||
conv_id="resume-conv",
|
conv_id="resume-conv",
|
||||||
|
|
|
||||||
|
|
@ -112,7 +112,8 @@ class TestExecuteStreamHooks:
|
||||||
assert events[0].event_type == "final_answer"
|
assert events[0].event_type == "final_answer"
|
||||||
assert len(fired) == 1
|
assert len(fired) == 1
|
||||||
assert fired[0].status == TaskStatus.COMPLETED
|
assert fired[0].status == TaskStatus.COMPLETED
|
||||||
assert fired[0].output_data == {"content": "hello world"}
|
# KTD-8: output_data includes trace_outcome for lifecycle._is_failure_path()
|
||||||
|
assert fired[0].output_data == {"content": "hello world", "trace_outcome": "success"}
|
||||||
|
|
||||||
async def test_failure_fires_on_task_failed(self):
|
async def test_failure_fires_on_task_failed(self):
|
||||||
"""Stream exception fires evolve_after_task with FAILED status."""
|
"""Stream exception fires evolve_after_task with FAILED status."""
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue