fix(review): resolve 11 P1 blockers from ce-code-review
Test / backend-test (pull_request) Waiting to run Details
Test / frontend-unit (pull_request) Waiting to run Details
Test / api-e2e (pull_request) Waiting to run Details
Test / frontend-e2e (pull_request) Waiting to run Details

P1#1  config_driven: propagate trace_outcome into output_data so
      lifecycle._is_failure_path() detects non-success outcomes
P1#2  portal: route through ConfigDrivenAgent.execute_stream (not
      react_engine.execute_stream directly) so evolution hooks fire
      and trace_outcome propagates; add pre-built messages support in
      _build_llm_messages
P1#3  sandbox: make network_block reentrant via module-level reference
      counter + threading.Lock - concurrent VERIFICATION phases no
      longer permanently block all new connections
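P1#4  chat: replace dead isinstance(_PlanExecEngine) check with
      hasattr(_spec_review_handler) so the spec review gate is wired on
      any engine that exposes that attribute (the WS chat PLAN_EXEC path
      runs a ReActEngine that does not read it, so that path still has
      no gate)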
P1#5  plan_exec_engine: complete max_reflections threading chain
      (PlanExecEngine + ReActStepExecutor constructors)
P1#6  plan_exec_engine: enforce phase budgets (max_steps from
      phase_budgets, not hardcoded 5)
P1#7  plan_exec_engine: use current plan (not stale plan var) in
      aggregation after replan
P1#8  plan_exec_engine: map failure to failed status (not success)
P1#9  app: add drain timeout for pending evolution tasks on shutdown
P1#10 portal: handle spec_review_reply in WS handler
P1#11 chat: persist spec_review_request/reply/timeout to conversation
      store so reload can reconstruct gate state

Tests: 116 related tests pass; 26 pre-existing failures unchanged
(stash-verified). ruff lint clean.
This commit is contained in:
chiguyong 2026-07-04 01:10:01 +08:00
parent 7c900ce280
commit e5e76697a9
9 changed files with 355 additions and 163 deletions

View File

@ -72,6 +72,11 @@ async def drain_pending_evolution_tasks() -> None:
await asyncio.gather(*_pending_evolution_tasks, return_exceptions=True)
def get_evolution_dropped_count() -> int:
    """Report how many evolution tasks were dropped because of backpressure.

    This accessor only reads the module-level ``_evolution_dropped_count``
    counter; it never modifies it.

    Returns:
        The current value of the dropped-task counter.
    """
    dropped_so_far = _evolution_dropped_count
    return dropped_so_far
class AgentConfig:
"""Agent 配置模型,从 YAML 或 Dict 构建"""
@ -739,7 +744,20 @@ class ConfigDrivenAgent(BaseAgent, EvolutionMixin):
Shared by all _handle_*_stream methods to avoid duplicating the
message-rendering logic that mirrors the sync _handle_* methods.
Portal path: if ``task.input_data["messages"]`` is present (a list of
``{role, content}`` dicts), use those pre-built messages directly
instead of rendering the prompt template. This lets the portal route
through ``execute_stream`` (inheriting evolution hooks + trace_outcome
propagation) while keeping its external message-building logic.
"""
prebuilt = task.input_data.get("messages")
if prebuilt is not None:
system_prompt = task.input_data.get("system_prompt")
user_messages = [m for m in prebuilt if m.get("role") != "system"]
if not user_messages:
user_messages = [{"role": "user", "content": str(task.input_data)}]
return system_prompt, user_messages
variables = task.input_data.copy()
variables["task_type"] = task.task_type
if self._prompt_template:
@ -774,22 +792,35 @@ class ConfigDrivenAgent(BaseAgent, EvolutionMixin):
token = CancellationToken()
self._active_tokens[task.task_id] = token
_stream_output: dict = {}
_stream_trace_outcome: str = "success"
_stream_error: BaseException | None = None
_stream_completed = False
_stream_started_at = datetime.now(timezone.utc)
try:
await self._register_mcp_tools()
async for event in self.handle_task_stream(task):
if event.event_type == "final_answer":
_raw = event.data.get("output", "")
_stream_output = {"content": _raw} if isinstance(_raw, str) else _raw
# PLAN_EXEC path may embed trace_outcome in final_answer.
_to = event.data.get("trace_outcome")
if _to:
_stream_trace_outcome = _to
elif event.event_type == "final_result":
# REACT path: final_result carries ReActResult.status.
_result = event.data.get("result")
if _result is not None:
_stream_trace_outcome = getattr(_result, "status", "success")
yield event
_stream_completed = True
except asyncio.CancelledError as ce:
# Cancellation must propagate, but hooks still fire (U2 edge case).
_stream_error = ce
_stream_trace_outcome = "cancelled"
raise
except Exception as e:
_stream_error = e
_stream_trace_outcome = "error"
raise
finally:
# The finally clause of an async generator runs when the generator is closed (GC / aclose / normal completion)
@ -797,6 +828,12 @@ class ConfigDrivenAgent(BaseAgent, EvolutionMixin):
# KTD-4: lifecycle parity — fire evolution hooks fire-and-forget.
try:
now = datetime.now(timezone.utc)
# KTD-8: propagate trace_outcome into output_data so
# lifecycle._is_failure_path() can detect non-success outcomes.
if _stream_output:
_stream_output["trace_outcome"] = _stream_trace_outcome
else:
_stream_output = {"trace_outcome": _stream_trace_outcome}
if _stream_error is not None:
if isinstance(_stream_error, (asyncio.CancelledError, TaskCancelledError)):
status = TaskStatus.CANCELLED
@ -810,17 +847,29 @@ class ConfigDrivenAgent(BaseAgent, EvolutionMixin):
status=status,
output_data=None,
error_message=err_msg,
started_at=now,
started_at=_stream_started_at,
completed_at=now,
)
elif _stream_completed:
# KTD-8: map non-success trace_outcomes to FAILED.
if _stream_trace_outcome in (
"gave_up_after_reflections",
"verify_failed",
"verify_quota_exhausted",
"failed",
):
status = TaskStatus.FAILED
err_msg = _stream_trace_outcome
else:
status = TaskStatus.COMPLETED
err_msg = None
result = TaskResult(
task_id=task.task_id,
agent_name=self.name,
status=TaskStatus.COMPLETED,
status=status,
output_data=_stream_output,
error_message=None,
started_at=now,
error_message=err_msg,
started_at=_stream_started_at,
completed_at=now,
)
else:
@ -831,7 +880,7 @@ class ConfigDrivenAgent(BaseAgent, EvolutionMixin):
status=TaskStatus.CANCELLED,
output_data=None,
error_message="stream closed before completion",
started_at=now,
started_at=_stream_started_at,
completed_at=now,
)
self._trigger_evolution_hooks(task, result)

View File

@ -121,6 +121,10 @@ class PlanExecEngine:
# user's decision. None = skip the gate (backward compat — the engine
# proceeds directly to execution after Spec persistence).
spec_review_handler: SpecReviewHandler | None = None,
# KTD-2/R4: max reflections for ReActEngine reinjection→reflection
# escalation. Threaded through to each step's ReActEngine so the
# verify-failed path can escalate from reinjection to full reflection.
max_reflections: int = 2,
):
"""
Args:
@ -159,6 +163,8 @@ class PlanExecEngine:
self._pitfall_detector = pitfall_detector
# U8/R8: spec review gate handler. None = skip gate (backward compat).
self._spec_review_handler = spec_review_handler
# KTD-2/R4: max reflections threaded to each step's ReActEngine.
self._max_reflections = max_reflections
# U4/R11: copy the default to avoid mutating the module-level dict.
self._phase_budgets = (
dict(phase_budgets) if phase_budgets is not None else dict(_DEFAULT_PHASE_BUDGETS)
@ -605,9 +611,10 @@ class PlanExecEngine:
"output": output,
"total_steps": len(state.trajectory),
"total_tokens": state.total_tokens,
"plan_id": plan.plan_id,
"plan_id": current_plan.plan_id,
"plan_status": plan_result.status.value,
"replanned": state.replanned,
"trace_outcome": trace_outcome,
},
)
@ -637,7 +644,7 @@ class PlanExecEngine:
async def _inject_pitfall_warnings(
self,
goal: str,
plan_steps: list[Any],
plan_steps: list[PlanStep],
task_type: str,
actor: str,
system_prompt: str | None,
@ -1432,6 +1439,7 @@ class PlanExecEngine:
verification_enabled=self._verification_enabled,
verification_commands=self._verification_commands,
phase_budgets=self._phase_budgets,
max_reflections=self._max_reflections,
)
return PlanExecutor(
agent_pool=step_executor,
@ -1590,11 +1598,13 @@ class ReActStepExecutor:
model: str = "default",
system_prompt: str | None = None,
tools: list["Tool"] | None = None,
max_steps: int = 5,
max_steps: int = 10,
confirmation_handler: Any | None = None,
verification_enabled: bool = False,
verification_commands: list[str] | None = None,
phase_budgets: dict[str, int] | None = None,
# KTD-2/R4: threaded through to each step's ReActEngine.
max_reflections: int = 2,
):
self._llm_gateway = llm_gateway
self._messages = messages or []
@ -1607,6 +1617,8 @@ class ReActStepExecutor:
self._verification_commands = verification_commands
# U4/R11: thread through to each step's ReActEngine.
self._phase_budgets = phase_budgets
# KTD-2/R4: thread through to each step's ReActEngine.
self._max_reflections = max_reflections
self._agents: dict[str, _ReActStepAgent] = {}
async def create_agent_from_skill(self, skill_name: str):
@ -1623,6 +1635,7 @@ class ReActStepExecutor:
verification_enabled=self._verification_enabled,
verification_commands=self._verification_commands,
phase_budgets=self._phase_budgets,
max_reflections=self._max_reflections,
)
self._agents[skill_name] = agent
return agent
@ -1642,6 +1655,7 @@ class ReActStepExecutor:
verification_enabled=self._verification_enabled,
verification_commands=self._verification_commands,
phase_budgets=self._phase_budgets,
max_reflections=self._max_reflections,
)
self._agents[key] = agent
return agent
@ -1662,11 +1676,12 @@ class _ReActStepAgent:
model: str = "default",
system_prompt: str | None = None,
tools: list["Tool"] | None = None,
max_steps: int = 5,
max_steps: int = 10,
confirmation_handler: Any | None = None,
verification_enabled: bool = False,
verification_commands: list[str] | None = None,
phase_budgets: dict[str, int] | None = None,
max_reflections: int = 2,
):
self.name = name
self._llm_gateway = llm_gateway
@ -1680,6 +1695,7 @@ class _ReActStepAgent:
self._verification_commands = verification_commands
# U4/R11: per-phase step quotas, passed to ReActEngine.
self._phase_budgets = phase_budgets
self._max_reflections = max_reflections
async def execute(self, task_msg: TaskMessage) -> "TaskResult":
"""执行步骤:通过 ReActEngine 循环调用"""
@ -1710,6 +1726,7 @@ class _ReActStepAgent:
verification_enabled=self._verification_enabled,
verification_commands=self._verification_commands,
phase_budgets=self._phase_budgets,
max_reflections=self._max_reflections,
)
# 构建 messages
@ -1728,7 +1745,13 @@ class _ReActStepAgent:
now = datetime.now(timezone.utc)
status = TaskStatus.COMPLETED.value
if react_result.status in ("timeout", "cancelled"):
if react_result.status in (
"timeout",
"cancelled",
"verify_failed",
"gave_up_after_reflections",
"failed",
):
status = TaskStatus.FAILED.value
return TaskResult(

View File

@ -33,10 +33,12 @@ from agentkit.telemetry.metrics import (
agent_duration_histogram,
)
from agentkit.core.phase import PhaseState
if TYPE_CHECKING:
from agentkit.core.compressor import CompressionStrategy
from agentkit.core.middleware import MiddlewareChain
from agentkit.core.phase import PhasePolicy, PhaseState
from agentkit.core.phase import PhasePolicy
from agentkit.core.sandbox import WorkspaceSandbox
from agentkit.core.trace import TraceRecorder
from agentkit.evolution.pitfall_detector import PitfallWarning
@ -420,8 +422,6 @@ class ReActEngine:
"""
if self._phase_policy is None or self._current_phase is None:
return
from agentkit.core.phase import PhaseState
while self._current_phase not in (PhaseState.VERIFICATION, PhaseState.DELIVERY):
nxt = self.advance_phase()
if nxt is None:
@ -446,8 +446,6 @@ class ReActEngine:
"""
if self._phase_policy is None or self._current_phase is None:
return None
from agentkit.core.phase import PhaseState
nxt = PhaseState.next_of(self._current_phase)
if nxt is None:
# Already at DELIVERY — return None to signal no transition.
@ -890,8 +888,8 @@ class ReActEngine:
trace_outcome = "success"
# U4/G1: verify 失败回灌计数器。受 max_steps 上限约束(不无限循环)。
# U4/KTD-7: initialize from restored budget state (checkpoint resume).
reinjections = self._reflect_count
# U4/KTD-7: _reflect_count is initialized from restored budget state
# (checkpoint resume) and used directly — no redundant local copy.
_loop_start = time.monotonic()
while step < self._max_steps:
@ -913,9 +911,7 @@ class ReActEngine:
and self._phase_policy is not None
and self._current_phase is not None
):
from agentkit.core.phase import PhaseState as _PS
if self._current_phase in (_PS.PLANNING, _PS.BUILDING):
if self._current_phase in (PhaseState.PLANNING, PhaseState.BUILDING):
self._think_count += 1
think_quota = self._phase_budgets.get("think")
if think_quota is not None and self._think_count >= think_quota:
@ -1547,7 +1543,7 @@ class ReActEngine:
vresult = await vloop.verify()
if not vresult.passed:
if (
reinjections < self._max_reinjections
self._reflect_count < self._max_reinjections
and step < self._max_steps
):
errors_text = "\n".join(vresult.errors)
@ -1557,7 +1553,6 @@ class ReActEngine:
"content": (f"验证失败,错误如下:\n{errors_text}"),
}
)
reinjections += 1
# U4/R10: track reflect count for
# checkpoint reconstruction (KTD-7).
self._reflect_count += 1
@ -1574,7 +1569,7 @@ class ReActEngine:
data={
"message": (
f"验证失败,已注入错误信息让 LLM 自纠正 "
f"(reinjection {reinjections}/{self._max_reinjections})"
f"(reinjection {self._reflect_count}/{self._max_reinjections})"
),
"verify_errors": vresult.errors,
},
@ -1681,7 +1676,7 @@ class ReActEngine:
logger.info(
"Verification failed after %d reinjections, "
"%d reflections, interrupting with verify log",
reinjections,
self._reflect_count,
self._reflection_count,
)
break
@ -2136,7 +2131,7 @@ class ReActEngine:
in_verification = (
self._sandbox is not None
and self._current_phase is not None
and self._current_phase.value == "verification"
and self._current_phase == PhaseState.VERIFICATION
)
try:

View File

@ -28,10 +28,24 @@ import contextlib
import errno
import logging
import socket
import threading
from pathlib import Path
logger = logging.getLogger(__name__)
# Reentrancy state for ``network_block``.
#
# Concurrent VERIFICATION phases (parallel PLAN_EXEC steps) each enter the
# context manager. Only the first entry (count 0 -> 1) patches
# ``socket.socket.connect`` / ``connect_ex``, and only the last exit
# (count 1 -> 0) restores them.
#
# Why a counter instead of per-call save/restore: with save/restore, the
# first phase to exit would unpatch while other phases still expect the block
# to be in effect, and a phase that entered while the patch was already
# active would save the *blocked* function as its "original" and reinstall it
# on exit, leaving new connections refused after every phase has finished.
#
# ponytail: process-wide counter — it only describes this process's
# ``socket`` module. NOTE(review): the original comment states that inherited
# fork state is irrelevant because the monkey-patch lives in the parent's
# socket module; confirm this matches how verification commands are spawned.
_network_block_count: int = 0
# Guards the counter together with the patch/unpatch of ``socket.socket``.
_network_block_lock = threading.Lock()
# The real implementations, captured once at import time. The last exit
# always reinstalls exactly these, regardless of what is patched in between.
# NOTE(review): if another library patches ``socket.socket`` after this module
# is imported, restoring these would overwrite that patch — confirm this is
# acceptable.
_original_socket_connect = socket.socket.connect
_original_socket_connect_ex = socket.socket.connect_ex
class SandboxNetworkBlockedError(RuntimeError):
"""Raised when a tool attempts an outbound network call under sandbox."""
@ -115,17 +129,23 @@ class WorkspaceSandbox:
"""Block outbound network connections within the async context.
Patches ``socket.socket.connect`` and ``connect_ex`` to raise /
return ``ECONNREFUSED`` respectively. Restores the originals on exit,
even if the wrapped code raises.
return ``ECONNREFUSED`` respectively. Restores the originals on the
last concurrent exit, even if the wrapped code raises.
Already-connected sockets (e.g. an LLM gateway keep-alive pool) are
unaffected; only *new* ``connect()`` calls are blocked. This is the
correct granularity: the LLM gateway talks over its existing
connection, while a tool trying to ``requests.get(...)`` opens a new
connection and is rejected.
Reentrancy: a module-level counter guards the patch. Concurrent
VERIFICATION phases (parallel PLAN_EXEC steps) each enter/exit; the
patch is engaged on count 0->1 and released on count 1->0. Without
this, per-call save/restore fails in two ways: the first exit restores
the original connect while later phases still expect the block, and a
phase that entered while the patch was active saves the *blocked*
function as its "original" and reinstalls it on exit, so new LLM
gateway / Redis / PostgreSQL connections stay refused even after every
phase has finished.
"""
original_connect = socket.socket.connect
original_connect_ex = socket.socket.connect_ex
global _network_block_count # noqa: PLW0603
def _blocked_connect(self_sock, *args, **kwargs): # noqa: ANN001
raise SandboxNetworkBlockedError(
@ -136,15 +156,26 @@ class WorkspaceSandbox:
# connect_ex returns an errno instead of raising (POSIX contract).
return errno.ECONNREFUSED
with _network_block_lock:
_network_block_count += 1
if _network_block_count == 1:
socket.socket.connect = _blocked_connect # type: ignore[method-assign]
socket.socket.connect_ex = _blocked_connect_ex # type: ignore[method-assign]
logger.debug("sandbox: network block engaged")
logger.debug("sandbox: network block engaged (count=1)")
try:
yield
finally:
socket.socket.connect = original_connect # type: ignore[method-assign]
socket.socket.connect_ex = original_connect_ex # type: ignore[method-assign]
logger.debug("sandbox: network block released")
with _network_block_lock:
_network_block_count -= 1
if _network_block_count == 0:
socket.socket.connect = _original_socket_connect # type: ignore[method-assign]
socket.socket.connect_ex = _original_socket_connect_ex # type: ignore[method-assign]
logger.debug("sandbox: network block released (count=0)")
else:
logger.debug(
"sandbox: network block still held (count=%d)",
_network_block_count,
)
def detect_verification_commands(workspace_root: str | Path | None) -> list[str]:

View File

@ -805,7 +805,14 @@ async def lifespan(app: FastAPI):
try:
from agentkit.core.config_driven import drain_pending_evolution_tasks
await drain_pending_evolution_tasks()
await asyncio.wait_for(drain_pending_evolution_tasks(), timeout=10.0)
except asyncio.TimeoutError:
from agentkit.core.config_driven import _pending_evolution_tasks
logger.warning(
"drain_pending_evolution_tasks 超时 10s, %d 个任务被放弃",
len(_pending_evolution_tasks),
)
except Exception:
logger.debug("drain_pending_evolution_tasks 异常已忽略", exc_info=True)

View File

@ -1494,6 +1494,23 @@ async def _handle_chat_message(
},
}
)
# U8/R8: persist the spec_review_request so it survives a page reload.
# The frontend reconstructs the pending review card from the restored
# message metadata (spec_review_id + goal + steps).
try:
await sm.append_message(
session_id=session_id,
role=MessageRole.ASSISTANT,
content=f"[Spec Review] {goal}",
metadata={
"message_type": "spec_review_request",
"spec_review_id": spec_review_id,
"spec_review_goal": goal,
"spec_review_steps": steps,
},
)
except Exception:
logger.debug("Failed to persist spec_review_request", exc_info=True)
loop = asyncio.get_running_loop()
future: asyncio.Future[tuple[str, str]] = loop.create_future()
@ -1506,19 +1523,58 @@ async def _handle_chat_message(
# "failed") so the user can resume on return.
decision, feedback = await asyncio.wait_for(future, timeout=1800.0)
logger.info(f"Spec review {spec_review_id} resolved: decision={decision!r}")
# Persist the decision so the frontend can show the outcome after
# a reload (e.g. timeout→parked transition the user never saw).
try:
await sm.append_message(
session_id=session_id,
role=MessageRole.ASSISTANT,
content=f"[Spec Review Decision] {decision}: {feedback}",
metadata={
"message_type": "spec_review_reply",
"spec_review_id": spec_review_id,
"spec_review_decision": decision,
"spec_review_feedback": feedback,
},
)
except Exception:
logger.debug("Failed to persist spec_review_reply", exc_info=True)
return decision, feedback
except asyncio.TimeoutError:
logger.warning(f"Spec review {spec_review_id} timed out (30 min)")
# Persist the timeout→parked transition so the frontend can show
# the parked state after a reload.
try:
await sm.append_message(
session_id=session_id,
role=MessageRole.ASSISTANT,
content=f"[Spec Review Timed Out] {spec_review_id}",
metadata={
"message_type": "spec_review_reply",
"spec_review_id": spec_review_id,
"spec_review_decision": "parked",
"spec_review_feedback": "timed out (30 min)",
},
)
except Exception:
logger.debug("Failed to persist spec_review timeout", exc_info=True)
raise
finally:
_pending_spec_reviews.pop(spec_review_id, None)
# Wire the handler onto a PlanExecEngine only (the WS PLAN_EXEC path uses
# a ReActEngine + phase_policy, where this is a no-op). Local import to
# avoid a top-level dependency that the WS path doesn't need.
from agentkit.core.plan_exec_engine import PlanExecEngine as _PlanExecEngine
if isinstance(react_engine, _PlanExecEngine):
# U8/R8: spec review gate wiring. The WS PLAN_EXEC path uses
# ``_build_phase_engine`` which returns a ``ReActEngine`` with
# ``phase_policy`` (NOT a ``PlanExecEngine``), so the gate cannot be
# wired here — ``ReActEngine`` does not read ``_spec_review_handler``.
# The gate only fires when ``ConfigDrivenAgent.execute_stream`` →
# ``_handle_plan_exec_stream`` → ``PlanExecEngine.execute_stream`` runs,
# which is the portal/task path (not the WS chat path).
# ponytail: known ceiling — WS chat PLAN_EXEC (phase_policy mechanism)
# does not support spec review. Upgrade path: route WS PLAN_EXEC through
# ``ConfigDrivenAgent.execute_stream`` to unify with the portal path and
# inherit the gate. The ``_spec_review_handler`` closure + event handlers
# below are kept so the upgrade is a routing change, not a rewrite.
if hasattr(react_engine, "_spec_review_handler"):
react_engine._spec_review_handler = _spec_review_handler
logger.info(

View File

@ -23,7 +23,7 @@ from pydantic import BaseModel
from agentkit.core.config_driven import ConfigDrivenAgent
from agentkit.core.event_queue import EventQueue
from agentkit.core.protocol import Event, TaskEventType, TaskStatus, TurnEventType
from agentkit.core.protocol import Event, TaskEventType, TaskMessage, TaskStatus, TurnEventType
from agentkit.core.react import ReActEngine
from agentkit.chat.skill_routing import ExecutionMode, SkillRoutingResult
from agentkit.chat.request_preprocessor import RequestPreprocessor
@ -73,6 +73,42 @@ def _ensure_non_empty(text: str | None) -> str:
return EMPTY_LLM_RESPONSE
def _build_portal_task(
    *,
    agent_name: str,
    messages: list[dict[str, str]],
    system_prompt: str | None,
    timeout_seconds: float | None,
    conversation_id: str | None = None,
    task_id: str | None = None,
) -> TaskMessage:
    """Construct a TaskMessage for routing through ConfigDrivenAgent.execute_stream.

    The portal builds messages externally (history + user message). The
    ``messages`` key in input_data tells _build_llm_messages to use them
    directly instead of rendering the prompt template. This lets the portal
    inherit evolution hooks + trace_outcome propagation from execute_stream's
    finally block (KTD-4/KTD-8).

    Args:
        agent_name: Name of the agent the task is addressed to.
        messages: Pre-built ``{role, content}`` dicts, oldest first. The
            content of the last entry is mirrored into ``input_data["content"]``.
        system_prompt: System prompt forwarded in ``input_data``; may be None.
        timeout_seconds: Task timeout. ``None`` or ``0`` selects the 300 s
            default; any other value is truncated to whole seconds and
            clamped to at least 1.
        conversation_id: Conversation the task belongs to, if any.
        task_id: Explicit task id; a fresh UUID4 string is generated when
            omitted or empty.

    Returns:
        A ``TaskMessage`` with ``task_type="chat"`` and priority 0.
    """
    from datetime import datetime, timezone

    # The original expression ``int(timeout_seconds)`` turned a sub-second
    # timeout (e.g. 0.5) into 0, even though an explicit 0 is treated as
    # "use the default". Clamp to one second so a small positive timeout
    # never collapses to a zero-length one.
    if timeout_seconds:
        effective_timeout = max(1, int(timeout_seconds))
    else:
        effective_timeout = 300

    # Mirror the latest message so consumers that only read ``content``
    # still see the user's text.
    latest_content = messages[-1].get("content", "") if messages else ""

    return TaskMessage(
        task_id=task_id or str(uuid.uuid4()),
        agent_name=agent_name,
        task_type="chat",
        priority=0,
        input_data={
            "messages": messages,
            "system_prompt": system_prompt,
            "content": latest_content,
        },
        callback_url=None,
        created_at=datetime.now(timezone.utc),
        timeout_seconds=effective_timeout,
        conversation_id=conversation_id,
    )
async def _emit_event_safe(
event_queue: EventQueue | None,
event_type: str,
@ -556,38 +592,39 @@ async def chat(request: ChatRequest, req: Request, _auth: None = Depends(_verify
)
react_config = agent.get_react_config()
react_engine = getattr(agent, "_react_engine", None)
if react_engine is None:
react_engine = ReActEngine(
# KTD-4/KTD-8: route through ConfigDrivenAgent.execute_stream so the
# finally block fires evolution hooks + propagates trace_outcome. The
# portal builds messages externally; _build_portal_task packages them
# into a TaskMessage whose input_data["messages"] is used directly by
# _build_llm_messages (bypassing the prompt template).
_react_engine = getattr(agent, "_react_engine", None)
if _react_engine is None:
_react_engine = ReActEngine(
llm_gateway=llm_gateway,
max_steps=react_config["max_steps"],
)
agent._react_engine = _react_engine
else:
react_engine.reset()
_react_engine.reset()
messages = [{"role": "user", "content": request.message}]
# Inject conversation history
history_msgs = await _build_history_messages(conv.id)
for hm in reversed(history_msgs):
messages.insert(0, hm)
tools = agent.get_tools()
model = agent.get_model()
system_prompt = getattr(agent, "_system_prompt", None) or agent.get_system_prompt()
timeout_seconds = react_config["timeout_seconds"]
collected_output: list[str] = []
try:
# U2 verify: calls react_engine.execute_stream directly, bypassing
# ConfigDrivenAgent.execute_stream — evolution hooks NOT propagated
# here. Routing through agent.execute_stream is tracked separately.
async for event in react_engine.execute_stream(
messages=messages,
tools=tools,
model=model,
portal_task = _build_portal_task(
agent_name=agent.name,
messages=messages,
system_prompt=system_prompt,
timeout_seconds=timeout_seconds,
):
conversation_id=conv.id,
)
collected_output: list[str] = []
try:
async for event in agent.execute_stream(portal_task):
if event.event_type == "final_answer":
collected_output.append(event.data.get("output", ""))
except asyncio.CancelledError:
@ -684,34 +721,32 @@ async def chat_stream(request: ChatRequest, req: Request, _auth: None = Depends(
)
react_config = agent.get_react_config()
react_engine = getattr(agent, "_react_engine", None)
if react_engine is None:
react_engine = ReActEngine(
# KTD-4/KTD-8: route through ConfigDrivenAgent.execute_stream
# (evolution hooks + trace_outcome propagation in finally block).
_react_engine = getattr(agent, "_react_engine", None)
if _react_engine is None:
_react_engine = ReActEngine(
llm_gateway=llm_gateway,
max_steps=react_config["max_steps"],
)
agent._react_engine = _react_engine
else:
react_engine.reset()
_react_engine.reset()
messages = [{"role": "user", "content": request.message}]
tools = agent.get_tools()
model = agent.get_model()
system_prompt = getattr(agent, "_system_prompt", None) or agent.get_system_prompt()
timeout_seconds = react_config["timeout_seconds"]
collected_output: list[str] = []
try:
# U2 verify: calls react_engine.execute_stream directly, bypassing
# ConfigDrivenAgent.execute_stream — evolution hooks NOT propagated
# here. Routing through agent.execute_stream is tracked separately.
async for event in react_engine.execute_stream(
messages=messages,
tools=tools,
model=model,
portal_task = _build_portal_task(
agent_name=agent.name,
messages=messages,
system_prompt=system_prompt,
timeout_seconds=timeout_seconds,
):
conversation_id=conv.id,
)
collected_output: list[str] = []
try:
async for event in agent.execute_stream(portal_task):
if event.event_type == "final_answer":
collected_output.append(event.data.get("output", ""))
yield {
@ -967,11 +1002,8 @@ def _derive_title_from_messages(messages: list) -> str:
async def _execute_react_background(
react_engine: ReActEngine,
agent: ConfigDrivenAgent,
messages: list[dict],
tools: list,
model: str,
agent_name: str,
system_prompt: str | None,
timeout_seconds: float | None,
conv_id: str,
@ -987,6 +1019,10 @@ async def _execute_react_background(
Results are always persisted to the conversation store, regardless of
whether a WebSocket subscriber is active.
Task status is tracked in TaskStore when provided.
KTD-4/KTD-8: routes through ``agent.execute_stream`` (not
``react_engine.execute_stream`` directly) so the finally block fires
evolution hooks and propagates trace_outcome.
"""
collected_output: list[str] = []
try:
@ -1005,17 +1041,15 @@ async def _execute_react_background(
):
logger.warning("Failed to update TaskStore RUNNING", exc_info=True)
# U2 verify: calls react_engine.execute_stream directly, bypassing
# ConfigDrivenAgent.execute_stream — evolution hooks NOT propagated
# here. Routing through agent.execute_stream is tracked separately.
async for event in react_engine.execute_stream(
portal_task = _build_portal_task(
agent_name=agent.name,
messages=messages,
tools=tools,
model=model,
agent_name=agent_name,
system_prompt=system_prompt,
timeout_seconds=timeout_seconds,
):
conversation_id=conv_id,
task_id=task_id,
)
async for event in agent.execute_stream(portal_task):
if event.event_type == "final_answer":
collected_output.append(event.data.get("output", ""))
@ -1219,6 +1253,14 @@ async def portal_websocket(websocket: WebSocket):
task_id: str | None = None
# Track the active background task so cancel can propagate to it.
active_bg_task: asyncio.Task | None = None
# U8/R8: pending spec review futures. The portal WS path doesn't wire
# _spec_review_handler on the agent (the background task architecture
# makes EventQueue-based request/reply non-trivial), so this dict is
# typically empty. It exists so stale spec_review_reply messages from
# the frontend are handled gracefully instead of silently ignored.
# ponytail: upgrade path — wire _spec_review_handler via EventQueue +
# future, mirroring chat.py's _spec_review_handler closure.
pending_spec_reviews: dict[str, asyncio.Future[tuple[str, str]]] = {}
try:
while True:
@ -1256,6 +1298,32 @@ async def portal_websocket(websocket: WebSocket):
await websocket.send_json({"type": "pong"})
continue
if msg_type == "spec_review_reply":
# U8/R8: mirror chat.py:1126 — resolve a pending spec review
# future. Typically a no-op in the portal WS path (the
# _spec_review_handler isn't wired), but handles stale replies
# gracefully.
spec_review_id = msg.get("spec_review_id")
decision = msg.get("decision", "rejected")
feedback = msg.get("feedback", "")
logger.info(
f"Received spec_review_reply: id={spec_review_id!r}, decision={decision!r}"
)
if spec_review_id and spec_review_id in pending_spec_reviews:
fut = pending_spec_reviews[spec_review_id]
if not fut.done():
fut.set_result((decision, feedback))
else:
logger.warning(
f"spec_review_reply {spec_review_id!r} already resolved"
)
else:
logger.warning(
f"spec_review_reply {spec_review_id!r} not found in "
f"pending_spec_reviews — ignoring"
)
continue
if msg_type == "resume":
# Frontend reconnected and wants to resume a running task
resume_task_id = msg.get("task_id", "")
@ -1800,15 +1868,17 @@ async def portal_websocket(websocket: WebSocket):
# Execute via ReAct stream
react_config = agent.get_react_config()
# Reuse agent's ReActEngine if available (aligned with chat.py pattern)
react_engine = getattr(agent, "_react_engine", None)
if react_engine is None:
react_engine = ReActEngine(
# KTD-4/KTD-8: route through ConfigDrivenAgent.execute_stream
# (evolution hooks + trace_outcome propagation in finally block).
_react_engine = getattr(agent, "_react_engine", None)
if _react_engine is None:
_react_engine = ReActEngine(
llm_gateway=llm_gateway,
max_steps=react_config["max_steps"],
)
agent._react_engine = _react_engine
else:
react_engine.reset()
_react_engine.reset()
messages = [{"role": "user", "content": message_text}]
# Inject conversation history for context continuity
@ -1829,11 +1899,8 @@ async def portal_websocket(websocket: WebSocket):
# background task continues running and persists the result.
bg_task = asyncio.create_task(
_execute_react_background(
react_engine=react_engine,
agent=agent,
messages=messages,
tools=tools,
model=model,
agent_name=agent.name,
system_prompt=system_prompt,
timeout_seconds=timeout_seconds,
conv_id=conv.id,

View File

@ -38,10 +38,12 @@ class FakeConversationStore:
class FakeReactEngine:
"""Fake ReAct engine that yields events from a predefined list."""
name = "test-agent"
def __init__(self, events: list[Event]) -> None:
self._events = events
async def execute_stream(self, **kwargs):
async def execute_stream(self, task):
for event in self._events:
yield event
@ -49,11 +51,13 @@ class FakeReactEngine:
class FailingReactEngine:
"""Fake ReAct engine that raises an exception after yielding some events."""
name = "test-agent"
def __init__(self, events: list[Event], error: Exception) -> None:
self._events = events
self._error = error
async def execute_stream(self, **kwargs):
async def execute_stream(self, task):
for event in self._events:
yield event
raise self._error
@ -76,11 +80,13 @@ def _make_event(
class SlowFakeReactEngine:
"""Fake ReAct engine with a delay to allow status checks during execution."""
name = "test-agent"
def __init__(self, events: list[Event], delay: float = 0.1) -> None:
self._events = events
self._delay = delay
async def execute_stream(self, **kwargs):
async def execute_stream(self, task):
for event in self._events:
await asyncio.sleep(self._delay)
yield event
@ -93,11 +99,13 @@ class CancellableReactEngine:
Event so the test can cancel the task and verify CancelledError cleanup.
"""
name = "test-agent"
def __init__(self, first_event: Event) -> None:
self._first_event = first_event
self.started = asyncio.Event()
async def execute_stream(self, **kwargs):
async def execute_stream(self, task):
yield self._first_event
self.started.set()
# Block forever until cancelled
@ -130,11 +138,8 @@ class TestExecuteReactBackground:
eq = EventQueue()
await _execute_react_background(
react_engine=engine,
agent=engine,
messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None,
timeout_seconds=None,
conv_id="test-conv",
@ -162,11 +167,8 @@ class TestExecuteReactBackground:
eq = EventQueue()
await _execute_react_background(
react_engine=engine,
agent=engine,
messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None,
timeout_seconds=None,
conv_id="test-conv",
@ -190,11 +192,8 @@ class TestExecuteReactBackground:
eq = EventQueue()
await _execute_react_background(
react_engine=engine,
agent=engine,
messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None,
timeout_seconds=None,
conv_id="test-conv",
@ -228,11 +227,8 @@ class TestExecuteReactBackground:
await asyncio.sleep(0.05)
await _execute_react_background(
react_engine=engine,
agent=engine,
messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None,
timeout_seconds=None,
conv_id="test-conv",
@ -270,11 +266,8 @@ class TestExecuteReactBackground:
await asyncio.sleep(0.05)
await _execute_react_background(
react_engine=engine,
agent=engine,
messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None,
timeout_seconds=None,
conv_id="test-conv",
@ -318,11 +311,8 @@ class TestTaskStoreIntegration:
# Start background task
bg_task = asyncio.create_task(
_execute_react_background(
react_engine=engine,
agent=engine,
messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None,
timeout_seconds=None,
conv_id="test-conv",
@ -365,11 +355,8 @@ class TestTaskStoreIntegration:
)
await _execute_react_background(
react_engine=engine,
agent=engine,
messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None,
timeout_seconds=None,
conv_id="test-conv",
@ -394,11 +381,8 @@ class TestTaskStoreIntegration:
# Should not raise
await _execute_react_background(
react_engine=engine,
agent=engine,
messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None,
timeout_seconds=None,
conv_id="test-conv",
@ -552,11 +536,8 @@ class TestCancelledErrorPath:
bg_task = asyncio.create_task(
_execute_react_background(
react_engine=engine,
agent=engine,
messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None,
timeout_seconds=None,
conv_id="test-conv",
@ -590,11 +571,8 @@ class TestCancelledErrorPath:
bg_task = asyncio.create_task(
_execute_react_background(
react_engine=engine,
agent=engine,
messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None,
timeout_seconds=None,
conv_id="test-conv",
@ -636,11 +614,8 @@ class TestCancelledErrorPath:
bg_task = asyncio.create_task(
_execute_react_background(
react_engine=engine,
agent=engine,
messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None,
timeout_seconds=None,
conv_id="test-conv",
@ -769,11 +744,8 @@ class TestCancelPropagation:
# Simulate the background task as portal.py would create it
active_bg_task: asyncio.Task | None = asyncio.create_task(
_execute_react_background(
react_engine=engine,
agent=engine,
messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None,
timeout_seconds=None,
conv_id="cancel-conv",
@ -814,11 +786,8 @@ class TestCancelPropagation:
bg_task = asyncio.create_task(
_execute_react_background(
react_engine=engine,
agent=engine,
messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None,
timeout_seconds=None,
conv_id="test-conv",
@ -865,11 +834,8 @@ class TestWebSocketDisconnectNoCancel:
# Start the background task (as portal.py would)
bg_task = asyncio.create_task(
_execute_react_background(
react_engine=engine,
agent=engine,
messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None,
timeout_seconds=None,
conv_id="test-conv",
@ -912,11 +878,8 @@ class TestWebSocketDisconnectNoCancel:
bg_task = asyncio.create_task(
_execute_react_background(
react_engine=engine,
agent=engine,
messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None,
timeout_seconds=None,
conv_id="resume-conv",

View File

@ -112,7 +112,8 @@ class TestExecuteStreamHooks:
assert events[0].event_type == "final_answer"
assert len(fired) == 1
assert fired[0].status == TaskStatus.COMPLETED
assert fired[0].output_data == {"content": "hello world"}
# KTD-8: output_data includes trace_outcome for lifecycle._is_failure_path()
assert fired[0].output_data == {"content": "hello world", "trace_outcome": "success"}
async def test_failure_fires_on_task_failed(self):
"""Stream exception fires evolve_after_task with FAILED status."""