fix(review): resolve 11 P1 blockers from ce-code-review
Test / backend-test (pull_request) Waiting to run Details
Test / frontend-unit (pull_request) Waiting to run Details
Test / api-e2e (pull_request) Waiting to run Details
Test / frontend-e2e (pull_request) Waiting to run Details

P1#1  config_driven: propagate trace_outcome into output_data so
      lifecycle._is_failure_path() detects non-success outcomes
P1#2  portal: route through ConfigDrivenAgent.execute_stream (not
      react_engine.execute_stream directly) so evolution hooks fire
      and trace_outcome propagates; add pre-built messages support in
      _build_llm_messages
P1#3  sandbox: make network_block reentrant via module-level reference
      counter + threading.Lock - concurrent VERIFICATION phases no
      longer permanently block all new connections
P1#4  chat: replace dead isinstance(_PlanExecEngine) check with
      hasattr(_spec_review_handler) to wire the spec review gate
P1#5  plan_exec_engine: complete max_reflections threading chain
      (PlanExecEngine + ReActStepExecutor constructors)
P1#6  plan_exec_engine: enforce phase budgets (max_steps from
      phase_budgets, not hardcoded 5)
P1#7  plan_exec_engine: use current plan (not stale plan var) in
      aggregation after replan
P1#8  plan_exec_engine: map failure to failed status (not success)
P1#9  app: add drain timeout for pending evolution tasks on shutdown
P1#10 portal: handle spec_review_reply in WS handler
P1#11 chat: persist spec_review_request/reply/timeout to conversation
      store so reload can reconstruct gate state

Tests: 116 related tests pass; 26 pre-existing failures unchanged
(stash-verified). ruff lint clean.
This commit is contained in:
chiguyong 2026-07-04 01:10:01 +08:00
parent 7c900ce280
commit e5e76697a9
9 changed files with 355 additions and 163 deletions

View File

@ -72,6 +72,11 @@ async def drain_pending_evolution_tasks() -> None:
await asyncio.gather(*_pending_evolution_tasks, return_exceptions=True) await asyncio.gather(*_pending_evolution_tasks, return_exceptions=True)
def get_evolution_dropped_count() -> int:
    """Report how many evolution tasks were dropped because of backpressure.

    Returns:
        The current value of the module-level ``_evolution_dropped_count``
        counter.
    """
    dropped_so_far = _evolution_dropped_count
    return dropped_so_far
class AgentConfig: class AgentConfig:
"""Agent 配置模型,从 YAML 或 Dict 构建""" """Agent 配置模型,从 YAML 或 Dict 构建"""
@ -739,7 +744,20 @@ class ConfigDrivenAgent(BaseAgent, EvolutionMixin):
Shared by all _handle_*_stream methods to avoid duplicating the Shared by all _handle_*_stream methods to avoid duplicating the
message-rendering logic that mirrors the sync _handle_* methods. message-rendering logic that mirrors the sync _handle_* methods.
Portal path: if ``task.input_data["messages"]`` is present (a list of
``{role, content}`` dicts), use those pre-built messages directly
instead of rendering the prompt template. This lets the portal route
through ``execute_stream`` (inheriting evolution hooks + trace_outcome
propagation) while keeping its external message-building logic.
""" """
prebuilt = task.input_data.get("messages")
if prebuilt is not None:
system_prompt = task.input_data.get("system_prompt")
user_messages = [m for m in prebuilt if m.get("role") != "system"]
if not user_messages:
user_messages = [{"role": "user", "content": str(task.input_data)}]
return system_prompt, user_messages
variables = task.input_data.copy() variables = task.input_data.copy()
variables["task_type"] = task.task_type variables["task_type"] = task.task_type
if self._prompt_template: if self._prompt_template:
@ -774,22 +792,35 @@ class ConfigDrivenAgent(BaseAgent, EvolutionMixin):
token = CancellationToken() token = CancellationToken()
self._active_tokens[task.task_id] = token self._active_tokens[task.task_id] = token
_stream_output: dict = {} _stream_output: dict = {}
_stream_trace_outcome: str = "success"
_stream_error: BaseException | None = None _stream_error: BaseException | None = None
_stream_completed = False _stream_completed = False
_stream_started_at = datetime.now(timezone.utc)
try: try:
await self._register_mcp_tools() await self._register_mcp_tools()
async for event in self.handle_task_stream(task): async for event in self.handle_task_stream(task):
if event.event_type == "final_answer": if event.event_type == "final_answer":
_raw = event.data.get("output", "") _raw = event.data.get("output", "")
_stream_output = {"content": _raw} if isinstance(_raw, str) else _raw _stream_output = {"content": _raw} if isinstance(_raw, str) else _raw
# PLAN_EXEC path may embed trace_outcome in final_answer.
_to = event.data.get("trace_outcome")
if _to:
_stream_trace_outcome = _to
elif event.event_type == "final_result":
# REACT path: final_result carries ReActResult.status.
_result = event.data.get("result")
if _result is not None:
_stream_trace_outcome = getattr(_result, "status", "success")
yield event yield event
_stream_completed = True _stream_completed = True
except asyncio.CancelledError as ce: except asyncio.CancelledError as ce:
# Cancellation must propagate, but hooks still fire (U2 edge case). # Cancellation must propagate, but hooks still fire (U2 edge case).
_stream_error = ce _stream_error = ce
_stream_trace_outcome = "cancelled"
raise raise
except Exception as e: except Exception as e:
_stream_error = e _stream_error = e
_stream_trace_outcome = "error"
raise raise
finally: finally:
# async generator 的 finally 在 generator 关闭时执行(GC/aclose/正常结束) # async generator 的 finally 在 generator 关闭时执行(GC/aclose/正常结束)
@ -797,6 +828,12 @@ class ConfigDrivenAgent(BaseAgent, EvolutionMixin):
# KTD-4: lifecycle parity — fire evolution hooks fire-and-forget. # KTD-4: lifecycle parity — fire evolution hooks fire-and-forget.
try: try:
now = datetime.now(timezone.utc) now = datetime.now(timezone.utc)
# KTD-8: propagate trace_outcome into output_data so
# lifecycle._is_failure_path() can detect non-success outcomes.
if _stream_output:
_stream_output["trace_outcome"] = _stream_trace_outcome
else:
_stream_output = {"trace_outcome": _stream_trace_outcome}
if _stream_error is not None: if _stream_error is not None:
if isinstance(_stream_error, (asyncio.CancelledError, TaskCancelledError)): if isinstance(_stream_error, (asyncio.CancelledError, TaskCancelledError)):
status = TaskStatus.CANCELLED status = TaskStatus.CANCELLED
@ -810,17 +847,29 @@ class ConfigDrivenAgent(BaseAgent, EvolutionMixin):
status=status, status=status,
output_data=None, output_data=None,
error_message=err_msg, error_message=err_msg,
started_at=now, started_at=_stream_started_at,
completed_at=now, completed_at=now,
) )
elif _stream_completed: elif _stream_completed:
# KTD-8: map non-success trace_outcomes to FAILED.
if _stream_trace_outcome in (
"gave_up_after_reflections",
"verify_failed",
"verify_quota_exhausted",
"failed",
):
status = TaskStatus.FAILED
err_msg = _stream_trace_outcome
else:
status = TaskStatus.COMPLETED
err_msg = None
result = TaskResult( result = TaskResult(
task_id=task.task_id, task_id=task.task_id,
agent_name=self.name, agent_name=self.name,
status=TaskStatus.COMPLETED, status=status,
output_data=_stream_output, output_data=_stream_output,
error_message=None, error_message=err_msg,
started_at=now, started_at=_stream_started_at,
completed_at=now, completed_at=now,
) )
else: else:
@ -831,7 +880,7 @@ class ConfigDrivenAgent(BaseAgent, EvolutionMixin):
status=TaskStatus.CANCELLED, status=TaskStatus.CANCELLED,
output_data=None, output_data=None,
error_message="stream closed before completion", error_message="stream closed before completion",
started_at=now, started_at=_stream_started_at,
completed_at=now, completed_at=now,
) )
self._trigger_evolution_hooks(task, result) self._trigger_evolution_hooks(task, result)

View File

@ -121,6 +121,10 @@ class PlanExecEngine:
# user's decision. None = skip the gate (backward compat — the engine # user's decision. None = skip the gate (backward compat — the engine
# proceeds directly to execution after Spec persistence). # proceeds directly to execution after Spec persistence).
spec_review_handler: SpecReviewHandler | None = None, spec_review_handler: SpecReviewHandler | None = None,
# KTD-2/R4: max reflections for ReActEngine reinjection→reflection
# escalation. Threaded through to each step's ReActEngine so the
# verify-failed path can escalate from reinjection to full reflection.
max_reflections: int = 2,
): ):
""" """
Args: Args:
@ -159,6 +163,8 @@ class PlanExecEngine:
self._pitfall_detector = pitfall_detector self._pitfall_detector = pitfall_detector
# U8/R8: spec review gate handler. None = skip gate (backward compat). # U8/R8: spec review gate handler. None = skip gate (backward compat).
self._spec_review_handler = spec_review_handler self._spec_review_handler = spec_review_handler
# KTD-2/R4: max reflections threaded to each step's ReActEngine.
self._max_reflections = max_reflections
# U4/R11: copy the default to avoid mutating the module-level dict. # U4/R11: copy the default to avoid mutating the module-level dict.
self._phase_budgets = ( self._phase_budgets = (
dict(phase_budgets) if phase_budgets is not None else dict(_DEFAULT_PHASE_BUDGETS) dict(phase_budgets) if phase_budgets is not None else dict(_DEFAULT_PHASE_BUDGETS)
@ -605,9 +611,10 @@ class PlanExecEngine:
"output": output, "output": output,
"total_steps": len(state.trajectory), "total_steps": len(state.trajectory),
"total_tokens": state.total_tokens, "total_tokens": state.total_tokens,
"plan_id": plan.plan_id, "plan_id": current_plan.plan_id,
"plan_status": plan_result.status.value, "plan_status": plan_result.status.value,
"replanned": state.replanned, "replanned": state.replanned,
"trace_outcome": trace_outcome,
}, },
) )
@ -637,7 +644,7 @@ class PlanExecEngine:
async def _inject_pitfall_warnings( async def _inject_pitfall_warnings(
self, self,
goal: str, goal: str,
plan_steps: list[Any], plan_steps: list[PlanStep],
task_type: str, task_type: str,
actor: str, actor: str,
system_prompt: str | None, system_prompt: str | None,
@ -1432,6 +1439,7 @@ class PlanExecEngine:
verification_enabled=self._verification_enabled, verification_enabled=self._verification_enabled,
verification_commands=self._verification_commands, verification_commands=self._verification_commands,
phase_budgets=self._phase_budgets, phase_budgets=self._phase_budgets,
max_reflections=self._max_reflections,
) )
return PlanExecutor( return PlanExecutor(
agent_pool=step_executor, agent_pool=step_executor,
@ -1590,11 +1598,13 @@ class ReActStepExecutor:
model: str = "default", model: str = "default",
system_prompt: str | None = None, system_prompt: str | None = None,
tools: list["Tool"] | None = None, tools: list["Tool"] | None = None,
max_steps: int = 5, max_steps: int = 10,
confirmation_handler: Any | None = None, confirmation_handler: Any | None = None,
verification_enabled: bool = False, verification_enabled: bool = False,
verification_commands: list[str] | None = None, verification_commands: list[str] | None = None,
phase_budgets: dict[str, int] | None = None, phase_budgets: dict[str, int] | None = None,
# KTD-2/R4: threaded through to each step's ReActEngine.
max_reflections: int = 2,
): ):
self._llm_gateway = llm_gateway self._llm_gateway = llm_gateway
self._messages = messages or [] self._messages = messages or []
@ -1607,6 +1617,8 @@ class ReActStepExecutor:
self._verification_commands = verification_commands self._verification_commands = verification_commands
# U4/R11: thread through to each step's ReActEngine. # U4/R11: thread through to each step's ReActEngine.
self._phase_budgets = phase_budgets self._phase_budgets = phase_budgets
# KTD-2/R4: thread through to each step's ReActEngine.
self._max_reflections = max_reflections
self._agents: dict[str, _ReActStepAgent] = {} self._agents: dict[str, _ReActStepAgent] = {}
async def create_agent_from_skill(self, skill_name: str): async def create_agent_from_skill(self, skill_name: str):
@ -1623,6 +1635,7 @@ class ReActStepExecutor:
verification_enabled=self._verification_enabled, verification_enabled=self._verification_enabled,
verification_commands=self._verification_commands, verification_commands=self._verification_commands,
phase_budgets=self._phase_budgets, phase_budgets=self._phase_budgets,
max_reflections=self._max_reflections,
) )
self._agents[skill_name] = agent self._agents[skill_name] = agent
return agent return agent
@ -1642,6 +1655,7 @@ class ReActStepExecutor:
verification_enabled=self._verification_enabled, verification_enabled=self._verification_enabled,
verification_commands=self._verification_commands, verification_commands=self._verification_commands,
phase_budgets=self._phase_budgets, phase_budgets=self._phase_budgets,
max_reflections=self._max_reflections,
) )
self._agents[key] = agent self._agents[key] = agent
return agent return agent
@ -1662,11 +1676,12 @@ class _ReActStepAgent:
model: str = "default", model: str = "default",
system_prompt: str | None = None, system_prompt: str | None = None,
tools: list["Tool"] | None = None, tools: list["Tool"] | None = None,
max_steps: int = 5, max_steps: int = 10,
confirmation_handler: Any | None = None, confirmation_handler: Any | None = None,
verification_enabled: bool = False, verification_enabled: bool = False,
verification_commands: list[str] | None = None, verification_commands: list[str] | None = None,
phase_budgets: dict[str, int] | None = None, phase_budgets: dict[str, int] | None = None,
max_reflections: int = 2,
): ):
self.name = name self.name = name
self._llm_gateway = llm_gateway self._llm_gateway = llm_gateway
@ -1680,6 +1695,7 @@ class _ReActStepAgent:
self._verification_commands = verification_commands self._verification_commands = verification_commands
# U4/R11: per-phase step quotas, passed to ReActEngine. # U4/R11: per-phase step quotas, passed to ReActEngine.
self._phase_budgets = phase_budgets self._phase_budgets = phase_budgets
self._max_reflections = max_reflections
async def execute(self, task_msg: TaskMessage) -> "TaskResult": async def execute(self, task_msg: TaskMessage) -> "TaskResult":
"""执行步骤:通过 ReActEngine 循环调用""" """执行步骤:通过 ReActEngine 循环调用"""
@ -1710,6 +1726,7 @@ class _ReActStepAgent:
verification_enabled=self._verification_enabled, verification_enabled=self._verification_enabled,
verification_commands=self._verification_commands, verification_commands=self._verification_commands,
phase_budgets=self._phase_budgets, phase_budgets=self._phase_budgets,
max_reflections=self._max_reflections,
) )
# 构建 messages # 构建 messages
@ -1728,7 +1745,13 @@ class _ReActStepAgent:
now = datetime.now(timezone.utc) now = datetime.now(timezone.utc)
status = TaskStatus.COMPLETED.value status = TaskStatus.COMPLETED.value
if react_result.status in ("timeout", "cancelled"): if react_result.status in (
"timeout",
"cancelled",
"verify_failed",
"gave_up_after_reflections",
"failed",
):
status = TaskStatus.FAILED.value status = TaskStatus.FAILED.value
return TaskResult( return TaskResult(

View File

@ -33,10 +33,12 @@ from agentkit.telemetry.metrics import (
agent_duration_histogram, agent_duration_histogram,
) )
from agentkit.core.phase import PhaseState
if TYPE_CHECKING: if TYPE_CHECKING:
from agentkit.core.compressor import CompressionStrategy from agentkit.core.compressor import CompressionStrategy
from agentkit.core.middleware import MiddlewareChain from agentkit.core.middleware import MiddlewareChain
from agentkit.core.phase import PhasePolicy, PhaseState from agentkit.core.phase import PhasePolicy
from agentkit.core.sandbox import WorkspaceSandbox from agentkit.core.sandbox import WorkspaceSandbox
from agentkit.core.trace import TraceRecorder from agentkit.core.trace import TraceRecorder
from agentkit.evolution.pitfall_detector import PitfallWarning from agentkit.evolution.pitfall_detector import PitfallWarning
@ -420,8 +422,6 @@ class ReActEngine:
""" """
if self._phase_policy is None or self._current_phase is None: if self._phase_policy is None or self._current_phase is None:
return return
from agentkit.core.phase import PhaseState
while self._current_phase not in (PhaseState.VERIFICATION, PhaseState.DELIVERY): while self._current_phase not in (PhaseState.VERIFICATION, PhaseState.DELIVERY):
nxt = self.advance_phase() nxt = self.advance_phase()
if nxt is None: if nxt is None:
@ -446,8 +446,6 @@ class ReActEngine:
""" """
if self._phase_policy is None or self._current_phase is None: if self._phase_policy is None or self._current_phase is None:
return None return None
from agentkit.core.phase import PhaseState
nxt = PhaseState.next_of(self._current_phase) nxt = PhaseState.next_of(self._current_phase)
if nxt is None: if nxt is None:
# Already at DELIVERY — return None to signal no transition. # Already at DELIVERY — return None to signal no transition.
@ -890,8 +888,8 @@ class ReActEngine:
trace_outcome = "success" trace_outcome = "success"
# U4/G1: verify 失败回灌计数器。受 max_steps 上限约束(不无限循环)。 # U4/G1: verify 失败回灌计数器。受 max_steps 上限约束(不无限循环)。
# U4/KTD-7: initialize from restored budget state (checkpoint resume). # U4/KTD-7: _reflect_count is initialized from restored budget state
reinjections = self._reflect_count # (checkpoint resume) and used directly — no redundant local copy.
_loop_start = time.monotonic() _loop_start = time.monotonic()
while step < self._max_steps: while step < self._max_steps:
@ -913,9 +911,7 @@ class ReActEngine:
and self._phase_policy is not None and self._phase_policy is not None
and self._current_phase is not None and self._current_phase is not None
): ):
from agentkit.core.phase import PhaseState as _PS if self._current_phase in (PhaseState.PLANNING, PhaseState.BUILDING):
if self._current_phase in (_PS.PLANNING, _PS.BUILDING):
self._think_count += 1 self._think_count += 1
think_quota = self._phase_budgets.get("think") think_quota = self._phase_budgets.get("think")
if think_quota is not None and self._think_count >= think_quota: if think_quota is not None and self._think_count >= think_quota:
@ -1547,7 +1543,7 @@ class ReActEngine:
vresult = await vloop.verify() vresult = await vloop.verify()
if not vresult.passed: if not vresult.passed:
if ( if (
reinjections < self._max_reinjections self._reflect_count < self._max_reinjections
and step < self._max_steps and step < self._max_steps
): ):
errors_text = "\n".join(vresult.errors) errors_text = "\n".join(vresult.errors)
@ -1557,7 +1553,6 @@ class ReActEngine:
"content": (f"验证失败,错误如下:\n{errors_text}"), "content": (f"验证失败,错误如下:\n{errors_text}"),
} }
) )
reinjections += 1
# U4/R10: track reflect count for # U4/R10: track reflect count for
# checkpoint reconstruction (KTD-7). # checkpoint reconstruction (KTD-7).
self._reflect_count += 1 self._reflect_count += 1
@ -1574,7 +1569,7 @@ class ReActEngine:
data={ data={
"message": ( "message": (
f"验证失败,已注入错误信息让 LLM 自纠正 " f"验证失败,已注入错误信息让 LLM 自纠正 "
f"(reinjection {reinjections}/{self._max_reinjections})" f"(reinjection {self._reflect_count}/{self._max_reinjections})"
), ),
"verify_errors": vresult.errors, "verify_errors": vresult.errors,
}, },
@ -1681,7 +1676,7 @@ class ReActEngine:
logger.info( logger.info(
"Verification failed after %d reinjections, " "Verification failed after %d reinjections, "
"%d reflections, interrupting with verify log", "%d reflections, interrupting with verify log",
reinjections, self._reflect_count,
self._reflection_count, self._reflection_count,
) )
break break
@ -2136,7 +2131,7 @@ class ReActEngine:
in_verification = ( in_verification = (
self._sandbox is not None self._sandbox is not None
and self._current_phase is not None and self._current_phase is not None
and self._current_phase.value == "verification" and self._current_phase == PhaseState.VERIFICATION
) )
try: try:

View File

@ -28,10 +28,24 @@ import contextlib
import errno import errno
import logging import logging
import socket import socket
import threading
from pathlib import Path from pathlib import Path
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Reentrancy counter for ``network_block``. Concurrent VERIFICATION phases
# (parallel PLAN_EXEC steps) each enter the context manager; only the first
# entry (0 -> 1) patches ``socket.socket.connect``, and only the last exit
# (1 -> 0) restores it. Naive save/restore would unpatch on the first exit
# while other phases are still expecting the block to be in effect, breaking
# sandboxing for any phase that started later.
# ponytail: process-wide counter — not subprocess-safe (inherited fork state
# is irrelevant because the monkey-patch lives in the parent's socket module).

# Number of ``network_block`` contexts currently active in this process.
_network_block_count: int = 0
# Held around every counter update and around the patch/restore of the socket
# methods, so the count and the patched state change together.
_network_block_lock = threading.Lock()
# Unpatched socket methods, captured once when this module is imported; these
# are what ``network_block`` puts back when the counter returns to 0.
_original_socket_connect = socket.socket.connect
_original_socket_connect_ex = socket.socket.connect_ex
class SandboxNetworkBlockedError(RuntimeError): class SandboxNetworkBlockedError(RuntimeError):
"""Raised when a tool attempts an outbound network call under sandbox.""" """Raised when a tool attempts an outbound network call under sandbox."""
@ -115,17 +129,23 @@ class WorkspaceSandbox:
"""Block outbound network connections within the async context. """Block outbound network connections within the async context.
Patches ``socket.socket.connect`` and ``connect_ex`` to raise / Patches ``socket.socket.connect`` and ``connect_ex`` to raise /
return ``ECONNREFUSED`` respectively. Restores the originals on exit, return ``ECONNREFUSED`` respectively. Restores the originals on the
even if the wrapped code raises. last concurrent exit, even if the wrapped code raises.
Already-connected sockets (e.g. an LLM gateway keep-alive pool) are Already-connected sockets (e.g. an LLM gateway keep-alive pool) are
unaffected — only *new* ``connect()`` calls are blocked. This is the unaffected — only *new* ``connect()`` calls are blocked. This is the
correct granularity: the LLM gateway talks over its existing correct granularity: the LLM gateway talks over its existing
connection, while a tool trying to ``requests.get(...)`` makes a new connection, while a tool trying to ``requests.get(...)`` makes a new
connect and is rejected. connect and is rejected.
Reentrancy: a module-level counter guards the patch. Concurrent
VERIFICATION phases (parallel PLAN_EXEC steps) each enter/exit; the
patch is engaged on count 0->1 and released on count 1->0. Without
this, the first exit would restore the original connect while later
phases are still expecting the block, terminating new LLM gateway /
Redis / PostgreSQL connections in those phases.
""" """
original_connect = socket.socket.connect global _network_block_count # noqa: PLW0603
original_connect_ex = socket.socket.connect_ex
def _blocked_connect(self_sock, *args, **kwargs): # noqa: ANN001 def _blocked_connect(self_sock, *args, **kwargs): # noqa: ANN001
raise SandboxNetworkBlockedError( raise SandboxNetworkBlockedError(
@ -136,15 +156,26 @@ class WorkspaceSandbox:
# connect_ex returns an errno instead of raising (POSIX contract). # connect_ex returns an errno instead of raising (POSIX contract).
return errno.ECONNREFUSED return errno.ECONNREFUSED
with _network_block_lock:
_network_block_count += 1
if _network_block_count == 1:
socket.socket.connect = _blocked_connect # type: ignore[method-assign] socket.socket.connect = _blocked_connect # type: ignore[method-assign]
socket.socket.connect_ex = _blocked_connect_ex # type: ignore[method-assign] socket.socket.connect_ex = _blocked_connect_ex # type: ignore[method-assign]
logger.debug("sandbox: network block engaged") logger.debug("sandbox: network block engaged (count=1)")
try: try:
yield yield
finally: finally:
socket.socket.connect = original_connect # type: ignore[method-assign] with _network_block_lock:
socket.socket.connect_ex = original_connect_ex # type: ignore[method-assign] _network_block_count -= 1
logger.debug("sandbox: network block released") if _network_block_count == 0:
socket.socket.connect = _original_socket_connect # type: ignore[method-assign]
socket.socket.connect_ex = _original_socket_connect_ex # type: ignore[method-assign]
logger.debug("sandbox: network block released (count=0)")
else:
logger.debug(
"sandbox: network block still held (count=%d)",
_network_block_count,
)
def detect_verification_commands(workspace_root: str | Path | None) -> list[str]: def detect_verification_commands(workspace_root: str | Path | None) -> list[str]:

View File

@ -805,7 +805,14 @@ async def lifespan(app: FastAPI):
try: try:
from agentkit.core.config_driven import drain_pending_evolution_tasks from agentkit.core.config_driven import drain_pending_evolution_tasks
await drain_pending_evolution_tasks() await asyncio.wait_for(drain_pending_evolution_tasks(), timeout=10.0)
except asyncio.TimeoutError:
from agentkit.core.config_driven import _pending_evolution_tasks
logger.warning(
"drain_pending_evolution_tasks 超时 10s, %d 个任务被放弃",
len(_pending_evolution_tasks),
)
except Exception: except Exception:
logger.debug("drain_pending_evolution_tasks 异常已忽略", exc_info=True) logger.debug("drain_pending_evolution_tasks 异常已忽略", exc_info=True)

View File

@ -1494,6 +1494,23 @@ async def _handle_chat_message(
}, },
} }
) )
# U8/R8: persist the spec_review_request so it survives a page reload.
# The frontend reconstructs the pending review card from the restored
# message metadata (spec_review_id + goal + steps).
try:
await sm.append_message(
session_id=session_id,
role=MessageRole.ASSISTANT,
content=f"[Spec Review] {goal}",
metadata={
"message_type": "spec_review_request",
"spec_review_id": spec_review_id,
"spec_review_goal": goal,
"spec_review_steps": steps,
},
)
except Exception:
logger.debug("Failed to persist spec_review_request", exc_info=True)
loop = asyncio.get_running_loop() loop = asyncio.get_running_loop()
future: asyncio.Future[tuple[str, str]] = loop.create_future() future: asyncio.Future[tuple[str, str]] = loop.create_future()
@ -1506,19 +1523,58 @@ async def _handle_chat_message(
# "failed") so the user can resume on return. # "failed") so the user can resume on return.
decision, feedback = await asyncio.wait_for(future, timeout=1800.0) decision, feedback = await asyncio.wait_for(future, timeout=1800.0)
logger.info(f"Spec review {spec_review_id} resolved: decision={decision!r}") logger.info(f"Spec review {spec_review_id} resolved: decision={decision!r}")
# Persist the decision so the frontend can show the outcome after
# a reload (e.g. timeout→parked transition the user never saw).
try:
await sm.append_message(
session_id=session_id,
role=MessageRole.ASSISTANT,
content=f"[Spec Review Decision] {decision}: {feedback}",
metadata={
"message_type": "spec_review_reply",
"spec_review_id": spec_review_id,
"spec_review_decision": decision,
"spec_review_feedback": feedback,
},
)
except Exception:
logger.debug("Failed to persist spec_review_reply", exc_info=True)
return decision, feedback return decision, feedback
except asyncio.TimeoutError: except asyncio.TimeoutError:
logger.warning(f"Spec review {spec_review_id} timed out (30 min)") logger.warning(f"Spec review {spec_review_id} timed out (30 min)")
# Persist the timeout→parked transition so the frontend can show
# the parked state after a reload.
try:
await sm.append_message(
session_id=session_id,
role=MessageRole.ASSISTANT,
content=f"[Spec Review Timed Out] {spec_review_id}",
metadata={
"message_type": "spec_review_reply",
"spec_review_id": spec_review_id,
"spec_review_decision": "parked",
"spec_review_feedback": "timed out (30 min)",
},
)
except Exception:
logger.debug("Failed to persist spec_review timeout", exc_info=True)
raise raise
finally: finally:
_pending_spec_reviews.pop(spec_review_id, None) _pending_spec_reviews.pop(spec_review_id, None)
# Wire the handler onto a PlanExecEngine only (the WS PLAN_EXEC path uses # U8/R8: spec review gate wiring. The WS PLAN_EXEC path uses
# a ReActEngine + phase_policy, where this is a no-op). Local import to # ``_build_phase_engine`` which returns a ``ReActEngine`` with
# avoid a top-level dependency that the WS path doesn't need. # ``phase_policy`` (NOT a ``PlanExecEngine``), so the gate cannot be
from agentkit.core.plan_exec_engine import PlanExecEngine as _PlanExecEngine # wired here — ``ReActEngine`` does not read ``_spec_review_handler``.
# The gate only fires when ``ConfigDrivenAgent.execute_stream`` →
if isinstance(react_engine, _PlanExecEngine): # ``_handle_plan_exec_stream`` → ``PlanExecEngine.execute_stream`` runs,
# which is the portal/task path (not the WS chat path).
# ponytail: known ceiling — WS chat PLAN_EXEC (phase_policy mechanism)
# does not support spec review. Upgrade path: route WS PLAN_EXEC through
# ``ConfigDrivenAgent.execute_stream`` to unify with the portal path and
# inherit the gate. The ``_spec_review_handler`` closure + event handlers
# below are kept so the upgrade is a routing change, not a rewrite.
if hasattr(react_engine, "_spec_review_handler"):
react_engine._spec_review_handler = _spec_review_handler react_engine._spec_review_handler = _spec_review_handler
logger.info( logger.info(

View File

@ -23,7 +23,7 @@ from pydantic import BaseModel
from agentkit.core.config_driven import ConfigDrivenAgent from agentkit.core.config_driven import ConfigDrivenAgent
from agentkit.core.event_queue import EventQueue from agentkit.core.event_queue import EventQueue
from agentkit.core.protocol import Event, TaskEventType, TaskStatus, TurnEventType from agentkit.core.protocol import Event, TaskEventType, TaskMessage, TaskStatus, TurnEventType
from agentkit.core.react import ReActEngine from agentkit.core.react import ReActEngine
from agentkit.chat.skill_routing import ExecutionMode, SkillRoutingResult from agentkit.chat.skill_routing import ExecutionMode, SkillRoutingResult
from agentkit.chat.request_preprocessor import RequestPreprocessor from agentkit.chat.request_preprocessor import RequestPreprocessor
@ -73,6 +73,42 @@ def _ensure_non_empty(text: str | None) -> str:
return EMPTY_LLM_RESPONSE return EMPTY_LLM_RESPONSE
def _build_portal_task(
    *,
    agent_name: str,
    messages: list[dict[str, str]],
    system_prompt: str | None,
    timeout_seconds: float | None,
    conversation_id: str | None = None,
    task_id: str | None = None,
) -> TaskMessage:
    """Construct a TaskMessage for routing through ConfigDrivenAgent.execute_stream.

    The portal builds messages externally (history + user message). The
    ``messages`` key in input_data tells _build_llm_messages to use them
    directly instead of rendering the prompt template. This lets the portal
    inherit evolution hooks + trace_outcome propagation from execute_stream's
    finally block (KTD-4/KTD-8).

    Args:
        agent_name: Name of the agent the task is addressed to.
        messages: Pre-built ``{role, content}`` dicts, oldest first. The
            content of the last entry is mirrored into ``input_data["content"]``.
        system_prompt: System prompt forwarded in ``input_data``; may be None.
        timeout_seconds: Task timeout. ``None`` or ``0`` means "not
            configured" and falls back to 300 seconds.
        conversation_id: Optional conversation the task belongs to.
        task_id: Optional explicit task id; a random UUID4 string is
            generated when omitted.

    Returns:
        A ``TaskMessage`` with ``task_type="chat"`` and priority 0.
    """
    from datetime import datetime, timezone

    # A falsy timeout is treated as "unset" and replaced by the 300 s default.
    # int() truncates toward zero, so a configured sub-second timeout (e.g.
    # 0.5) used to become 0 — the very value considered "unset" here. Clamp a
    # configured timeout to at least one second instead.
    if timeout_seconds:
        effective_timeout = max(1, int(timeout_seconds))
    else:
        effective_timeout = 300

    # Mirror the latest message's text for consumers that read "content".
    last_content = messages[-1].get("content", "") if messages else ""

    return TaskMessage(
        task_id=task_id or str(uuid.uuid4()),
        agent_name=agent_name,
        task_type="chat",
        priority=0,
        input_data={
            "messages": messages,
            "system_prompt": system_prompt,
            "content": last_content,
        },
        callback_url=None,
        created_at=datetime.now(timezone.utc),
        timeout_seconds=effective_timeout,
        conversation_id=conversation_id,
    )
async def _emit_event_safe( async def _emit_event_safe(
event_queue: EventQueue | None, event_queue: EventQueue | None,
event_type: str, event_type: str,
@ -556,38 +592,39 @@ async def chat(request: ChatRequest, req: Request, _auth: None = Depends(_verify
) )
react_config = agent.get_react_config() react_config = agent.get_react_config()
react_engine = getattr(agent, "_react_engine", None) # KTD-4/KTD-8: route through ConfigDrivenAgent.execute_stream so the
if react_engine is None: # finally block fires evolution hooks + propagates trace_outcome. The
react_engine = ReActEngine( # portal builds messages externally; _build_portal_task packages them
# into a TaskMessage whose input_data["messages"] is used directly by
# _build_llm_messages (bypassing the prompt template).
_react_engine = getattr(agent, "_react_engine", None)
if _react_engine is None:
_react_engine = ReActEngine(
llm_gateway=llm_gateway, llm_gateway=llm_gateway,
max_steps=react_config["max_steps"], max_steps=react_config["max_steps"],
) )
agent._react_engine = _react_engine
else: else:
react_engine.reset() _react_engine.reset()
messages = [{"role": "user", "content": request.message}] messages = [{"role": "user", "content": request.message}]
# Inject conversation history # Inject conversation history
history_msgs = await _build_history_messages(conv.id) history_msgs = await _build_history_messages(conv.id)
for hm in reversed(history_msgs): for hm in reversed(history_msgs):
messages.insert(0, hm) messages.insert(0, hm)
tools = agent.get_tools()
model = agent.get_model()
system_prompt = getattr(agent, "_system_prompt", None) or agent.get_system_prompt() system_prompt = getattr(agent, "_system_prompt", None) or agent.get_system_prompt()
timeout_seconds = react_config["timeout_seconds"] timeout_seconds = react_config["timeout_seconds"]
collected_output: list[str] = [] portal_task = _build_portal_task(
try:
# U2 verify: calls react_engine.execute_stream directly, bypassing
# ConfigDrivenAgent.execute_stream — evolution hooks NOT propagated
# here. Routing through agent.execute_stream is tracked separately.
async for event in react_engine.execute_stream(
messages=messages,
tools=tools,
model=model,
agent_name=agent.name, agent_name=agent.name,
messages=messages,
system_prompt=system_prompt, system_prompt=system_prompt,
timeout_seconds=timeout_seconds, timeout_seconds=timeout_seconds,
): conversation_id=conv.id,
)
collected_output: list[str] = []
try:
async for event in agent.execute_stream(portal_task):
if event.event_type == "final_answer": if event.event_type == "final_answer":
collected_output.append(event.data.get("output", "")) collected_output.append(event.data.get("output", ""))
except asyncio.CancelledError: except asyncio.CancelledError:
@ -684,34 +721,32 @@ async def chat_stream(request: ChatRequest, req: Request, _auth: None = Depends(
) )
react_config = agent.get_react_config() react_config = agent.get_react_config()
react_engine = getattr(agent, "_react_engine", None) # KTD-4/KTD-8: route through ConfigDrivenAgent.execute_stream
if react_engine is None: # (evolution hooks + trace_outcome propagation in finally block).
react_engine = ReActEngine( _react_engine = getattr(agent, "_react_engine", None)
if _react_engine is None:
_react_engine = ReActEngine(
llm_gateway=llm_gateway, llm_gateway=llm_gateway,
max_steps=react_config["max_steps"], max_steps=react_config["max_steps"],
) )
agent._react_engine = _react_engine
else: else:
react_engine.reset() _react_engine.reset()
messages = [{"role": "user", "content": request.message}] messages = [{"role": "user", "content": request.message}]
tools = agent.get_tools()
model = agent.get_model()
system_prompt = getattr(agent, "_system_prompt", None) or agent.get_system_prompt() system_prompt = getattr(agent, "_system_prompt", None) or agent.get_system_prompt()
timeout_seconds = react_config["timeout_seconds"] timeout_seconds = react_config["timeout_seconds"]
collected_output: list[str] = [] portal_task = _build_portal_task(
try:
# U2 verify: calls react_engine.execute_stream directly, bypassing
# ConfigDrivenAgent.execute_stream — evolution hooks NOT propagated
# here. Routing through agent.execute_stream is tracked separately.
async for event in react_engine.execute_stream(
messages=messages,
tools=tools,
model=model,
agent_name=agent.name, agent_name=agent.name,
messages=messages,
system_prompt=system_prompt, system_prompt=system_prompt,
timeout_seconds=timeout_seconds, timeout_seconds=timeout_seconds,
): conversation_id=conv.id,
)
collected_output: list[str] = []
try:
async for event in agent.execute_stream(portal_task):
if event.event_type == "final_answer": if event.event_type == "final_answer":
collected_output.append(event.data.get("output", "")) collected_output.append(event.data.get("output", ""))
yield { yield {
@ -967,11 +1002,8 @@ def _derive_title_from_messages(messages: list) -> str:
async def _execute_react_background( async def _execute_react_background(
react_engine: ReActEngine, agent: ConfigDrivenAgent,
messages: list[dict], messages: list[dict],
tools: list,
model: str,
agent_name: str,
system_prompt: str | None, system_prompt: str | None,
timeout_seconds: float | None, timeout_seconds: float | None,
conv_id: str, conv_id: str,
@ -987,6 +1019,10 @@ async def _execute_react_background(
Results are always persisted to the conversation store, regardless of Results are always persisted to the conversation store, regardless of
whether a WebSocket subscriber is active. whether a WebSocket subscriber is active.
Task status is tracked in TaskStore when provided. Task status is tracked in TaskStore when provided.
KTD-4/KTD-8: routes through ``agent.execute_stream`` (not
``react_engine.execute_stream`` directly) so the finally block fires
evolution hooks and propagates trace_outcome.
""" """
collected_output: list[str] = [] collected_output: list[str] = []
try: try:
@ -1005,17 +1041,15 @@ async def _execute_react_background(
): ):
logger.warning("Failed to update TaskStore RUNNING", exc_info=True) logger.warning("Failed to update TaskStore RUNNING", exc_info=True)
# U2 verify: calls react_engine.execute_stream directly, bypassing portal_task = _build_portal_task(
# ConfigDrivenAgent.execute_stream — evolution hooks NOT propagated agent_name=agent.name,
# here. Routing through agent.execute_stream is tracked separately.
async for event in react_engine.execute_stream(
messages=messages, messages=messages,
tools=tools,
model=model,
agent_name=agent_name,
system_prompt=system_prompt, system_prompt=system_prompt,
timeout_seconds=timeout_seconds, timeout_seconds=timeout_seconds,
): conversation_id=conv_id,
task_id=task_id,
)
async for event in agent.execute_stream(portal_task):
if event.event_type == "final_answer": if event.event_type == "final_answer":
collected_output.append(event.data.get("output", "")) collected_output.append(event.data.get("output", ""))
@ -1219,6 +1253,14 @@ async def portal_websocket(websocket: WebSocket):
task_id: str | None = None task_id: str | None = None
# Track the active background task so cancel can propagate to it. # Track the active background task so cancel can propagate to it.
active_bg_task: asyncio.Task | None = None active_bg_task: asyncio.Task | None = None
# U8/R8: pending spec review futures. The portal WS path doesn't wire
# _spec_review_handler on the agent (the background task architecture
# makes EventQueue-based request/reply non-trivial), so this dict is
# typically empty. It exists so stale spec_review_reply messages from
# the frontend are handled gracefully instead of silently ignored.
# ponytail: upgrade path — wire _spec_review_handler via EventQueue +
# future, mirroring chat.py's _spec_review_handler closure.
pending_spec_reviews: dict[str, asyncio.Future[tuple[str, str]]] = {}
try: try:
while True: while True:
@ -1256,6 +1298,32 @@ async def portal_websocket(websocket: WebSocket):
await websocket.send_json({"type": "pong"}) await websocket.send_json({"type": "pong"})
continue continue
if msg_type == "spec_review_reply":
# U8/R8: mirror chat.py:1126 — resolve a pending spec review
# future. Typically a no-op in the portal WS path (the
# _spec_review_handler isn't wired), but handles stale replies
# gracefully.
spec_review_id = msg.get("spec_review_id")
decision = msg.get("decision", "rejected")
feedback = msg.get("feedback", "")
logger.info(
f"Received spec_review_reply: id={spec_review_id!r}, decision={decision!r}"
)
if spec_review_id and spec_review_id in pending_spec_reviews:
fut = pending_spec_reviews[spec_review_id]
if not fut.done():
fut.set_result((decision, feedback))
else:
logger.warning(
f"spec_review_reply {spec_review_id!r} already resolved"
)
else:
logger.warning(
f"spec_review_reply {spec_review_id!r} not found in "
f"pending_spec_reviews — ignoring"
)
continue
if msg_type == "resume": if msg_type == "resume":
# Frontend reconnected and wants to resume a running task # Frontend reconnected and wants to resume a running task
resume_task_id = msg.get("task_id", "") resume_task_id = msg.get("task_id", "")
@ -1800,15 +1868,17 @@ async def portal_websocket(websocket: WebSocket):
# Execute via ReAct stream # Execute via ReAct stream
react_config = agent.get_react_config() react_config = agent.get_react_config()
# Reuse agent's ReActEngine if available (aligned with chat.py pattern) # KTD-4/KTD-8: route through ConfigDrivenAgent.execute_stream
react_engine = getattr(agent, "_react_engine", None) # (evolution hooks + trace_outcome propagation in finally block).
if react_engine is None: _react_engine = getattr(agent, "_react_engine", None)
react_engine = ReActEngine( if _react_engine is None:
_react_engine = ReActEngine(
llm_gateway=llm_gateway, llm_gateway=llm_gateway,
max_steps=react_config["max_steps"], max_steps=react_config["max_steps"],
) )
agent._react_engine = _react_engine
else: else:
react_engine.reset() _react_engine.reset()
messages = [{"role": "user", "content": message_text}] messages = [{"role": "user", "content": message_text}]
# Inject conversation history for context continuity # Inject conversation history for context continuity
@ -1829,11 +1899,8 @@ async def portal_websocket(websocket: WebSocket):
# background task continues running and persists the result. # background task continues running and persists the result.
bg_task = asyncio.create_task( bg_task = asyncio.create_task(
_execute_react_background( _execute_react_background(
react_engine=react_engine, agent=agent,
messages=messages, messages=messages,
tools=tools,
model=model,
agent_name=agent.name,
system_prompt=system_prompt, system_prompt=system_prompt,
timeout_seconds=timeout_seconds, timeout_seconds=timeout_seconds,
conv_id=conv.id, conv_id=conv.id,

View File

@ -38,10 +38,12 @@ class FakeConversationStore:
class FakeReactEngine: class FakeReactEngine:
"""Fake ReAct engine that yields events from a predefined list.""" """Fake ReAct engine that yields events from a predefined list."""
name = "test-agent"
def __init__(self, events: list[Event]) -> None: def __init__(self, events: list[Event]) -> None:
self._events = events self._events = events
async def execute_stream(self, **kwargs): async def execute_stream(self, task):
for event in self._events: for event in self._events:
yield event yield event
@ -49,11 +51,13 @@ class FakeReactEngine:
class FailingReactEngine: class FailingReactEngine:
"""Fake ReAct engine that raises an exception after yielding some events.""" """Fake ReAct engine that raises an exception after yielding some events."""
name = "test-agent"
def __init__(self, events: list[Event], error: Exception) -> None: def __init__(self, events: list[Event], error: Exception) -> None:
self._events = events self._events = events
self._error = error self._error = error
async def execute_stream(self, **kwargs): async def execute_stream(self, task):
for event in self._events: for event in self._events:
yield event yield event
raise self._error raise self._error
@ -76,11 +80,13 @@ def _make_event(
class SlowFakeReactEngine: class SlowFakeReactEngine:
"""Fake ReAct engine with a delay to allow status checks during execution.""" """Fake ReAct engine with a delay to allow status checks during execution."""
name = "test-agent"
def __init__(self, events: list[Event], delay: float = 0.1) -> None: def __init__(self, events: list[Event], delay: float = 0.1) -> None:
self._events = events self._events = events
self._delay = delay self._delay = delay
async def execute_stream(self, **kwargs): async def execute_stream(self, task):
for event in self._events: for event in self._events:
await asyncio.sleep(self._delay) await asyncio.sleep(self._delay)
yield event yield event
@ -93,11 +99,13 @@ class CancellableReactEngine:
Event so the test can cancel the task and verify CancelledError cleanup. Event so the test can cancel the task and verify CancelledError cleanup.
""" """
name = "test-agent"
def __init__(self, first_event: Event) -> None: def __init__(self, first_event: Event) -> None:
self._first_event = first_event self._first_event = first_event
self.started = asyncio.Event() self.started = asyncio.Event()
async def execute_stream(self, **kwargs): async def execute_stream(self, task):
yield self._first_event yield self._first_event
self.started.set() self.started.set()
# Block forever until cancelled # Block forever until cancelled
@ -130,11 +138,8 @@ class TestExecuteReactBackground:
eq = EventQueue() eq = EventQueue()
await _execute_react_background( await _execute_react_background(
react_engine=engine, agent=engine,
messages=[], messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None, system_prompt=None,
timeout_seconds=None, timeout_seconds=None,
conv_id="test-conv", conv_id="test-conv",
@ -162,11 +167,8 @@ class TestExecuteReactBackground:
eq = EventQueue() eq = EventQueue()
await _execute_react_background( await _execute_react_background(
react_engine=engine, agent=engine,
messages=[], messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None, system_prompt=None,
timeout_seconds=None, timeout_seconds=None,
conv_id="test-conv", conv_id="test-conv",
@ -190,11 +192,8 @@ class TestExecuteReactBackground:
eq = EventQueue() eq = EventQueue()
await _execute_react_background( await _execute_react_background(
react_engine=engine, agent=engine,
messages=[], messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None, system_prompt=None,
timeout_seconds=None, timeout_seconds=None,
conv_id="test-conv", conv_id="test-conv",
@ -228,11 +227,8 @@ class TestExecuteReactBackground:
await asyncio.sleep(0.05) await asyncio.sleep(0.05)
await _execute_react_background( await _execute_react_background(
react_engine=engine, agent=engine,
messages=[], messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None, system_prompt=None,
timeout_seconds=None, timeout_seconds=None,
conv_id="test-conv", conv_id="test-conv",
@ -270,11 +266,8 @@ class TestExecuteReactBackground:
await asyncio.sleep(0.05) await asyncio.sleep(0.05)
await _execute_react_background( await _execute_react_background(
react_engine=engine, agent=engine,
messages=[], messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None, system_prompt=None,
timeout_seconds=None, timeout_seconds=None,
conv_id="test-conv", conv_id="test-conv",
@ -318,11 +311,8 @@ class TestTaskStoreIntegration:
# Start background task # Start background task
bg_task = asyncio.create_task( bg_task = asyncio.create_task(
_execute_react_background( _execute_react_background(
react_engine=engine, agent=engine,
messages=[], messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None, system_prompt=None,
timeout_seconds=None, timeout_seconds=None,
conv_id="test-conv", conv_id="test-conv",
@ -365,11 +355,8 @@ class TestTaskStoreIntegration:
) )
await _execute_react_background( await _execute_react_background(
react_engine=engine, agent=engine,
messages=[], messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None, system_prompt=None,
timeout_seconds=None, timeout_seconds=None,
conv_id="test-conv", conv_id="test-conv",
@ -394,11 +381,8 @@ class TestTaskStoreIntegration:
# Should not raise # Should not raise
await _execute_react_background( await _execute_react_background(
react_engine=engine, agent=engine,
messages=[], messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None, system_prompt=None,
timeout_seconds=None, timeout_seconds=None,
conv_id="test-conv", conv_id="test-conv",
@ -552,11 +536,8 @@ class TestCancelledErrorPath:
bg_task = asyncio.create_task( bg_task = asyncio.create_task(
_execute_react_background( _execute_react_background(
react_engine=engine, agent=engine,
messages=[], messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None, system_prompt=None,
timeout_seconds=None, timeout_seconds=None,
conv_id="test-conv", conv_id="test-conv",
@ -590,11 +571,8 @@ class TestCancelledErrorPath:
bg_task = asyncio.create_task( bg_task = asyncio.create_task(
_execute_react_background( _execute_react_background(
react_engine=engine, agent=engine,
messages=[], messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None, system_prompt=None,
timeout_seconds=None, timeout_seconds=None,
conv_id="test-conv", conv_id="test-conv",
@ -636,11 +614,8 @@ class TestCancelledErrorPath:
bg_task = asyncio.create_task( bg_task = asyncio.create_task(
_execute_react_background( _execute_react_background(
react_engine=engine, agent=engine,
messages=[], messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None, system_prompt=None,
timeout_seconds=None, timeout_seconds=None,
conv_id="test-conv", conv_id="test-conv",
@ -769,11 +744,8 @@ class TestCancelPropagation:
# Simulate the background task as portal.py would create it # Simulate the background task as portal.py would create it
active_bg_task: asyncio.Task | None = asyncio.create_task( active_bg_task: asyncio.Task | None = asyncio.create_task(
_execute_react_background( _execute_react_background(
react_engine=engine, agent=engine,
messages=[], messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None, system_prompt=None,
timeout_seconds=None, timeout_seconds=None,
conv_id="cancel-conv", conv_id="cancel-conv",
@ -814,11 +786,8 @@ class TestCancelPropagation:
bg_task = asyncio.create_task( bg_task = asyncio.create_task(
_execute_react_background( _execute_react_background(
react_engine=engine, agent=engine,
messages=[], messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None, system_prompt=None,
timeout_seconds=None, timeout_seconds=None,
conv_id="test-conv", conv_id="test-conv",
@ -865,11 +834,8 @@ class TestWebSocketDisconnectNoCancel:
# Start the background task (as portal.py would) # Start the background task (as portal.py would)
bg_task = asyncio.create_task( bg_task = asyncio.create_task(
_execute_react_background( _execute_react_background(
react_engine=engine, agent=engine,
messages=[], messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None, system_prompt=None,
timeout_seconds=None, timeout_seconds=None,
conv_id="test-conv", conv_id="test-conv",
@ -912,11 +878,8 @@ class TestWebSocketDisconnectNoCancel:
bg_task = asyncio.create_task( bg_task = asyncio.create_task(
_execute_react_background( _execute_react_background(
react_engine=engine, agent=engine,
messages=[], messages=[],
tools=[],
model="test-model",
agent_name="test-agent",
system_prompt=None, system_prompt=None,
timeout_seconds=None, timeout_seconds=None,
conv_id="resume-conv", conv_id="resume-conv",

View File

@ -112,7 +112,8 @@ class TestExecuteStreamHooks:
assert events[0].event_type == "final_answer" assert events[0].event_type == "final_answer"
assert len(fired) == 1 assert len(fired) == 1
assert fired[0].status == TaskStatus.COMPLETED assert fired[0].status == TaskStatus.COMPLETED
assert fired[0].output_data == {"content": "hello world"} # KTD-8: output_data includes trace_outcome for lifecycle._is_failure_path()
assert fired[0].output_data == {"content": "hello world", "trace_outcome": "success"}
async def test_failure_fires_on_task_failed(self): async def test_failure_fires_on_task_failed(self):
"""Stream exception fires evolve_after_task with FAILED status.""" """Stream exception fires evolve_after_task with FAILED status."""