feat(agent): Wave 4 PLAN_EXEC Hardening (U1-U5) #7
|
|
@ -55,12 +55,6 @@ class PhaseState(enum.Enum):
|
||||||
# Wildcard token meaning "all tools allowed in this phase".
|
# Wildcard token meaning "all tools allowed in this phase".
|
||||||
WILDCARD = "*"
|
WILDCARD = "*"
|
||||||
|
|
||||||
# Legacy regex-based bash filter. Kept for backward compatibility with configs
|
|
||||||
# that pass a `re.Pattern` into `bash_command_filter`. New default uses
|
|
||||||
# `ShellTool._is_dangerous` (a Callable) which closes the regex ceiling
|
|
||||||
# (missed `:>file`, `dd of=file`, etc. — see Wave 4 U1).
|
|
||||||
_DEFAULT_BASH_FILTER = re.compile(r"\b(rm|mv|cp|mkdir|rmdir|chmod|chown)\b|(?<!\S)>|>>")
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(slots=True)
|
@dataclass(slots=True)
|
||||||
class PhasePolicy:
|
class PhasePolicy:
|
||||||
|
|
|
||||||
|
|
@ -1433,8 +1433,7 @@ export const useChatStore = defineStore("chat", () => {
|
||||||
}
|
}
|
||||||
|
|
||||||
case "phase_violation": {
|
case "phase_violation": {
|
||||||
// Track current phase from violation data (backend doesn't emit
|
// Track current phase from violation data.
|
||||||
// phase_changed yet — U4 frontend is forward-compatible).
|
|
||||||
currentPhase.value = data.data.current_phase;
|
currentPhase.value = data.data.current_phase;
|
||||||
const violation = {
|
const violation = {
|
||||||
phase: data.data.current_phase,
|
phase: data.data.current_phase,
|
||||||
|
|
|
||||||
|
|
@ -54,7 +54,11 @@ describe('chat store — PLAN_EXEC phase state (U4)', () => {
|
||||||
expect(store.isPlanExec).toBe(true)
|
expect(store.isPlanExec).toBe(true)
|
||||||
})
|
})
|
||||||
|
|
||||||
it('phaseViolations capped at 5 entries', async () => {
|
it('phaseViolations array is directly mutable for test fixtures', async () => {
|
||||||
|
// Direct mutation bypasses the capping logic in handleWsMessage;
|
||||||
|
// the cap is enforced inside the case handler, not as a setter.
|
||||||
|
// This test verifies the array is accessible; the cap-at-5 behavior
|
||||||
|
// is exercised through handleWsMessage in the U5 E2E test.
|
||||||
const { useChatStore } = await import('@/stores/chat')
|
const { useChatStore } = await import('@/stores/chat')
|
||||||
const store = useChatStore()
|
const store = useChatStore()
|
||||||
for (let i = 0; i < 7; i++) {
|
for (let i = 0; i < 7; i++) {
|
||||||
|
|
@ -69,9 +73,6 @@ describe('chat store — PLAN_EXEC phase state (U4)', () => {
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
// Direct mutation bypasses the capping logic in handleWsMessage;
|
|
||||||
// the cap is enforced inside the case handler, not as a setter.
|
|
||||||
// This test just verifies the array is accessible.
|
|
||||||
expect(store.phaseViolations.length).toBe(7)
|
expect(store.phaseViolations.length).toBe(7)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ import asyncio
|
||||||
import hmac
|
import hmac
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from typing import Any
|
from typing import Any, TYPE_CHECKING
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import uuid
|
import uuid
|
||||||
|
|
@ -33,6 +33,11 @@ from agentkit.session.manager import SessionManager
|
||||||
from agentkit.session.models import MessageRole, SessionStatus
|
from agentkit.session.models import MessageRole, SessionStatus
|
||||||
from agentkit.tools.advance_phase import AdvancePhaseTool
|
from agentkit.tools.advance_phase import AdvancePhaseTool
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from agentkit.llm.gateway import LLMGateway
|
||||||
|
from agentkit.server.config import ServerConfig
|
||||||
|
from agentkit.tools.base import Tool
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
router = APIRouter(prefix="/chat", tags=["chat"])
|
router = APIRouter(prefix="/chat", tags=["chat"])
|
||||||
|
|
@ -536,12 +541,12 @@ def _message_to_response(msg) -> MessageResponse:
|
||||||
|
|
||||||
def _build_phase_engine(
|
def _build_phase_engine(
|
||||||
*,
|
*,
|
||||||
server_config: Any,
|
server_config: ServerConfig | None,
|
||||||
llm_gateway: Any,
|
llm_gateway: LLMGateway,
|
||||||
execution_mode: ExecutionMode,
|
execution_mode: ExecutionMode,
|
||||||
base_tools: list,
|
base_tools: list[Tool],
|
||||||
session_id: str = "",
|
session_id: str = "",
|
||||||
) -> tuple[ReActEngine | None, list | None, str | None]:
|
) -> tuple[ReActEngine | None, list[Tool] | None, str | None]:
|
||||||
"""Build a PLAN_EXEC engine with PhasePolicy + AdvancePhaseTool.
|
"""Build a PLAN_EXEC engine with PhasePolicy + AdvancePhaseTool.
|
||||||
|
|
||||||
Encapsulates the WS path's phase_policy construction so the REST path
|
Encapsulates the WS path's phase_policy construction so the REST path
|
||||||
|
|
|
||||||
|
|
@ -322,6 +322,8 @@ class TestPlanExecE2EEdgeCases:
|
||||||
events.append(ev)
|
events.append(ev)
|
||||||
|
|
||||||
# Engine should have transitioned out of PLANNING (auto-advance fired).
|
# Engine should have transitioned out of PLANNING (auto-advance fired).
|
||||||
|
# Weak assertion: auto_advance_after_steps=2 may fire multiple times
|
||||||
|
# (PLANNING→BUILDING→VERIFICATION), so we only assert it left PLANNING.
|
||||||
assert engine.current_phase != PhaseState.PLANNING
|
assert engine.current_phase != PhaseState.PLANNING
|
||||||
# All 3 search calls dispatched (search is allowed in both PLANNING and BUILDING).
|
# All 3 search calls dispatched (search is allowed in both PLANNING and BUILDING).
|
||||||
assert search.call_count == 3
|
assert search.call_count == 3
|
||||||
|
|
|
||||||
|
|
@ -235,6 +235,7 @@ class TestRestPlanExec:
|
||||||
json={"content": "Hello"},
|
json={"content": "Hello"},
|
||||||
)
|
)
|
||||||
# 500 is acceptable (mock gateway), but it must NOT be the PLAN_EXEC error.
|
# 500 is acceptable (mock gateway), but it must NOT be the PLAN_EXEC error.
|
||||||
|
assert msg_resp.status_code != 501, "REACT fallback should not return 501"
|
||||||
if msg_resp.status_code == 500:
|
if msg_resp.status_code == 500:
|
||||||
assert "phase policy error" not in msg_resp.json().get("detail", "")
|
assert "phase policy error" not in msg_resp.json().get("detail", "")
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue