feat(agent): Wave 3 strategic coupling (G5/G6) #6

Merged
fischer merged 7 commits from feat/agent-wave3-strategic into main 2026-06-30 09:17:20 +08:00
7 changed files with 39 additions and 45 deletions
Showing only changes of commit f50d3485ea - Show all commits

View File

@ -62,8 +62,8 @@ fallback_chain:
# auto_advance_after_steps: 5 # optional, default = manual (LLM calls advance_phase)
# start_phase: planning # optional, default = planning
# whitelist_override: # optional, merges with default (override wins)
# planning: [search, read_file, bash]
# building: [write_file, bash, read_file]
# planning: [search, read_file, shell]
# building: [write_file, shell, read_file]
session: {backend: memory}
bus: {backend: memory}
task_store: {backend: memory}

View File

@ -14,7 +14,7 @@ from __future__ import annotations
import enum
import logging
import re
from dataclasses import dataclass, field
from dataclasses import dataclass, field, replace
from typing import Any
logger = logging.getLogger(__name__)
@ -127,9 +127,9 @@ def default_policy() -> PhasePolicy:
"""Return the KTD5 default PhasePolicy.
Whitelist (R24):
- PLANNING: search, tool_search, read_file, bash (read-only)
- BUILDING: write_file, bash (full), read_file, search
- VERIFICATION: bash (test commands), read_file, search
- PLANNING: search, tool_search, read_file, shell (read-only)
- BUILDING: write_file, shell (full), read_file, search
- VERIFICATION: shell (test commands), read_file, search
- DELIVERY: all tools (wildcard)
Bash filter:
@ -139,11 +139,14 @@ def default_policy() -> PhasePolicy:
"""
return PhasePolicy(
whitelist={
PhaseState.PLANNING: frozenset({"search", "tool_search", "read_file", "bash"}),
# Tool name is "shell" (ShellTool default); bash_command_filter
# gates on the same name. Using "bash" here would make the filter
# dead code and block the LLM from shell access.
PhaseState.PLANNING: frozenset({"search", "tool_search", "read_file", "shell"}),
PhaseState.BUILDING: frozenset(
{"write_file", "bash", "read_file", "search", "tool_search"}
{"write_file", "shell", "read_file", "search", "tool_search"}
),
PhaseState.VERIFICATION: frozenset({"bash", "read_file", "search"}),
PhaseState.VERIFICATION: frozenset({"shell", "read_file", "search"}),
PhaseState.DELIVERY: frozenset({WILDCARD}),
},
bash_command_filter={
@ -181,21 +184,11 @@ def policy_from_config(config: dict[str, Any]) -> PhasePolicy | None:
# Start phase
start_phase_str = config.get("start_phase")
if start_phase_str:
policy = PhasePolicy(
whitelist=policy.whitelist,
bash_command_filter=policy.bash_command_filter,
auto_advance_after_steps=config.get("auto_advance_after_steps"),
start_phase=PhaseState.from_string(start_phase_str),
)
policy = replace(policy, start_phase=PhaseState.from_string(start_phase_str))
# Auto-advance override
if "auto_advance_after_steps" in config:
policy = PhasePolicy(
whitelist=policy.whitelist,
bash_command_filter=policy.bash_command_filter,
auto_advance_after_steps=config["auto_advance_after_steps"],
start_phase=policy.start_phase,
)
policy = replace(policy, auto_advance_after_steps=config["auto_advance_after_steps"])
# Whitelist override — merge with default (override wins on conflict)
override = config.get("whitelist_override") or {}
@ -208,11 +201,6 @@ def policy_from_config(config: dict[str, Any]) -> PhasePolicy | None:
f"whitelist_override[{phase_name!r}] must be a list, got {type(tools).__name__}"
)
new_whitelist[phase] = frozenset(str(t) for t in tools)
policy = PhasePolicy(
whitelist=new_whitelist,
bash_command_filter=policy.bash_command_filter,
auto_advance_after_steps=policy.auto_advance_after_steps,
start_phase=policy.start_phase,
)
policy = replace(policy, whitelist=new_whitelist)
return policy

View File

@ -313,8 +313,8 @@ class ReActEngine:
"tool": tool_name,
"is_error": True,
}
# Bash command filter (only applies to bash tool).
if tool_name == "bash":
# Bash command filter (only applies to shell tool — registered as "shell").
if tool_name == "shell":
command = str(arguments.get("command", ""))
if not self._phase_policy.is_bash_command_allowed(command, self._current_phase):
return {

View File

@ -1118,7 +1118,12 @@ async def _handle_chat_message(
e,
)
await websocket.send_json(
{"type": "error", "data": {"message": f"phase policy error: {e}"}}
{
"type": "error",
# Truncate to 200 chars to match nearby error paths and
# avoid leaking config internals (see chat.py:1090, 1320).
"data": {"message": f"phase policy error: {str(e)[:200]}"},
}
)
return

View File

@ -19,6 +19,7 @@ from agentkit.tools.web_search import WebSearchTool
from agentkit.tools.builtin import RunTestsTool, ToolSearchTool
from agentkit.tools.search import ToolSearchIndex
from agentkit.tools.file_read import ReadFileTool
from agentkit.tools.advance_phase import AdvancePhaseTool
# Conditional import: HeadroomRetrieveTool requires HeadroomCompressor
try:

View File

@ -72,7 +72,7 @@ class TestDefaultPolicy:
allowed = policy.whitelist[PhaseState.PLANNING]
assert "search" in allowed
assert "read_file" in allowed
assert "bash" in allowed
assert "shell" in allowed
assert "tool_search" in allowed
# Planning must NOT allow write_file.
assert "write_file" not in allowed
@ -81,13 +81,13 @@ class TestDefaultPolicy:
policy = default_policy()
allowed = policy.whitelist[PhaseState.BUILDING]
assert "write_file" in allowed
assert "bash" in allowed
assert "shell" in allowed
assert "read_file" in allowed
def test_verification_whitelist_excludes_write(self):
policy = default_policy()
allowed = policy.whitelist[PhaseState.VERIFICATION]
assert "bash" in allowed
assert "shell" in allowed
assert "read_file" in allowed
assert "write_file" not in allowed
@ -148,7 +148,7 @@ class TestIsToolAllowed:
whitelist={
PhaseState.PLANNING: frozenset({"search"}),
PhaseState.BUILDING: frozenset({"write_file"}),
PhaseState.VERIFICATION: frozenset({"bash"}),
PhaseState.VERIFICATION: frozenset({"shell"}),
PhaseState.DELIVERY: frozenset({WILDCARD}),
}
)
@ -211,9 +211,9 @@ class TestPhasePolicyEdgeCases:
custom_filter = re.compile(r"\b(pip install|npm install)\b")
policy = PhasePolicy(
whitelist={
PhaseState.PLANNING: frozenset({"bash"}),
PhaseState.BUILDING: frozenset({"bash"}),
PhaseState.VERIFICATION: frozenset({"bash"}),
PhaseState.PLANNING: frozenset({"shell"}),
PhaseState.BUILDING: frozenset({"shell"}),
PhaseState.VERIFICATION: frozenset({"shell"}),
PhaseState.DELIVERY: frozenset({WILDCARD}),
},
bash_command_filter={PhaseState.BUILDING: custom_filter},
@ -261,15 +261,15 @@ class TestPolicyFromConfig:
{
"enabled": True,
"whitelist_override": {
"planning": ["search", "read_file"], # removes bash from default
"planning": ["search", "read_file"], # removes shell from default
},
}
)
assert policy is not None
# Override wins — bash should be removed from planning.
# Override wins — shell should be removed from planning.
assert policy.is_tool_allowed("search", PhaseState.PLANNING) is True
assert policy.is_tool_allowed("read_file", PhaseState.PLANNING) is True
assert policy.is_tool_allowed("bash", PhaseState.PLANNING) is False
assert policy.is_tool_allowed("shell", PhaseState.PLANNING) is False
# Other phases unchanged.
assert policy.is_tool_allowed("write_file", PhaseState.BUILDING) is True

View File

@ -167,20 +167,20 @@ class TestPhasePermission:
assert engine._check_phase_permission("literally_anything", {}) is None
def test_bash_command_filter_blocks_rm_in_planning(self, engine):
result = engine._check_phase_permission("bash", {"command": "rm -rf /tmp"})
result = engine._check_phase_permission("shell", {"command": "rm -rf /tmp"})
assert result is not None
assert result["error"] == "phase_violation"
assert "rm" in result["message"] or "Bash command" in result["message"]
def test_bash_command_filter_allows_safe_in_planning(self, engine):
# `ls` and `git status` are not blocked.
assert engine._check_phase_permission("bash", {"command": "ls -la"}) is None
assert engine._check_phase_permission("bash", {"command": "git status"}) is None
assert engine._check_phase_permission("shell", {"command": "ls -la"}) is None
assert engine._check_phase_permission("shell", {"command": "git status"}) is None
def test_bash_command_filter_no_restriction_in_building(self, engine):
engine.advance_phase() # → BUILDING
# `rm` is allowed in building phase.
assert engine._check_phase_permission("bash", {"command": "rm -rf build/"}) is None
assert engine._check_phase_permission("shell", {"command": "rm -rf build/"}) is None
# ---------------------------------------------------------------------------
@ -252,7 +252,7 @@ class TestAutoAdvance:
whitelist={
PhaseState.PLANNING: frozenset({"search"}),
PhaseState.BUILDING: frozenset({"write_file"}),
PhaseState.VERIFICATION: frozenset({"bash"}),
PhaseState.VERIFICATION: frozenset({"shell"}),
PhaseState.DELIVERY: frozenset({"*"}),
},
auto_advance_after_steps=2,