fix(review): Wave 3 code review fixes
P1: bash/shell tool name mismatch. PhasePolicy whitelist used "bash" but ShellTool registers as "shell". The bash_command_filter was dead code (never matched the real tool name). Fixed in phase.py whitelist, react.py filter check, agentkit.yaml config, and all tests. P1: AdvancePhaseTool missing import in tools/__init__.py. Was in __all__ but never imported. Added the import. P2: chat.py phase policy error message echoed verbatim to WS client. Truncated to 200 chars to match nearby error paths and avoid leaking config internals. P2: policy_from_config rebuilt PhasePolicy 3x via full-field copy. Replaced with dataclasses.replace() so new PhasePolicy fields are not silently dropped in future reconstructions. ce-code-review (mode:agent) step of LFG pipeline.
This commit is contained in:
parent
ce6eb004a0
commit
92fc38de7e
|
|
@ -62,8 +62,8 @@ fallback_chain:
|
||||||
# auto_advance_after_steps: 5 # optional, default = manual (LLM calls advance_phase)
|
# auto_advance_after_steps: 5 # optional, default = manual (LLM calls advance_phase)
|
||||||
# start_phase: planning # optional, default = planning
|
# start_phase: planning # optional, default = planning
|
||||||
# whitelist_override: # optional, merges with default (override wins)
|
# whitelist_override: # optional, merges with default (override wins)
|
||||||
# planning: [search, read_file, bash]
|
# planning: [search, read_file, shell]
|
||||||
# building: [write_file, bash, read_file]
|
# building: [write_file, shell, read_file]
|
||||||
session: {backend: memory}
|
session: {backend: memory}
|
||||||
bus: {backend: memory}
|
bus: {backend: memory}
|
||||||
task_store: {backend: memory}
|
task_store: {backend: memory}
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ from __future__ import annotations
|
||||||
import enum
|
import enum
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field, replace
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -127,9 +127,9 @@ def default_policy() -> PhasePolicy:
|
||||||
"""Return the KTD5 default PhasePolicy.
|
"""Return the KTD5 default PhasePolicy.
|
||||||
|
|
||||||
Whitelist (R24):
|
Whitelist (R24):
|
||||||
- PLANNING: search, tool_search, read_file, bash (read-only)
|
- PLANNING: search, tool_search, read_file, shell (read-only)
|
||||||
- BUILDING: write_file, bash (full), read_file, search
|
- BUILDING: write_file, shell (full), read_file, search
|
||||||
- VERIFICATION: bash (test commands), read_file, search
|
- VERIFICATION: shell (test commands), read_file, search
|
||||||
- DELIVERY: all tools (wildcard)
|
- DELIVERY: all tools (wildcard)
|
||||||
|
|
||||||
Bash filter:
|
Bash filter:
|
||||||
|
|
@ -139,11 +139,14 @@ def default_policy() -> PhasePolicy:
|
||||||
"""
|
"""
|
||||||
return PhasePolicy(
|
return PhasePolicy(
|
||||||
whitelist={
|
whitelist={
|
||||||
PhaseState.PLANNING: frozenset({"search", "tool_search", "read_file", "bash"}),
|
# Tool name is "shell" (ShellTool default); bash_command_filter
|
||||||
|
# gates on the same name. Using "bash" here would make the filter
|
||||||
|
# dead code and block the LLM from shell access.
|
||||||
|
PhaseState.PLANNING: frozenset({"search", "tool_search", "read_file", "shell"}),
|
||||||
PhaseState.BUILDING: frozenset(
|
PhaseState.BUILDING: frozenset(
|
||||||
{"write_file", "bash", "read_file", "search", "tool_search"}
|
{"write_file", "shell", "read_file", "search", "tool_search"}
|
||||||
),
|
),
|
||||||
PhaseState.VERIFICATION: frozenset({"bash", "read_file", "search"}),
|
PhaseState.VERIFICATION: frozenset({"shell", "read_file", "search"}),
|
||||||
PhaseState.DELIVERY: frozenset({WILDCARD}),
|
PhaseState.DELIVERY: frozenset({WILDCARD}),
|
||||||
},
|
},
|
||||||
bash_command_filter={
|
bash_command_filter={
|
||||||
|
|
@ -181,21 +184,11 @@ def policy_from_config(config: dict[str, Any]) -> PhasePolicy | None:
|
||||||
# Start phase
|
# Start phase
|
||||||
start_phase_str = config.get("start_phase")
|
start_phase_str = config.get("start_phase")
|
||||||
if start_phase_str:
|
if start_phase_str:
|
||||||
policy = PhasePolicy(
|
policy = replace(policy, start_phase=PhaseState.from_string(start_phase_str))
|
||||||
whitelist=policy.whitelist,
|
|
||||||
bash_command_filter=policy.bash_command_filter,
|
|
||||||
auto_advance_after_steps=config.get("auto_advance_after_steps"),
|
|
||||||
start_phase=PhaseState.from_string(start_phase_str),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Auto-advance override
|
# Auto-advance override
|
||||||
if "auto_advance_after_steps" in config:
|
if "auto_advance_after_steps" in config:
|
||||||
policy = PhasePolicy(
|
policy = replace(policy, auto_advance_after_steps=config["auto_advance_after_steps"])
|
||||||
whitelist=policy.whitelist,
|
|
||||||
bash_command_filter=policy.bash_command_filter,
|
|
||||||
auto_advance_after_steps=config["auto_advance_after_steps"],
|
|
||||||
start_phase=policy.start_phase,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Whitelist override — merge with default (override wins on conflict)
|
# Whitelist override — merge with default (override wins on conflict)
|
||||||
override = config.get("whitelist_override") or {}
|
override = config.get("whitelist_override") or {}
|
||||||
|
|
@ -208,11 +201,6 @@ def policy_from_config(config: dict[str, Any]) -> PhasePolicy | None:
|
||||||
f"whitelist_override[{phase_name!r}] must be a list, got {type(tools).__name__}"
|
f"whitelist_override[{phase_name!r}] must be a list, got {type(tools).__name__}"
|
||||||
)
|
)
|
||||||
new_whitelist[phase] = frozenset(str(t) for t in tools)
|
new_whitelist[phase] = frozenset(str(t) for t in tools)
|
||||||
policy = PhasePolicy(
|
policy = replace(policy, whitelist=new_whitelist)
|
||||||
whitelist=new_whitelist,
|
|
||||||
bash_command_filter=policy.bash_command_filter,
|
|
||||||
auto_advance_after_steps=policy.auto_advance_after_steps,
|
|
||||||
start_phase=policy.start_phase,
|
|
||||||
)
|
|
||||||
|
|
||||||
return policy
|
return policy
|
||||||
|
|
|
||||||
|
|
@ -313,8 +313,8 @@ class ReActEngine:
|
||||||
"tool": tool_name,
|
"tool": tool_name,
|
||||||
"is_error": True,
|
"is_error": True,
|
||||||
}
|
}
|
||||||
# Bash command filter (only applies to bash tool).
|
# Bash command filter (only applies to shell tool — registered as "shell").
|
||||||
if tool_name == "bash":
|
if tool_name == "shell":
|
||||||
command = str(arguments.get("command", ""))
|
command = str(arguments.get("command", ""))
|
||||||
if not self._phase_policy.is_bash_command_allowed(command, self._current_phase):
|
if not self._phase_policy.is_bash_command_allowed(command, self._current_phase):
|
||||||
return {
|
return {
|
||||||
|
|
|
||||||
|
|
@ -1118,7 +1118,12 @@ async def _handle_chat_message(
|
||||||
e,
|
e,
|
||||||
)
|
)
|
||||||
await websocket.send_json(
|
await websocket.send_json(
|
||||||
{"type": "error", "data": {"message": f"phase policy error: {e}"}}
|
{
|
||||||
|
"type": "error",
|
||||||
|
# Truncate to 200 chars to match nearby error paths and
|
||||||
|
# avoid leaking config internals (see chat.py:1090, 1320).
|
||||||
|
"data": {"message": f"phase policy error: {str(e)[:200]}"},
|
||||||
|
}
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,7 @@ from agentkit.tools.web_search import WebSearchTool
|
||||||
from agentkit.tools.builtin import RunTestsTool, ToolSearchTool
|
from agentkit.tools.builtin import RunTestsTool, ToolSearchTool
|
||||||
from agentkit.tools.search import ToolSearchIndex
|
from agentkit.tools.search import ToolSearchIndex
|
||||||
from agentkit.tools.file_read import ReadFileTool
|
from agentkit.tools.file_read import ReadFileTool
|
||||||
|
from agentkit.tools.advance_phase import AdvancePhaseTool
|
||||||
|
|
||||||
# Conditional import: HeadroomRetrieveTool requires HeadroomCompressor
|
# Conditional import: HeadroomRetrieveTool requires HeadroomCompressor
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -72,7 +72,7 @@ class TestDefaultPolicy:
|
||||||
allowed = policy.whitelist[PhaseState.PLANNING]
|
allowed = policy.whitelist[PhaseState.PLANNING]
|
||||||
assert "search" in allowed
|
assert "search" in allowed
|
||||||
assert "read_file" in allowed
|
assert "read_file" in allowed
|
||||||
assert "bash" in allowed
|
assert "shell" in allowed
|
||||||
assert "tool_search" in allowed
|
assert "tool_search" in allowed
|
||||||
# Planning must NOT allow write_file.
|
# Planning must NOT allow write_file.
|
||||||
assert "write_file" not in allowed
|
assert "write_file" not in allowed
|
||||||
|
|
@ -81,13 +81,13 @@ class TestDefaultPolicy:
|
||||||
policy = default_policy()
|
policy = default_policy()
|
||||||
allowed = policy.whitelist[PhaseState.BUILDING]
|
allowed = policy.whitelist[PhaseState.BUILDING]
|
||||||
assert "write_file" in allowed
|
assert "write_file" in allowed
|
||||||
assert "bash" in allowed
|
assert "shell" in allowed
|
||||||
assert "read_file" in allowed
|
assert "read_file" in allowed
|
||||||
|
|
||||||
def test_verification_whitelist_excludes_write(self):
|
def test_verification_whitelist_excludes_write(self):
|
||||||
policy = default_policy()
|
policy = default_policy()
|
||||||
allowed = policy.whitelist[PhaseState.VERIFICATION]
|
allowed = policy.whitelist[PhaseState.VERIFICATION]
|
||||||
assert "bash" in allowed
|
assert "shell" in allowed
|
||||||
assert "read_file" in allowed
|
assert "read_file" in allowed
|
||||||
assert "write_file" not in allowed
|
assert "write_file" not in allowed
|
||||||
|
|
||||||
|
|
@ -148,7 +148,7 @@ class TestIsToolAllowed:
|
||||||
whitelist={
|
whitelist={
|
||||||
PhaseState.PLANNING: frozenset({"search"}),
|
PhaseState.PLANNING: frozenset({"search"}),
|
||||||
PhaseState.BUILDING: frozenset({"write_file"}),
|
PhaseState.BUILDING: frozenset({"write_file"}),
|
||||||
PhaseState.VERIFICATION: frozenset({"bash"}),
|
PhaseState.VERIFICATION: frozenset({"shell"}),
|
||||||
PhaseState.DELIVERY: frozenset({WILDCARD}),
|
PhaseState.DELIVERY: frozenset({WILDCARD}),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
@ -211,9 +211,9 @@ class TestPhasePolicyEdgeCases:
|
||||||
custom_filter = re.compile(r"\b(pip install|npm install)\b")
|
custom_filter = re.compile(r"\b(pip install|npm install)\b")
|
||||||
policy = PhasePolicy(
|
policy = PhasePolicy(
|
||||||
whitelist={
|
whitelist={
|
||||||
PhaseState.PLANNING: frozenset({"bash"}),
|
PhaseState.PLANNING: frozenset({"shell"}),
|
||||||
PhaseState.BUILDING: frozenset({"bash"}),
|
PhaseState.BUILDING: frozenset({"shell"}),
|
||||||
PhaseState.VERIFICATION: frozenset({"bash"}),
|
PhaseState.VERIFICATION: frozenset({"shell"}),
|
||||||
PhaseState.DELIVERY: frozenset({WILDCARD}),
|
PhaseState.DELIVERY: frozenset({WILDCARD}),
|
||||||
},
|
},
|
||||||
bash_command_filter={PhaseState.BUILDING: custom_filter},
|
bash_command_filter={PhaseState.BUILDING: custom_filter},
|
||||||
|
|
@ -261,15 +261,15 @@ class TestPolicyFromConfig:
|
||||||
{
|
{
|
||||||
"enabled": True,
|
"enabled": True,
|
||||||
"whitelist_override": {
|
"whitelist_override": {
|
||||||
"planning": ["search", "read_file"], # removes bash from default
|
"planning": ["search", "read_file"], # removes shell from default
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
assert policy is not None
|
assert policy is not None
|
||||||
# Override wins — bash should be removed from planning.
|
# Override wins — shell should be removed from planning.
|
||||||
assert policy.is_tool_allowed("search", PhaseState.PLANNING) is True
|
assert policy.is_tool_allowed("search", PhaseState.PLANNING) is True
|
||||||
assert policy.is_tool_allowed("read_file", PhaseState.PLANNING) is True
|
assert policy.is_tool_allowed("read_file", PhaseState.PLANNING) is True
|
||||||
assert policy.is_tool_allowed("bash", PhaseState.PLANNING) is False
|
assert policy.is_tool_allowed("shell", PhaseState.PLANNING) is False
|
||||||
# Other phases unchanged.
|
# Other phases unchanged.
|
||||||
assert policy.is_tool_allowed("write_file", PhaseState.BUILDING) is True
|
assert policy.is_tool_allowed("write_file", PhaseState.BUILDING) is True
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -167,20 +167,20 @@ class TestPhasePermission:
|
||||||
assert engine._check_phase_permission("literally_anything", {}) is None
|
assert engine._check_phase_permission("literally_anything", {}) is None
|
||||||
|
|
||||||
def test_bash_command_filter_blocks_rm_in_planning(self, engine):
|
def test_bash_command_filter_blocks_rm_in_planning(self, engine):
|
||||||
result = engine._check_phase_permission("bash", {"command": "rm -rf /tmp"})
|
result = engine._check_phase_permission("shell", {"command": "rm -rf /tmp"})
|
||||||
assert result is not None
|
assert result is not None
|
||||||
assert result["error"] == "phase_violation"
|
assert result["error"] == "phase_violation"
|
||||||
assert "rm" in result["message"] or "Bash command" in result["message"]
|
assert "rm" in result["message"] or "Bash command" in result["message"]
|
||||||
|
|
||||||
def test_bash_command_filter_allows_safe_in_planning(self, engine):
|
def test_bash_command_filter_allows_safe_in_planning(self, engine):
|
||||||
# `ls` and `git status` are not blocked.
|
# `ls` and `git status` are not blocked.
|
||||||
assert engine._check_phase_permission("bash", {"command": "ls -la"}) is None
|
assert engine._check_phase_permission("shell", {"command": "ls -la"}) is None
|
||||||
assert engine._check_phase_permission("bash", {"command": "git status"}) is None
|
assert engine._check_phase_permission("shell", {"command": "git status"}) is None
|
||||||
|
|
||||||
def test_bash_command_filter_no_restriction_in_building(self, engine):
|
def test_bash_command_filter_no_restriction_in_building(self, engine):
|
||||||
engine.advance_phase() # → BUILDING
|
engine.advance_phase() # → BUILDING
|
||||||
# `rm` is allowed in building phase.
|
# `rm` is allowed in building phase.
|
||||||
assert engine._check_phase_permission("bash", {"command": "rm -rf build/"}) is None
|
assert engine._check_phase_permission("shell", {"command": "rm -rf build/"}) is None
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -252,7 +252,7 @@ class TestAutoAdvance:
|
||||||
whitelist={
|
whitelist={
|
||||||
PhaseState.PLANNING: frozenset({"search"}),
|
PhaseState.PLANNING: frozenset({"search"}),
|
||||||
PhaseState.BUILDING: frozenset({"write_file"}),
|
PhaseState.BUILDING: frozenset({"write_file"}),
|
||||||
PhaseState.VERIFICATION: frozenset({"bash"}),
|
PhaseState.VERIFICATION: frozenset({"shell"}),
|
||||||
PhaseState.DELIVERY: frozenset({"*"}),
|
PhaseState.DELIVERY: frozenset({"*"}),
|
||||||
},
|
},
|
||||||
auto_advance_after_steps=2,
|
auto_advance_after_steps=2,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue