# fischer-agentkit/tests/unit/test_react_phase_enforcemen...

# 340 lines
# 14 KiB
# Python

"""Unit tests for ReActEngine phase enforcement (G6 wiring, R24).
Per plan U3 Execution note: characterization-first — verify that
`ReActEngine(phase_policy=None)` behaves identically to pre-change (no
enforcement, no advance_phase tool, no _current_phase mutation). Then add
enforcement tests.
"""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock
import pytest
from agentkit.core.phase import PhasePolicy, PhaseState, default_policy
from agentkit.core.react import ReActEngine
from agentkit.tools.advance_phase import AdvancePhaseTool
# ---------------------------------------------------------------------------
# Characterization — phase_policy=None preserves existing behavior
# ---------------------------------------------------------------------------
class TestCharacterizationNoPolicy:
    """Characterize ``ReActEngine`` when it is built without a phase policy.

    With ``phase_policy=None`` the engine must not enforce phases: the phase
    bookkeeping stays unset, tool dispatch is not gated, and
    ``advance_phase()`` returns ``None``.
    """

    def test_init_without_phase_policy(self):
        """The constructor leaves every phase attribute in its unset state."""
        # Minimal stub LLM gateway — only the constructor is under test.
        gateway = MagicMock()
        engine = ReActEngine(llm_gateway=gateway)
        assert engine._phase_policy is None
        assert engine._current_phase is None
        assert engine._steps_in_phase == 0
        assert engine.current_phase is None

    @pytest.mark.asyncio
    async def test_execute_tool_dispatches_without_phase_check(self):
        """Tool dispatch proceeds normally when no policy is set."""
        gateway = MagicMock()
        engine = ReActEngine(llm_gateway=gateway)
        # ``name`` is special only as a Mock *constructor* argument (it labels
        # the mock's repr); assigning ``mock.name = "x"`` afterwards works.
        # _find_tool is patched anyway so that this test does not depend on
        # how the engine resolves a tool name to a tool object.
        fake_tool = MagicMock()
        fake_tool.safe_execute = AsyncMock(return_value={"output": "ok"})
        fake_tool.input_schema = None
        engine._find_tool = lambda name, tools: fake_tool
        result = await engine._execute_tool("any_tool", {"x": 1}, [fake_tool])
        assert result == {"output": "ok"}
        # The argument dict is expanded into keyword arguments on dispatch.
        fake_tool.safe_execute.assert_awaited_once_with(x=1)

    def test_advance_phase_returns_none_without_policy(self):
        """``advance_phase()`` reports ``None`` when there is no policy."""
        # advance_phase() is a plain synchronous call, so this test needs
        # neither ``async`` nor the asyncio marker.
        gateway = MagicMock()
        engine = ReActEngine(llm_gateway=gateway)
        assert engine.advance_phase() is None

    def test_reset_does_not_touch_phase_state_when_no_policy(self):
        """``reset()`` leaves both phase and step counter at their defaults."""
        gateway = MagicMock()
        engine = ReActEngine(llm_gateway=gateway)
        engine.reset()
        assert engine._current_phase is None
        # The step counter is part of the phase state as well.
        assert engine._steps_in_phase == 0
# ---------------------------------------------------------------------------
# Initialization with phase_policy
# ---------------------------------------------------------------------------
class TestPhasePolicyInitialization:
    """Construction and ``reset()`` behavior when a phase policy is supplied."""

    @staticmethod
    def _build_engine():
        """Return an engine wired with a stub gateway and the default policy."""
        return ReActEngine(llm_gateway=MagicMock(), phase_policy=default_policy())

    def test_phase_policy_set_initializes_current_phase(self):
        """A policy-backed engine starts in PLANNING with a zeroed counter."""
        subject = self._build_engine()
        assert subject._phase_policy is not None
        assert subject._current_phase == PhaseState.PLANNING
        assert subject._steps_in_phase == 0

    def test_reset_resets_phase_to_start(self):
        """``reset()`` rewinds both the phase and the per-phase step counter."""
        subject = self._build_engine()

        # Simulate progress: one transition (PLANNING → BUILDING) plus a few
        # steps recorded inside the new phase.
        subject.advance_phase()
        assert subject._current_phase == PhaseState.BUILDING
        subject._steps_in_phase = 5

        subject.reset()

        assert subject._current_phase == PhaseState.PLANNING
        assert subject._steps_in_phase == 0
# ---------------------------------------------------------------------------
# advance_phase() transitions
# ---------------------------------------------------------------------------
class TestAdvancePhase:
    """Walk ``advance_phase()`` through each transition of the default policy."""

    @pytest.fixture
    def engine(self):
        """Fresh engine with the default policy; it starts in PLANNING."""
        return ReActEngine(llm_gateway=MagicMock(), phase_policy=default_policy())

    def test_planning_to_building(self, engine):
        """The first advance moves PLANNING → BUILDING and zeroes the counter."""
        reached = engine.advance_phase()
        assert reached == PhaseState.BUILDING
        assert engine.current_phase == PhaseState.BUILDING
        # The per-phase step counter restarts on every transition.
        assert engine._steps_in_phase == 0

    def test_building_to_verification(self, engine):
        """The second advance moves BUILDING → VERIFICATION."""
        engine.advance_phase()  # now in BUILDING
        reached = engine.advance_phase()
        assert reached == PhaseState.VERIFICATION
        assert engine.current_phase == PhaseState.VERIFICATION

    def test_verification_to_delivery(self, engine):
        """The third advance moves VERIFICATION → DELIVERY."""
        for _ in range(2):  # PLANNING → BUILDING → VERIFICATION
            engine.advance_phase()
        reached = engine.advance_phase()
        assert reached == PhaseState.DELIVERY
        assert engine.current_phase == PhaseState.DELIVERY

    def test_delivery_returns_none(self, engine):
        """Advancing from DELIVERY yields ``None`` and the phase stays put."""
        for _ in range(3):  # PLANNING → BUILDING → VERIFICATION → DELIVERY
            engine.advance_phase()
        outcome = engine.advance_phase()
        assert outcome is None
        assert engine.current_phase == PhaseState.DELIVERY
# ---------------------------------------------------------------------------
# _check_phase_permission — whitelist enforcement
# ---------------------------------------------------------------------------
class TestPhasePermission:
    """Whitelist checks performed by ``_check_phase_permission``.

    In these tests the method returns ``None`` for a permitted call and a
    dict describing the violation for a rejected one.
    """

    @pytest.fixture
    def engine(self):
        """Fresh engine with the default policy; it starts in PLANNING."""
        return ReActEngine(llm_gateway=MagicMock(), phase_policy=default_policy())

    def test_search_allowed_in_planning(self, engine):
        """``search`` passes the PLANNING check."""
        verdict = engine._check_phase_permission("search", {})
        assert verdict is None

    def test_write_file_blocked_in_planning(self, engine):
        """``write_file`` is rejected in PLANNING with a structured error."""
        verdict = engine._check_phase_permission("write_file", {})
        assert verdict is not None
        assert verdict["error"] == "phase_violation"
        assert "write_file" in verdict["message"]
        assert verdict["current_phase"] == "planning"

    def test_write_file_allowed_in_building(self, engine):
        """After one advance (BUILDING), ``write_file`` is permitted."""
        engine.advance_phase()
        verdict = engine._check_phase_permission("write_file", {})
        assert verdict is None

    def test_any_tool_allowed_in_delivery(self, engine):
        """DELIVERY accepts an arbitrary tool name."""
        for _ in range(3):  # PLANNING → BUILDING → VERIFICATION → DELIVERY
            engine.advance_phase()
        verdict = engine._check_phase_permission("literally_anything", {})
        assert verdict is None

    def test_bash_command_filter_blocks_rm_in_planning(self, engine):
        """A shell call carrying ``rm`` is rejected while in PLANNING."""
        verdict = engine._check_phase_permission("shell", {"command": "rm -rf /tmp"})
        assert verdict is not None
        assert verdict["error"] == "phase_violation"
        # NOTE(review): "rm" is a loose substring match (it also occurs inside
        # words such as "permission") — consider tightening this once the
        # message format is settled.
        message = verdict["message"]
        assert any(marker in message for marker in ("rm", "Bash command"))

    def test_bash_command_filter_allows_safe_in_planning(self, engine):
        """Shell commands such as ``ls`` and ``git status`` pass in PLANNING."""
        for command in ("ls -la", "git status"):
            assert engine._check_phase_permission("shell", {"command": command}) is None

    def test_bash_command_filter_no_restriction_in_building(self, engine):
        """The ``rm`` restriction no longer applies once in BUILDING."""
        engine.advance_phase()
        verdict = engine._check_phase_permission("shell", {"command": "rm -rf build/"})
        assert verdict is None
# ---------------------------------------------------------------------------
# _execute_tool integration — phase enforcement actually blocks dispatch
# ---------------------------------------------------------------------------
class TestExecuteToolPhaseEnforcement:
    """Check that ``_execute_tool`` applies the phase gate before dispatch."""

    @pytest.fixture
    def engine_with_tools(self):
        """Return ``(engine, [search_tool, write_tool])`` with stubbed lookup.

        ``search`` is allowed in PLANNING and ``write_file`` is not. Both
        tools are mocks whose ``safe_execute`` is an ``AsyncMock``, so each
        test can tell whether dispatch actually happened.
        """
        engine = ReActEngine(llm_gateway=MagicMock(), phase_policy=default_policy())
        search_tool = MagicMock()
        search_tool.input_schema = None
        search_tool.safe_execute = AsyncMock(return_value={"results": []})
        write_tool = MagicMock()
        write_tool.input_schema = None
        write_tool.safe_execute = AsyncMock(return_value={"written": True})
        # ``name`` is special only as a Mock *constructor* argument; it could
        # be assigned after construction. The dict-based _find_tool patch is
        # used instead so the tests do not depend on the engine's own
        # name-resolution logic.
        tools_by_name = {"search": search_tool, "write_file": write_tool}
        engine._find_tool = lambda name, tools: tools_by_name.get(name)
        return engine, [search_tool, write_tool]

    @pytest.mark.asyncio
    async def test_blocked_tool_returns_phase_violation_and_skips_dispatch(self, engine_with_tools):
        """A blocked tool yields ``phase_violation`` and is never executed."""
        engine, tools = engine_with_tools
        _search_tool, write_tool = tools
        result = await engine._execute_tool("write_file", {"path": "/x"}, tools)
        assert result["error"] == "phase_violation"
        assert result["current_phase"] == "planning"
        # The gate must short-circuit before the tool is invoked.
        write_tool.safe_execute.assert_not_called()

    @pytest.mark.asyncio
    async def test_allowed_tool_dispatches_normally(self, engine_with_tools):
        """A whitelisted tool is executed and its result passed through."""
        engine, tools = engine_with_tools
        search_tool, _write_tool = tools
        result = await engine._execute_tool("search", {"query": "foo"}, tools)
        assert result == {"results": []}
        search_tool.safe_execute.assert_awaited_once_with(query="foo")

    @pytest.mark.asyncio
    async def test_after_advance_phase_blocked_tool_now_dispatches(self, engine_with_tools):
        """The same call is rejected in PLANNING and dispatched in BUILDING."""
        engine, tools = engine_with_tools
        _search_tool, write_tool = tools

        # First attempt, still in PLANNING: rejected without dispatch.
        result = await engine._execute_tool("write_file", {"path": "/x"}, tools)
        assert result["error"] == "phase_violation"
        write_tool.safe_execute.assert_not_called()

        # Advance to BUILDING.
        engine.advance_phase()

        # Second attempt: dispatched exactly once with the original arguments.
        result = await engine._execute_tool("write_file", {"path": "/x"}, tools)
        assert result == {"written": True}
        write_tool.safe_execute.assert_awaited_once_with(path="/x")
# ---------------------------------------------------------------------------
# Auto-advance safety net (KTD6)
# ---------------------------------------------------------------------------
class TestAutoAdvance:
    """Behavior of ``_maybe_auto_advance`` with and without a step threshold."""

    def test_auto_advance_after_threshold(self):
        """With a threshold of 2, the second counted step triggers a transition."""
        per_phase_tools = {
            PhaseState.PLANNING: frozenset({"search"}),
            PhaseState.BUILDING: frozenset({"write_file"}),
            PhaseState.VERIFICATION: frozenset({"shell"}),
            PhaseState.DELIVERY: frozenset({"*"}),
        }
        two_step_policy = PhasePolicy(
            whitelist=per_phase_tools,
            auto_advance_after_steps=2,
        )
        engine = ReActEngine(llm_gateway=MagicMock(), phase_policy=two_step_policy)
        assert engine.current_phase == PhaseState.PLANNING

        # First step: counter is 1, below the threshold — nothing happens.
        engine._steps_in_phase += 1
        advanced = engine._maybe_auto_advance()
        assert advanced is False
        assert engine.current_phase == PhaseState.PLANNING

        # Second step: counter reaches 2 — the engine moves on and the
        # counter restarts for the new phase.
        engine._steps_in_phase += 1
        advanced = engine._maybe_auto_advance()
        assert advanced is True
        assert engine.current_phase == PhaseState.BUILDING
        assert engine._steps_in_phase == 0

    def test_auto_advance_none_default(self):
        """The default policy never auto-advances, whatever the step count."""
        engine = ReActEngine(llm_gateway=MagicMock(), phase_policy=default_policy())
        engine._steps_in_phase = 100
        advanced = engine._maybe_auto_advance()
        assert advanced is False
        assert engine.current_phase == PhaseState.PLANNING
# ---------------------------------------------------------------------------
# AdvancePhaseTool integration
# ---------------------------------------------------------------------------
class TestAdvancePhaseTool:
    """Integration of ``AdvancePhaseTool`` with the engine's phase machine."""

    @pytest.mark.asyncio
    async def test_advance_phase_tool_transitions_engine(self):
        """Executing the tool moves the engine from PLANNING to BUILDING."""
        engine = ReActEngine(llm_gateway=MagicMock(), phase_policy=default_policy())
        tool = AdvancePhaseTool(engine=engine)
        result = await tool.execute()
        assert result["is_error"] is False
        assert result["current_phase"] == "building"
        assert engine.current_phase == PhaseState.BUILDING

    @pytest.mark.asyncio
    async def test_advance_phase_tool_at_delivery_returns_error(self):
        """At the final phase the tool reports ``already_at_final_phase``."""
        engine = ReActEngine(llm_gateway=MagicMock(), phase_policy=default_policy())
        # Walk through all phases.
        engine.advance_phase()  # PLANNING → BUILDING
        engine.advance_phase()  # BUILDING → VERIFICATION
        engine.advance_phase()  # VERIFICATION → DELIVERY
        tool = AdvancePhaseTool(engine=engine)
        result = await tool.execute()
        assert result["is_error"] is True
        assert result["error"] == "already_at_final_phase"
        assert result["current_phase"] == "delivery"

    @pytest.mark.asyncio
    async def test_advance_phase_tool_without_policy_returns_error(self):
        """Without a policy the tool reports ``no_phase_policy``."""
        engine = ReActEngine(llm_gateway=MagicMock())  # no policy
        tool = AdvancePhaseTool(engine=engine)
        result = await tool.execute()
        assert result["is_error"] is True
        assert result["error"] == "no_phase_policy"

    def test_tool_schema_accepts_no_arguments(self):
        """The input schema declares no properties and forbids extras."""
        engine = ReActEngine(llm_gateway=MagicMock(), phase_policy=default_policy())
        tool = AdvancePhaseTool(engine=engine)
        assert tool.input_schema["properties"] == {}
        assert tool.input_schema["additionalProperties"] is False

    def test_tool_bypasses_phase_check(self):
        """`advance_phase` is absent from every whitelist of the default policy.

        This is the precondition for the bypass: because no whitelist names
        the tool, it can only run through a name-based exemption in
        ``_execute_tool``. The dispatch itself is exercised by
        ``test_advance_phase_dispatches_in_planning``.
        """
        for phase, allowed in default_policy().whitelist.items():
            assert "advance_phase" not in allowed, (
                f"advance_phase must not be in {phase.value} whitelist"
            )

    @pytest.mark.asyncio
    async def test_advance_phase_dispatches_in_planning(self):
        """`advance_phase` is the LLM's escape hatch — must never be blocked."""
        engine = ReActEngine(llm_gateway=MagicMock(), phase_policy=default_policy())
        assert engine.current_phase == PhaseState.PLANNING
        # Stand-in tool so the assertion targets the phase gate only, not the
        # real tool's side effects; _find_tool is patched so the test does not
        # depend on the engine's name-resolution logic.
        fake_tool = MagicMock()
        fake_tool.input_schema = None
        fake_tool.safe_execute = AsyncMock(return_value={"is_error": False})
        engine._find_tool = lambda name, tools: fake_tool
        result = await engine._execute_tool("advance_phase", {}, [fake_tool])
        # Not rejected as a phase violation: the tool ran and its result was
        # passed through.
        assert result == {"is_error": False}
        fake_tool.safe_execute.assert_awaited_once_with()