fischer-agentkit/tests/unit/test_react_phase_enforcemen...

"""Unit tests for ReActEngine phase enforcement (G6 wiring, R24).

Per plan U3 Execution note: characterization-first — verify that
`ReActEngine(phase_policy=None)` behaves identically to pre-change (no
enforcement, no advance_phase tool, no _current_phase mutation). Then add
enforcement tests.
"""

from __future__ import annotations

from unittest.mock import AsyncMock, MagicMock

import pytest

from agentkit.core.phase import PhasePolicy, PhaseState, default_policy
from agentkit.core.react import ReActEngine
from agentkit.tools.advance_phase import AdvancePhaseTool


# ---------------------------------------------------------------------------
# Characterization — phase_policy=None preserves existing behavior
# ---------------------------------------------------------------------------


class TestCharacterizationNoPolicy:
    """When phase_policy=None, no enforcement happens and behavior matches
    pre-Wave-3."""

    def test_init_without_phase_policy(self):
        # Minimal stub LLM gateway — we're only testing constructor.
        gateway = MagicMock()
        engine = ReActEngine(llm_gateway=gateway)
        assert engine._phase_policy is None
        assert engine._current_phase is None
        assert engine._steps_in_phase == 0
        assert engine.current_phase is None

    @pytest.mark.asyncio
    async def test_execute_tool_dispatches_without_phase_check(self):
        """Tool dispatch proceeds normally when no policy set."""
        gateway = MagicMock()
        engine = ReActEngine(llm_gateway=gateway)

        # MagicMock.name is a special attribute used internally by Mock for
        # repr — setting it post-construction does not make mock.name == "x"
        # hold. Patch _find_tool directly to bypass the name lookup.
        fake_tool = MagicMock()
        fake_tool.safe_execute = AsyncMock(return_value={"output": "ok"})
        fake_tool.input_schema = None
        engine._find_tool = lambda name, tools: fake_tool

        result = await engine._execute_tool("any_tool", {"x": 1}, [fake_tool])
        assert result == {"output": "ok"}
        fake_tool.safe_execute.assert_awaited_once_with(x=1)

    @pytest.mark.asyncio
    async def test_advance_phase_returns_none_without_policy(self):
        gateway = MagicMock()
        engine = ReActEngine(llm_gateway=gateway)
        assert engine.advance_phase() is None

    def test_reset_does_not_touch_phase_state_when_no_policy(self):
        gateway = MagicMock()
        engine = ReActEngine(llm_gateway=gateway)
        engine.reset()
        assert engine._current_phase is None


# ---------------------------------------------------------------------------
# Initialization with phase_policy
# ---------------------------------------------------------------------------


class TestPhasePolicyInitialization:
    def test_phase_policy_set_initializes_current_phase(self):
        gateway = MagicMock()
        engine = ReActEngine(
            llm_gateway=gateway,
            phase_policy=default_policy(),
        )
        assert engine._phase_policy is not None
        assert engine._current_phase == PhaseState.PLANNING
        assert engine._steps_in_phase == 0

    def test_reset_resets_phase_to_start(self):
        gateway = MagicMock()
        engine = ReActEngine(
            llm_gateway=gateway,
            phase_policy=default_policy(),
        )
        # Manually move phase forward (simulating execute progress).
        engine.advance_phase()  # PLANNING → BUILDING
        assert engine._current_phase == PhaseState.BUILDING
        engine._steps_in_phase = 5

        engine.reset()
        assert engine._current_phase == PhaseState.PLANNING
        assert engine._steps_in_phase == 0


# ---------------------------------------------------------------------------
# advance_phase() transitions
# ---------------------------------------------------------------------------


class TestAdvancePhase:
    @pytest.fixture
    def engine(self):
        return ReActEngine(llm_gateway=MagicMock(), phase_policy=default_policy())

    def test_planning_to_building(self, engine):
        new_phase = engine.advance_phase()
        assert new_phase == PhaseState.BUILDING
        assert engine.current_phase == PhaseState.BUILDING
        assert engine._steps_in_phase == 0  # counter reset on transition

    def test_building_to_verification(self, engine):
        engine.advance_phase()  # → BUILDING
        new_phase = engine.advance_phase()
        assert new_phase == PhaseState.VERIFICATION
        assert engine.current_phase == PhaseState.VERIFICATION

    def test_verification_to_delivery(self, engine):
        engine.advance_phase()  # → BUILDING
        engine.advance_phase()  # → VERIFICATION
        new_phase = engine.advance_phase()
        assert new_phase == PhaseState.DELIVERY
        assert engine.current_phase == PhaseState.DELIVERY

    def test_delivery_returns_none(self, engine):
        engine.advance_phase()  # → BUILDING
        engine.advance_phase()  # → VERIFICATION
        engine.advance_phase()  # → DELIVERY
        result = engine.advance_phase()
        assert result is None
        assert engine.current_phase == PhaseState.DELIVERY


# ---------------------------------------------------------------------------
# _check_phase_permission — whitelist enforcement
# ---------------------------------------------------------------------------


class TestPhasePermission:
    @pytest.fixture
    def engine(self):
        return ReActEngine(llm_gateway=MagicMock(), phase_policy=default_policy())

    def test_search_allowed_in_planning(self, engine):
        assert engine._check_phase_permission("search", {}) is None

    def test_write_file_blocked_in_planning(self, engine):
        result = engine._check_phase_permission("write_file", {})
        assert result is not None
        assert result["error"] == "phase_violation"
        assert "write_file" in result["message"]
        assert result["current_phase"] == "planning"

    def test_write_file_allowed_in_building(self, engine):
        engine.advance_phase()  # → BUILDING
        assert engine._check_phase_permission("write_file", {}) is None

    def test_any_tool_allowed_in_delivery(self, engine):
        engine.advance_phase()  # → BUILDING
        engine.advance_phase()  # → VERIFICATION
        engine.advance_phase()  # → DELIVERY
        assert engine._check_phase_permission("literally_anything", {}) is None

    def test_bash_command_filter_blocks_rm_in_planning(self, engine):
        result = engine._check_phase_permission("shell", {"command": "rm -rf /tmp"})
        assert result is not None
        assert result["error"] == "phase_violation"
        assert "rm" in result["message"] or "Bash command" in result["message"]

    def test_bash_command_filter_allows_safe_in_planning(self, engine):
        # `ls` and `git status` are not blocked.
        assert engine._check_phase_permission("shell", {"command": "ls -la"}) is None
        assert engine._check_phase_permission("shell", {"command": "git status"}) is None

    def test_bash_command_filter_no_restriction_in_building(self, engine):
        engine.advance_phase()  # → BUILDING
        # `rm` is allowed in building phase.
        assert engine._check_phase_permission("shell", {"command": "rm -rf build/"}) is None


# ---------------------------------------------------------------------------
# _execute_tool integration — phase enforcement actually blocks dispatch
# ---------------------------------------------------------------------------


class TestExecuteToolPhaseEnforcement:
    @pytest.fixture
    def engine_with_tools(self):
        engine = ReActEngine(llm_gateway=MagicMock(), phase_policy=default_policy())
        # Two fake tools: one allowed in PLANNING (search), one not (write_file).
        # MagicMock.name can't be set post-construction (special attribute),
        # so we patch _find_tool with a dict-based lookup.
        search_tool = MagicMock()
        search_tool.input_schema = None
        search_tool.safe_execute = AsyncMock(return_value={"results": []})

        write_tool = MagicMock()
        write_tool.input_schema = None
        write_tool.safe_execute = AsyncMock(return_value={"written": True})

        tools_by_name = {"search": search_tool, "write_file": write_tool}
        engine._find_tool = lambda name, tools: tools_by_name.get(name)

        return engine, [search_tool, write_tool]

    @pytest.mark.asyncio
    async def test_blocked_tool_returns_phase_violation_and_skips_dispatch(self, engine_with_tools):
        engine, tools = engine_with_tools
        # write_file in PLANNING should be blocked — write_tool.safe_execute
        # should NEVER be called.
        result = await engine._execute_tool("write_file", {"path": "/x"}, tools)
        assert result["error"] == "phase_violation"
        assert result["current_phase"] == "planning"
        write_tool = tools[1]
        write_tool.safe_execute.assert_not_called()

    @pytest.mark.asyncio
    async def test_allowed_tool_dispatches_normally(self, engine_with_tools):
        engine, tools = engine_with_tools
        result = await engine._execute_tool("search", {"query": "foo"}, tools)
        assert result == {"results": []}
        search_tool = tools[0]
        search_tool.safe_execute.assert_awaited_once_with(query="foo")

    @pytest.mark.asyncio
    async def test_after_advance_phase_blocked_tool_now_dispatches(self, engine_with_tools):
        engine, tools = engine_with_tools
        # First: write_file blocked in PLANNING.
        result = await engine._execute_tool("write_file", {"path": "/x"}, tools)
        assert result["error"] == "phase_violation"
        # Advance to BUILDING.
        engine.advance_phase()
        # Now: write_file allowed.
        result = await engine._execute_tool("write_file", {"path": "/x"}, tools)
        assert result == {"written": True}


# ---------------------------------------------------------------------------
# Auto-advance safety net (KTD6)
# ---------------------------------------------------------------------------


class TestAutoAdvance:
    def test_auto_advance_after_threshold(self):
        # Custom policy with auto-advance after 2 steps.
        policy = PhasePolicy(
            whitelist={
                PhaseState.PLANNING: frozenset({"search"}),
                PhaseState.BUILDING: frozenset({"write_file"}),
                PhaseState.VERIFICATION: frozenset({"shell"}),
                PhaseState.DELIVERY: frozenset({"*"}),
            },
            auto_advance_after_steps=2,
        )
        engine = ReActEngine(llm_gateway=MagicMock(), phase_policy=policy)
        assert engine.current_phase == PhaseState.PLANNING

        # Step 1: counter goes to 1, no advance yet.
        engine._steps_in_phase += 1
        assert engine._maybe_auto_advance() is False
        assert engine.current_phase == PhaseState.PLANNING

        # Step 2: counter hits 2, advance triggered.
        engine._steps_in_phase += 1
        assert engine._maybe_auto_advance() is True
        assert engine.current_phase == PhaseState.BUILDING
        assert engine._steps_in_phase == 0  # reset on advance

    def test_auto_advance_none_default(self):
        # default_policy has auto_advance_after_steps=None — no auto-advance.
        engine = ReActEngine(llm_gateway=MagicMock(), phase_policy=default_policy())
        engine._steps_in_phase = 100
        assert engine._maybe_auto_advance() is False
        assert engine.current_phase == PhaseState.PLANNING


# ---------------------------------------------------------------------------
# AdvancePhaseTool integration
# ---------------------------------------------------------------------------


class TestAdvancePhaseTool:
    @pytest.mark.asyncio
    async def test_advance_phase_tool_transitions_engine(self):
        engine = ReActEngine(llm_gateway=MagicMock(), phase_policy=default_policy())
        tool = AdvancePhaseTool(engine=engine)
        result = await tool.execute()
        assert result["is_error"] is False
        assert result["current_phase"] == "building"
        assert engine.current_phase == PhaseState.BUILDING

    @pytest.mark.asyncio
    async def test_advance_phase_tool_at_delivery_returns_error(self):
        engine = ReActEngine(llm_gateway=MagicMock(), phase_policy=default_policy())
        # Walk through all phases.
        engine.advance_phase()  # PLANNING → BUILDING
        engine.advance_phase()  # BUILDING → VERIFICATION
        engine.advance_phase()  # VERIFICATION → DELIVERY
        tool = AdvancePhaseTool(engine=engine)
        result = await tool.execute()
        assert result["is_error"] is True
        assert result["error"] == "already_at_final_phase"
        assert result["current_phase"] == "delivery"

    @pytest.mark.asyncio
    async def test_advance_phase_tool_without_policy_returns_error(self):
        engine = ReActEngine(llm_gateway=MagicMock())  # no policy
        tool = AdvancePhaseTool(engine=engine)
        result = await tool.execute()
        assert result["is_error"] is True
        assert result["error"] == "no_phase_policy"

    def test_tool_schema_accepts_no_arguments(self):
        engine = ReActEngine(llm_gateway=MagicMock(), phase_policy=default_policy())
        tool = AdvancePhaseTool(engine=engine)
        # input_schema has empty properties + additionalProperties:false —
        # no arguments expected.
        assert tool.input_schema["properties"] == {}
        assert tool.input_schema["additionalProperties"] is False

    def test_tool_bypasses_phase_check(self):
        """`advance_phase` is the LLM's escape hatch — must never be blocked."""
        # _check_phase_permission should NOT block advance_phase even in PLANNING.
        # The bypass is implemented in _execute_tool by name check.
        # We verify the bypass indirectly: tool dispatches normally even in
        # PLANNING (where only search/read_file/bash/tool_search are allowed).
        # advance_phase is not in the whitelist, but the name-based bypass
        # in _execute_tool lets it through.
        # (Direct unit test of the bypass would require mocking _find_tool.)
        # Sanity: advance_phase is not in any whitelist.
        for phase, allowed in default_policy().whitelist.items():
            assert "advance_phase" not in allowed, (
                f"advance_phase must not be in {phase.value} whitelist"
            )


# ---------------------------------------------------------------------------
# Wave 4 U2 — phase_violation accumulator + drain
# ---------------------------------------------------------------------------


class TestPhaseViolationAccumulator:
    """_check_phase_permission records violations; _drain_phase_violations
    yields them as ReActEvents and clears the accumulator."""

    @pytest.fixture
    def engine(self):
        return ReActEngine(
            llm_gateway=MagicMock(),
            phase_policy=default_policy(),
        )

    def test_violation_appended_on_tool_block(self, engine):
        # write_file is blocked in PLANNING.
        engine._check_phase_permission("write_file", {})
        assert len(engine._phase_violations) == 1
        v = engine._phase_violations[0]
        assert v["error"] == "phase_violation"
        assert v["tool"] == "write_file"
        assert v["current_phase"] == "planning"
        assert v["violation_kind"] == "tool_not_allowed"

    def test_violation_appended_on_bash_block(self, engine):
        engine._check_phase_permission("shell", {"command": "rm -rf /tmp"})
        assert len(engine._phase_violations) == 1
        v = engine._phase_violations[0]
        assert v["violation_kind"] == "bash_command_blocked"
        assert v["tool"] == "shell"
        assert v["command_preview"] == "rm -rf /tmp"

    def test_no_violation_when_allowed(self, engine):
        # search is allowed in PLANNING.
        engine._check_phase_permission("search", {})
        assert engine._phase_violations == []

    def test_no_violation_without_policy(self):
        engine = ReActEngine(llm_gateway=MagicMock())  # no policy
        engine._check_phase_permission("anything", {})
        assert engine._phase_violations == []

    def test_drain_returns_events_and_clears(self, engine):
        # Trigger two violations.
        engine._check_phase_permission("write_file", {"path": "/a"})
        engine._check_phase_permission("write_file", {"path": "/b"})
        assert len(engine._phase_violations) == 2

        events = engine._drain_phase_violations(step=3)
        assert len(events) == 2
        assert all(e.event_type == "phase_violation" for e in events)
        assert all(e.step == 3 for e in events)
        # Each event data is a copy (caller can't mutate the accumulator).
        assert events[0].data["tool"] == "write_file"
        # Accumulator cleared after drain.
        assert engine._phase_violations == []

    def test_drain_empty_returns_empty(self, engine):
        assert engine._drain_phase_violations(step=1) == []

    def test_drain_returns_shallow_copy(self, engine):
        """Drained event data must not alias the original violation dict —
        mutating one must not mutate the other."""
        engine._check_phase_permission("write_file", {})
        events = engine._drain_phase_violations(step=1)
        # Mutate the drained event data.
        events[0].data["tool"] = "MUTATED"
        # Original accumulator (now empty) is unaffected — but more importantly,
        # a fresh violation recorded after drain is unaffected.
        engine._check_phase_permission("write_file", {})
        new_violations = engine._phase_violations
        assert new_violations[0]["tool"] == "write_file"  # not "MUTATED"

    def test_reset_clears_violations(self, engine):
        engine._check_phase_permission("write_file", {})
        assert len(engine._phase_violations) == 1
        engine.reset()
        assert engine._phase_violations == []


# ---------------------------------------------------------------------------
# Wave 4 U2 — execute_stream yields phase_violation events
# ---------------------------------------------------------------------------


class TestExecuteStreamPhaseViolationEvents:
    """execute_stream must yield phase_violation ReActEvents when a tool is
    blocked by _check_phase_permission. The events are drained after each
    tool_result yield."""

    @pytest.mark.asyncio
    async def test_stream_yields_phase_violation_on_tool_block(self):
        """When the LLM calls a tool blocked by the phase policy, execute_stream
        yields a tool_call event, a tool_result event (with the error dict),
        and a phase_violation event."""
        from agentkit.core.react import ReActEvent

        engine = ReActEngine(
            llm_gateway=llm_mock_gateway_with_response(
                tool_calls=[{"name": "write_file", "arguments": {"path": "/x"}}],
                content=None,
            ),
            phase_policy=default_policy(),
            max_steps=1,
        )
        # Patch _find_tool so we don't need real tools registered. write_file
        # should be blocked by phase_policy before _find_tool is called.
        engine._find_tool = lambda name, tools: None

        events: list[ReActEvent] = []
        async for ev in engine.execute_stream(
            messages=[{"role": "user", "content": "test"}],
            tools=[],
        ):
            events.append(ev)

        # Expect: thinking → tool_call → tool_result → phase_violation → final_answer
        event_types = [e.event_type for e in events]
        assert "tool_call" in event_types
        assert "tool_result" in event_types
        assert "phase_violation" in event_types

        # The phase_violation event must come AFTER tool_result.
        tool_result_idx = next(i for i, e in enumerate(events) if e.event_type == "tool_result")
        violation_idx = next(i for i, e in enumerate(events) if e.event_type == "phase_violation")
        assert violation_idx > tool_result_idx

        # Verify event data.
        violation = events[violation_idx]
        assert violation.data["error"] == "phase_violation"
        assert violation.data["tool"] == "write_file"
        assert violation.data["current_phase"] == "planning"
        assert violation.data["violation_kind"] == "tool_not_allowed"

    @pytest.mark.asyncio
    async def test_stream_yields_phase_violation_on_bash_block(self):
        """When the LLM calls shell with a dangerous command in PLANNING,
        execute_stream yields a phase_violation event with violation_kind
        = bash_command_blocked."""
        from agentkit.core.react import ReActEvent

        engine = ReActEngine(
            llm_gateway=llm_mock_gateway_with_response(
                tool_calls=[{"name": "shell", "arguments": {"command": "rm -rf /tmp"}}],
                content=None,
            ),
            phase_policy=default_policy(),
            max_steps=1,
        )
        engine._find_tool = lambda name, tools: None

        events: list[ReActEvent] = []
        async for ev in engine.execute_stream(
            messages=[{"role": "user", "content": "test"}],
            tools=[],
        ):
            events.append(ev)

        violation_events = [e for e in events if e.event_type == "phase_violation"]
        assert len(violation_events) == 1
        v = violation_events[0].data
        assert v["violation_kind"] == "bash_command_blocked"
        assert v["tool"] == "shell"
        assert "rm -rf /tmp" in v["command_preview"]

    @pytest.mark.asyncio
    async def test_stream_no_violation_when_tool_allowed(self):
        """When the LLM calls an allowed tool, no phase_violation event is yielded."""
        from agentkit.core.react import ReActEvent

        engine = ReActEngine(
            llm_gateway=llm_mock_gateway_with_response(
                tool_calls=[{"name": "search", "arguments": {"query": "foo"}}],
                content=None,
            ),
            phase_policy=default_policy(),
            max_steps=1,
        )
        # search is allowed in PLANNING; we still need _find_tool to return a
        # tool object so dispatch proceeds.
        search_tool = MagicMock()
        search_tool.input_schema = None
        search_tool.safe_execute = AsyncMock(return_value={"results": []})
        engine._find_tool = lambda name, tools: search_tool

        events: list[ReActEvent] = []
        async for ev in engine.execute_stream(
            messages=[{"role": "user", "content": "test"}],
            tools=[search_tool],
        ):
            events.append(ev)

        assert not any(e.event_type == "phase_violation" for e in events)

    @pytest.mark.asyncio
    async def test_stream_no_violation_without_policy(self):
        """Without a phase_policy, no phase_violation events are yielded —
        characterization of the no-policy path."""
        from agentkit.core.react import ReActEvent

        engine = ReActEngine(
            llm_gateway=llm_mock_gateway_with_response(
                tool_calls=[{"name": "any_tool", "arguments": {}}],
                content=None,
            ),
            max_steps=1,
        )
        any_tool = MagicMock()
        any_tool.input_schema = None
        any_tool.safe_execute = AsyncMock(return_value={"output": "ok"})
        engine._find_tool = lambda name, tools: any_tool

        events: list[ReActEvent] = []
        async for ev in engine.execute_stream(
            messages=[{"role": "user", "content": "test"}],
            tools=[any_tool],
        ):
            events.append(ev)

        assert not any(e.event_type == "phase_violation" for e in events)


# ---------------------------------------------------------------------------
# Helpers — minimal LLM gateway mocks for execute_stream tests
# ---------------------------------------------------------------------------


def llm_mock_gateway():
    """Return a MagicMock LLM gateway (sufficient for constructor tests)."""
    return MagicMock()


def llm_mock_gateway_with_response(tool_calls: list[dict], content: str | None):
    """Return a MagicMock LLM gateway whose chat_stream yields a single chunk
    containing the given tool_calls (or content for a final-answer response).

    The mock is shaped to match what execute_stream expects from
    LLMGateway.chat_stream — an async iterable of chunks with attributes
    `content`, `tool_calls`, `usage`, `model`.
    """
    gateway = MagicMock()

    # Build a fake chunk. execute_stream reads chunk.content, chunk.tool_calls,
    # chunk.usage, chunk.model. The first three are typically accessed via
    # attribute access; we make a small dataclass-like object.
    class _Chunk:
        def __init__(self, content, tool_calls, usage=None, model="default"):
            self.content = content
            self.tool_calls = tool_calls
            self.usage = usage
            self.model = model

    # If tool_calls provided, emit a chunk with tool_calls (non-streaming path).
    # Otherwise, emit a chunk with content (final answer path).
    if tool_calls:
        # Convert raw dicts to objects with .name/.arguments/.id attributes
        # (LLMGateway normally returns tool_call objects).
        class _TC:
            def __init__(self, d):
                self.name = d.get("name", "")
                self.arguments = d.get("arguments", {})
                self.id = d.get("id", "tc_test")

        chunks = [_Chunk(content=None, tool_calls=[_TC(tc) for tc in tool_calls])]
        # Follow with a final-answer chunk so execute_stream's loop exits
        # cleanly after the tool call.
        chunks.append(_Chunk(content="done", tool_calls=None))
    else:
        chunks = [_Chunk(content=content or "final answer", tool_calls=None)]

    async def _fake_chat_stream(*args, **kwargs):
        for c in chunks:
            yield c

    gateway.chat_stream = _fake_chat_stream
    return gateway