"""Unit tests for PhasePolicy + PhaseState (G6 core, R24/R25/R26). Covers: - PhaseState enum (next_of, from_string) - default_policy() KTD5 whitelist - PhasePolicy.is_tool_allowed / is_bash_command_allowed - policy_from_config parsing (R26 config-driven) - ServerConfig.plan_exec integration """ from __future__ import annotations import re import pytest from agentkit.core.phase import ( WILDCARD, PhasePolicy, PhaseState, default_policy, policy_from_config, ) from agentkit.server.config import ServerConfig # --------------------------------------------------------------------------- # PhaseState enum # --------------------------------------------------------------------------- class TestPhaseState: def test_values(self): assert PhaseState.PLANNING.value == "planning" assert PhaseState.BUILDING.value == "building" assert PhaseState.VERIFICATION.value == "verification" assert PhaseState.DELIVERY.value == "delivery" def test_next_of(self): assert PhaseState.next_of(PhaseState.PLANNING) == PhaseState.BUILDING assert PhaseState.next_of(PhaseState.BUILDING) == PhaseState.VERIFICATION assert PhaseState.next_of(PhaseState.VERIFICATION) == PhaseState.DELIVERY assert PhaseState.next_of(PhaseState.DELIVERY) is None def test_from_string_case_insensitive(self): assert PhaseState.from_string("planning") == PhaseState.PLANNING assert PhaseState.from_string("PLANNING") == PhaseState.PLANNING assert PhaseState.from_string("Building") == PhaseState.BUILDING def test_from_string_invalid_raises(self): with pytest.raises(ValueError, match="Invalid phase name"): PhaseState.from_string("unknown") with pytest.raises(ValueError, match="Valid:"): PhaseState.from_string("exploration") # --------------------------------------------------------------------------- # default_policy() — KTD5 whitelist # --------------------------------------------------------------------------- class TestDefaultPolicy: def test_has_all_four_phases(self): policy = default_policy() assert PhaseState.PLANNING in policy.whitelist assert PhaseState.BUILDING in policy.whitelist assert PhaseState.VERIFICATION in policy.whitelist assert PhaseState.DELIVERY in policy.whitelist def test_planning_whitelist_matches_r24(self): policy = default_policy() allowed = policy.whitelist[PhaseState.PLANNING] assert "search" in allowed assert "read_file" in allowed assert "shell" in allowed assert "tool_search" in allowed # Planning must NOT allow write_file. assert "write_file" not in allowed def test_building_whitelist_includes_write_file(self): policy = default_policy() allowed = policy.whitelist[PhaseState.BUILDING] assert "write_file" in allowed assert "shell" in allowed assert "read_file" in allowed def test_verification_whitelist_excludes_write(self): policy = default_policy() allowed = policy.whitelist[PhaseState.VERIFICATION] assert "shell" in allowed assert "read_file" in allowed assert "write_file" not in allowed def test_delivery_wildcard(self): policy = default_policy() allowed = policy.whitelist[PhaseState.DELIVERY] assert WILDCARD in allowed def test_start_phase_default_planning(self): assert default_policy().start_phase == PhaseState.PLANNING def test_auto_advance_default_none(self): # KTD6: manual by default. assert default_policy().auto_advance_after_steps is None def test_bash_filter_blocks_rm_in_planning(self): policy = default_policy() assert policy.is_bash_command_allowed("ls -la", PhaseState.PLANNING) is True assert policy.is_bash_command_allowed("git status", PhaseState.PLANNING) is True assert policy.is_bash_command_allowed("rm -rf /tmp/x", PhaseState.PLANNING) is False assert policy.is_bash_command_allowed("echo x > file.txt", PhaseState.PLANNING) is False def test_bash_filter_no_restriction_in_building(self): policy = default_policy() assert policy.is_bash_command_allowed("rm -rf build/", PhaseState.BUILDING) is True assert policy.is_bash_command_allowed("echo x > out.log", PhaseState.BUILDING) is True # --------------------------------------------------------------------------- # PhasePolicy — is_tool_allowed # --------------------------------------------------------------------------- class TestIsToolAllowed: def test_planning_allows_search(self): policy = default_policy() assert policy.is_tool_allowed("search", PhaseState.PLANNING) is True def test_planning_blocks_write_file(self): policy = default_policy() assert policy.is_tool_allowed("write_file", PhaseState.PLANNING) is False def test_building_allows_write_file(self): policy = default_policy() assert policy.is_tool_allowed("write_file", PhaseState.BUILDING) is True def test_delivery_wildcard_allows_anything(self): policy = default_policy() assert policy.is_tool_allowed("any_random_tool", PhaseState.DELIVERY) is True assert policy.is_tool_allowed("write_file", PhaseState.DELIVERY) is True def test_unknown_phase_returns_false(self): # ponytail: unknown phase → empty whitelist → no tool allowed. # We can't construct an unknown PhaseState (enum), but if a phase # were missing from the whitelist dict, is_tool_allowed should # return False (defensive). policy = PhasePolicy( whitelist={ PhaseState.PLANNING: frozenset({"search"}), PhaseState.BUILDING: frozenset({"write_file"}), PhaseState.VERIFICATION: frozenset({"shell"}), PhaseState.DELIVERY: frozenset({WILDCARD}), } ) # BUILDING is in whitelist, so allowed checks work normally. assert policy.is_tool_allowed("write_file", PhaseState.BUILDING) is True # Phase missing from whitelist would return False (defensive .get default). # We test this by constructing a minimal policy. minimal = PhasePolicy( whitelist={ PhaseState.PLANNING: frozenset({WILDCARD}), PhaseState.BUILDING: frozenset({WILDCARD}), PhaseState.VERIFICATION: frozenset({WILDCARD}), PhaseState.DELIVERY: frozenset({WILDCARD}), } ) # VERIFICATION is in whitelist — wildcard allows all. assert minimal.is_tool_allowed("anything", PhaseState.VERIFICATION) is True # --------------------------------------------------------------------------- # PhasePolicy — edge cases & errors # --------------------------------------------------------------------------- class TestPhasePolicyEdgeCases: def test_empty_whitelist_raises(self): # Fail-fast: an empty whitelist for a non-wildcard phase is a bug. with pytest.raises(ValueError, match="empty whitelist"): PhasePolicy( whitelist={ PhaseState.PLANNING: frozenset(), # empty! PhaseState.BUILDING: frozenset({WILDCARD}), PhaseState.VERIFICATION: frozenset({WILDCARD}), PhaseState.DELIVERY: frozenset({WILDCARD}), } ) def test_wildcard_only_does_not_raise(self): # Wildcard-only whitelist is valid (means "all tools"). policy = PhasePolicy( whitelist={ PhaseState.PLANNING: frozenset({WILDCARD}), PhaseState.BUILDING: frozenset({WILDCARD}), PhaseState.VERIFICATION: frozenset({WILDCARD}), PhaseState.DELIVERY: frozenset({WILDCARD}), } ) assert policy.is_tool_allowed("anything", PhaseState.PLANNING) is True def test_to_dict_serializable(self): policy = default_policy() d = policy.to_dict() assert "whitelist" in d assert "planning" in d["whitelist"] assert "delivery" in d["whitelist"] assert d["start_phase"] == "planning" assert d["auto_advance_after_steps"] is None def test_custom_bash_filter(self): custom_filter = re.compile(r"\b(pip install|npm install)\b") policy = PhasePolicy( whitelist={ PhaseState.PLANNING: frozenset({"shell"}), PhaseState.BUILDING: frozenset({"shell"}), PhaseState.VERIFICATION: frozenset({"shell"}), PhaseState.DELIVERY: frozenset({WILDCARD}), }, bash_command_filter={PhaseState.BUILDING: custom_filter}, ) assert policy.is_bash_command_allowed("npm install foo", PhaseState.BUILDING) is False assert policy.is_bash_command_allowed("npm run build", PhaseState.BUILDING) is True # --------------------------------------------------------------------------- # policy_from_config — R26 (config-driven) # --------------------------------------------------------------------------- class TestPolicyFromConfig: def test_empty_config_returns_none(self): assert policy_from_config({}) is None def test_enabled_false_returns_none(self): # Opt-out — explicit `enabled: false` disables policy. result = policy_from_config({"enabled": False}) assert result is None def test_enabled_default_true_when_section_present(self): # When section is present but `enabled` is missing, default is True. result = policy_from_config({"auto_advance_after_steps": 3}) assert result is not None assert result.auto_advance_after_steps == 3 def test_auto_advance_after_steps(self): policy = policy_from_config({"enabled": True, "auto_advance_after_steps": 5}) assert policy is not None assert policy.auto_advance_after_steps == 5 def test_start_phase_custom(self): policy = policy_from_config({"enabled": True, "start_phase": "building"}) assert policy is not None assert policy.start_phase == PhaseState.BUILDING def test_start_phase_invalid_raises(self): with pytest.raises(ValueError, match="Invalid phase name"): policy_from_config({"enabled": True, "start_phase": "unknown"}) def test_whitelist_override_merges_with_default(self): policy = policy_from_config( { "enabled": True, "whitelist_override": { "planning": ["search", "read_file"], # removes shell from default }, } ) assert policy is not None # Override wins — shell should be removed from planning. assert policy.is_tool_allowed("search", PhaseState.PLANNING) is True assert policy.is_tool_allowed("read_file", PhaseState.PLANNING) is True assert policy.is_tool_allowed("shell", PhaseState.PLANNING) is False # Other phases unchanged. assert policy.is_tool_allowed("write_file", PhaseState.BUILDING) is True def test_whitelist_override_invalid_phase_raises(self): with pytest.raises(ValueError, match="Invalid phase name"): policy_from_config( { "enabled": True, "whitelist_override": {"unknown_phase": ["tool"]}, } ) def test_whitelist_override_non_list_raises(self): with pytest.raises(ValueError, match="must be a list"): policy_from_config( { "enabled": True, "whitelist_override": {"planning": "not a list"}, } ) def test_to_dict_round_trip_via_default(self): # Sanity: default policy serializes to a dict with expected keys. policy = default_policy() d = policy.to_dict() assert set(d["whitelist"].keys()) == { "planning", "building", "verification", "delivery", } # --------------------------------------------------------------------------- # ServerConfig.plan_exec integration (R26) # --------------------------------------------------------------------------- class TestServerConfigPlanExec: def test_default_plan_exec_empty(self): config = ServerConfig.from_dict({}) assert config.plan_exec == {} def test_plan_exec_loaded_from_dict(self): config = ServerConfig.from_dict( { "plan_exec": { "enabled": True, "auto_advance_after_steps": 5, } } ) assert config.plan_exec == {"enabled": True, "auto_advance_after_steps": 5} def test_plan_exec_empty_dict_default(self): config = ServerConfig.from_dict({"plan_exec": {}}) assert config.plan_exec == {} def test_plan_exec_resolved_to_policy(self): # Wire the config dict through policy_from_config to verify integration. config = ServerConfig.from_dict( { "plan_exec": { "enabled": True, "auto_advance_after_steps": 3, } } ) policy = policy_from_config(config.plan_exec) assert policy is not None assert policy.auto_advance_after_steps == 3 def test_plan_exec_disabled_via_config(self): config = ServerConfig.from_dict({"plan_exec": {"enabled": False}}) policy = policy_from_config(config.plan_exec) assert policy is None