# fischer-agentkit/tests/e2e/conftest.py

"""E2E test fixtures: server lifecycle, CLI runner, API client, WebSocket helpers.

Design principles:
  1. Start a real uvicorn server with MockLLMProvider once per session
  2. CLI tests use subprocess to invoke `agentkit` commands (OpenCLI pattern)
  3. API tests use httpx against the live server
  4. WebSocket tests use the `websockets` library against the live server
  5. All tests are idempotent and repeatable
"""

import asyncio
import json
import os
import shutil
import subprocess
import sys
import time
from typing import Any, Generator

import httpx
import pytest

# ---------------------------------------------------------------------------
# Markers
# ---------------------------------------------------------------------------

# NOTE(review): pytest documents `pytestmark` for test modules and classes;
# confirm it has the intended effect when assigned in a conftest.py. The
# "integration" marker is also not among the markers registered in
# pytest_configure in this file -- verify it is registered elsewhere.
pytestmark = pytest.mark.integration


def pytest_configure(config: pytest.Config) -> None:
    """Register the E2E markers and attach a metrics collector to the config.

    The collector is stored on ``config`` as ``_e2e_metrics_collector`` so
    that other hooks and fixtures in this file can read it back.
    """
    marker_lines = (
        "e2e: end-to-end backtest (requires server)",
        "e2e_basic: basic function correctness test",
        "e2e_capability: agent intelligence capability test",
    )
    for marker_line in marker_lines:
        config.addinivalue_line("markers", marker_line)

    # Imported lazily, after marker registration, exactly once per session.
    from tests.e2e.capability_metrics import MetricsCollector

    session_collector = MetricsCollector()
    config._e2e_metrics_collector = session_collector  # type: ignore[attr-defined]


def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None:
    """After all tests, generate a capability analysis report if data was collected.

    Does nothing when no metrics collector is attached to the config or when
    the collector holds no observations. Otherwise a report is generated,
    optionally enriched with output-quality evaluations, saved under
    ``test-results/e2e`` (two directories above this file) and summarised on
    the console.

    Args:
        session: the pytest session whose config carries the collector.
        exitstatus: session exit status (unused here).
    """
    # getattr with a default: the attribute is only present when
    # pytest_configure in this file ran to completion, and a missing
    # attribute should be treated like "no collector", not raise.
    collector = getattr(session.config, "_e2e_metrics_collector", None)
    if collector is None or not collector.observations:
        return

    from tests.e2e.capability_metrics import MetricsAnalyzer, MetricsReporter

    analyzer = MetricsAnalyzer()
    report = analyzer.generate_report(collector)

    # L3 Output Quality Evaluation (optional, requires LLM).
    # Deliberately best-effort: any failure here only skips the evaluation and
    # the report generated above is still saved.
    try:
        from tests.e2e.test_capability_router_direct import _get_components

        router, skill_registry, intent_router = _get_components()
        llm_gateway = getattr(router, "_llm_gateway", None)
        if llm_gateway is not None:
            quality_evals = collector.evaluate_output_quality(llm_gateway)
            # Regenerate after the evaluation ran, then attach its results.
            report = analyzer.generate_report(collector)
            report.output_quality_evaluations = quality_evals
    except Exception as e:
        print(f"Warning: L3 output quality evaluation skipped: {e}")

    output_dir = os.path.join(os.path.dirname(__file__), "..", "..", "test-results", "e2e")
    paths = MetricsReporter.save_report(report, output_dir)

    # Print summary to console
    print("\n" + MetricsReporter.to_text(report))
    print(f"\nReport saved to: {paths['json']}")
    print(f"Text report: {paths['text']}")


# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# Address the E2E server is started on and polled at.
E2E_HOST = "127.0.0.1"
# Dedicated port to avoid conflict with dev server.
E2E_PORT = 18765
# HTTP and WebSocket base URLs share the same host:port pair.
E2E_BASE_URL = "http://{}:{}".format(E2E_HOST, E2E_PORT)
E2E_WS_URL = "ws://{}:{}".format(E2E_HOST, E2E_PORT)
E2E_API_KEY = "ak_live_e2e_test_key_000000000000000000000000000000000000000000000000"


# ---------------------------------------------------------------------------
# Mock LLM Provider (deterministic responses for backtest)
# ---------------------------------------------------------------------------

# Each value is a JSON-encoded string; payloads are written as plain dicts and
# serialised with json.dumps so no hand-escaping of quotes/newlines is needed.
MOCK_LLM_RESPONSES: dict[str, str] = {
    response_key: json.dumps(payload)
    for response_key, payload in {
        # Default / generic
        "default": {
            "result": "mock response",
            "content": "This is a mock LLM response for e2e testing.",
        },
        # Content generation
        "content_writer": {
            "result": "article generated",
            "content": "AI is transforming industries by enabling automation and intelligent decision-making.",
        },
        # Translation
        "translator": {
            "result": "translation complete",
            "content": "This is the translated text.",
        },
        # Summarization
        "summarizer": {
            "result": "summary generated",
            "content": "Key points: 1) Topic overview 2) Main findings 3) Conclusion.",
        },
        # Code generation
        "coder": {
            "result": "code generated",
            "content": 'def hello():\n    print("Hello, World!")',
        },
        # Analysis
        "analyst": {
            "result": "analysis complete",
            "content": "The data shows a positive trend with 15% growth.",
        },
        # ReAct tool call
        "react_tool_call": {
            "thought": "I need to search for information",
            "action": "web_search",
            "action_input": {"query": "test"},
            "observation": "Search results found",
        },
        # ReAct final answer
        "react_final": {
            "thought": "I have enough information",
            "final_answer": "Based on my analysis, the answer is 42.",
        },
    }.items()
}


def _build_mock_env(tmp_path: Any) -> dict[str, str]:
    """Return a copy of the current environment with E2E overrides applied.

    The overrides switch on E2E mode, pass the mock LLM responses as a JSON
    string, set the API key and WebSocket timeout, and blank out the real
    provider API keys. ``tmp_path`` is accepted but not used.
    """
    e2e_overrides = {
        "AGENTKIT_E2E_MODE": "1",
        "AGENTKIT_E2E_MOCK_RESPONSES": json.dumps(MOCK_LLM_RESPONSES),
        "AGENTKIT_API_KEY": E2E_API_KEY,
        "AGENTKIT_WS_TIMEOUT": "0",
    }
    # Disable real LLM calls
    blanked_provider_keys = dict.fromkeys(
        ("OPENAI_API_KEY", "ANTHROPIC_API_KEY", "DEEPSEEK_API_KEY"), ""
    )
    return {**os.environ, **e2e_overrides, **blanked_provider_keys}


# ---------------------------------------------------------------------------
# Server lifecycle fixture
# ---------------------------------------------------------------------------


def _stop_server_process(proc: subprocess.Popen[bytes], grace: float = 10.0) -> None:
    """Terminate *proc*, escalate to kill after *grace* seconds, and reap it."""
    if proc.poll() is not None:
        return  # already exited and reaped
    proc.terminate()
    try:
        proc.wait(timeout=grace)
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.wait()  # reap the killed child so it does not linger as a zombie


@pytest.fixture(scope="session")
def e2e_server(tmp_path_factory: pytest.TempPathFactory) -> Generator[str, None, None]:
    """Start a real AgentKit server for the entire E2E session.

    Yields the base URL (e.g. http://127.0.0.1:18765).
    The server is configured with the ``mock`` LLM provider.

    Server stdout/stderr are written to ``server.stdout.log`` and
    ``server.stderr.log`` in the fixture's temp directory. They are not
    captured through pipes because nothing drains a pipe while the tests run,
    and a full pipe buffer would block the server.

    The fixture fails the session if the server process exits during startup
    or does not answer ``/api/v1/health`` with 200 within 30 seconds. The
    process is always stopped on the way out.
    """
    tmp_path = tmp_path_factory.mktemp("e2e_server")

    # Generate a minimal agentkit.yaml for the test server
    config_dir = tmp_path / "config"
    config_dir.mkdir()
    config_file = config_dir / "agentkit.yaml"

    import yaml

    config_file.write_text(
        yaml.dump(
            {
                "server": {"host": E2E_HOST, "port": E2E_PORT},
                "llm": {"default_provider": "mock", "providers": {"mock": {"type": "mock"}}},
                "auth": {"enabled": True, "api_keys": [E2E_API_KEY]},
            }
        )
    )

    env = _build_mock_env(tmp_path)
    env["AGENTKIT_CONFIG"] = str(config_file)

    stdout_path = tmp_path / "server.stdout.log"
    stderr_path = tmp_path / "server.stderr.log"
    base_url = E2E_BASE_URL

    with stdout_path.open("wb") as stdout_log, stderr_path.open("wb") as stderr_log:
        # Start server as subprocess
        proc = subprocess.Popen(
            [
                sys.executable,
                "-m",
                "agentkit.cli.main",
                "serve",
                "--host",
                E2E_HOST,
                "--port",
                str(E2E_PORT),
            ],
            env=env,
            stdout=stdout_log,
            stderr=stderr_log,
            cwd=str(tmp_path),
        )

        try:
            # Wait for server to be ready (max 30s)
            deadline = time.monotonic() + 30
            ready = False
            while time.monotonic() < deadline:
                # If our process already died (e.g. port in use), stop waiting
                # and never accept a 200 from some other server on the port.
                if proc.poll() is not None:
                    break
                try:
                    resp = httpx.get(f"{base_url}/api/v1/health", timeout=2)
                except httpx.TransportError:
                    # Connection refused, timeouts, protocol errors: not up yet.
                    pass
                else:
                    if resp.status_code == 200:
                        ready = True
                        break
                time.sleep(0.5)

            if not ready:
                _stop_server_process(proc, grace=5)
                pytest.fail(
                    f"E2E server failed to start within 30s.\n"
                    f"exit code: {proc.returncode}\n"
                    f"stdout: {stdout_path.read_text(errors='replace')[:2000]}\n"
                    f"stderr: {stderr_path.read_text(errors='replace')[:2000]}"
                )

            yield base_url
        finally:
            # Teardown (also runs when startup fails; a no-op if already stopped)
            _stop_server_process(proc)


# ---------------------------------------------------------------------------
# API client fixture
# ---------------------------------------------------------------------------


@pytest.fixture(scope="session")
def api_client(e2e_server: str) -> Generator[httpx.Client, None, None]:
    """Synchronous httpx client configured for the E2E server.

    The client sends the E2E API key in ``X-API-Key`` and a JSON content type
    on every request. It is closed at session teardown so its connection pool
    is released.
    """
    with httpx.Client(
        base_url=e2e_server,
        headers={"X-API-Key": E2E_API_KEY, "Content-Type": "application/json"},
        timeout=30,
    ) as client:
        yield client


# ---------------------------------------------------------------------------
# CLI runner (subprocess-based, OpenCLI pattern)
# ---------------------------------------------------------------------------


class CLIRunner:
    """Simulate user CLI operations via subprocess.

    This is the 'OpenCLI' pattern: invoke the real `agentkit` binary
    as a subprocess and capture its output, exactly as a user would.
    """

    def __init__(self, env: dict[str, str] | None = None, cwd: str | None = None):
        self.env = env or os.environ.copy()
        self.cwd = cwd

    def _resolve_agentkit_cmd(self) -> list[str]:
        """Resolve the agentkit command to use.

        Prefer the installed `agentkit` script (handles Rich/Typer output correctly),
        fall back to `python -m agentkit.cli.main`.
        """
        agentkit_path = shutil.which("agentkit")
        if agentkit_path:
            return [agentkit_path]
        return [sys.executable, "-m", "agentkit.cli.main"]

    def run(self, args: list[str], timeout: int = 30) -> subprocess.CompletedProcess[str]:
        """Run an agentkit CLI command and return the result.

        Args:
            args: CLI arguments, e.g. ["version"] or ["task", "submit", ...]
            timeout: maximum seconds to wait

        Returns:
            CompletedProcess with stdout, stderr, returncode
        """
        cmd = [*self._resolve_agentkit_cmd(), *args]
        return subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=timeout,
            env=self.env,
            cwd=self.cwd,
        )

    def run_server_command(
        self, args: list[str], server_url: str, timeout: int = 30
    ) -> subprocess.CompletedProcess[str]:
        """Run a CLI command that requires --server-url."""
        full_args = [*args, "--server-url", server_url]
        return self.run(full_args, timeout=timeout)


@pytest.fixture
def cli_runner(tmp_path: Any) -> CLIRunner:
    """Per-test CLI runner whose config dir and cwd live under ``tmp_path``."""
    isolated_env = {
        **os.environ,
        "AGENTKIT_CONFIG_DIR": str(tmp_path / "config"),
        "AGENTKIT_WS_TIMEOUT": "0",
        # Prevent onboarding prompts
        "AGENTKIT_E2E_MODE": "1",
    }
    return CLIRunner(env=isolated_env, cwd=str(tmp_path))


@pytest.fixture(scope="session")
def cli_runner_session(e2e_server: str) -> CLIRunner:
    """Session-wide CLI runner pointed at the E2E server via environment variables."""
    server_env = dict(os.environ)
    server_env.update(
        AGENTKIT_SERVER_URL=e2e_server,
        AGENTKIT_API_KEY=E2E_API_KEY,
        AGENTKIT_WS_TIMEOUT="0",
        AGENTKIT_E2E_MODE="1",
    )
    return CLIRunner(env=server_env)


# ---------------------------------------------------------------------------
# WebSocket helper
# ---------------------------------------------------------------------------


class WSChatHelper:
    """Drive a chat WebSocket endpoint from E2E tests."""

    def __init__(self, base_ws_url: str, api_key: str):
        self.base_ws_url = base_ws_url
        self.api_key = api_key

    async def connect_and_chat(
        self,
        session_id: str,
        messages: list[dict[str, str]],
        timeout: float = 10.0,
    ) -> list[dict[str, Any]]:
        """Open the chat socket, send each message, and gather every reply.

        The first frame must have type "connected". After each sent message,
        frames are read until one has type "final_answer" or "error", or until
        a single receive exceeds ``timeout`` (recorded as {"type": "timeout"}).

        Args:
            session_id: chat session ID
            messages: list of {"type": "message", "content": "..."}
            timeout: max seconds to wait for each individual frame

        Returns:
            every decoded server frame, in arrival order, including the
            initial "connected" frame and any synthetic timeout markers
        """
        try:
            import websockets
        except ImportError:
            pytest.skip("websockets package not installed")

        endpoint = f"{self.base_ws_url}/api/v1/chat/ws/{session_id}?api_key={self.api_key}"
        events: list[dict[str, Any]] = []

        async with websockets.connect(endpoint) as socket:
            # Handshake: the server announces the connection first.
            greeting = json.loads(await asyncio.wait_for(socket.recv(), timeout=timeout))
            events.append(greeting)
            assert greeting.get("type") == "connected", f"Expected connected, got {greeting}"

            for outgoing in messages:
                await socket.send(json.dumps(outgoing))

                # Drain replies for this message until a terminal frame or a stall.
                finished = False
                while not finished:
                    try:
                        frame = await asyncio.wait_for(socket.recv(), timeout=timeout)
                    except asyncio.TimeoutError:
                        events.append({"type": "timeout"})
                        finished = True
                    else:
                        event = json.loads(frame)
                        events.append(event)
                        finished = event.get("type") in ("final_answer", "error")

        return events


@pytest.fixture(scope="session")
def ws_helper(e2e_server: str) -> WSChatHelper:
    """Session-wide WebSocket helper derived from the E2E server's HTTP URL."""
    ws_url = e2e_server
    # Map each HTTP scheme to its WebSocket counterpart.
    for http_scheme, ws_scheme in (("http://", "ws://"), ("https://", "wss://")):
        ws_url = ws_url.replace(http_scheme, ws_scheme)
    return WSChatHelper(base_ws_url=ws_url, api_key=E2E_API_KEY)


# ---------------------------------------------------------------------------
# Skill / Agent setup helpers
# ---------------------------------------------------------------------------


def register_skill_via_api(
    api_client: httpx.Client,
    name: str,
    keywords: list[str] | None = None,
    execution_mode: str = "direct",
    task_mode: str = "llm_generate",
) -> httpx.Response:
    """POST a skill definition to ``/api/v1/skills`` and return the response.

    Args:
        api_client: client used to send the request.
        name: skill name; also used as the agent type and in the prompt text.
        keywords: intent keywords; falls back to ``[name]`` when empty or None.
        execution_mode: only added to the payload (together with
            ``max_steps`` = 5) when it is not "direct".
        task_mode: value for the payload's ``task_mode`` field.

    Returns:
        The response from the skills endpoint, unchecked.
    """
    prompt_section = {
        "identity": f"You are a {name} assistant",
        "instructions": f"Perform {name} tasks",
        "output_format": "JSON",
    }
    intent_section = {
        "keywords": keywords or [name],
        "description": f"{name} skill for e2e testing",
    }
    config: dict[str, Any] = {
        "name": name,
        "agent_type": name,
        "task_mode": task_mode,
        "description": f"E2E test skill: {name}",
        "prompt": prompt_section,
        "intent": intent_section,
    }
    if execution_mode != "direct":
        config.update(execution_mode=execution_mode, max_steps=5)

    return api_client.post("/api/v1/skills", json={"config": config})


def create_session_via_api(api_client: httpx.Client, agent_name: str = "test") -> str:
    """POST to ``/api/v1/chat/sessions`` and return the new session's ID.

    Asserts that the endpoint answers with status 201; the response body is
    included in the assertion message otherwise.
    """
    payload = {"agent_name": agent_name}
    response = api_client.post("/api/v1/chat/sessions", json=payload)
    assert response.status_code == 201, f"Failed to create session: {response.text}"
    body = response.json()
    return body["session_id"]


# ---------------------------------------------------------------------------
# Metrics Collector fixture
# ---------------------------------------------------------------------------


@pytest.fixture(scope="session")
def metrics_collector(request: pytest.FixtureRequest):
    """Expose the collector stored on the pytest config as a session fixture."""
    from tests.e2e.capability_metrics import MetricsCollector

    pytest_config = request.config
    shared_collector: MetricsCollector = pytest_config._e2e_metrics_collector  # type: ignore[attr-defined]
    return shared_collector