"""E2E test fixtures: server lifecycle, CLI runner, API client, WebSocket helpers.

Design principles:
1. Start a real uvicorn server with MockLLMProvider once per session
2. CLI tests use subprocess to invoke `agentkit` commands (OpenCLI pattern)
3. API tests use httpx against the live server
4. WebSocket tests use the `websockets` library against the live server
5. All tests are idempotent and repeatable
"""

import asyncio
import json
import os
import shutil
import subprocess
import sys
import time
from typing import Any, Generator

import httpx
import pytest

# ---------------------------------------------------------------------------
# Markers
# ---------------------------------------------------------------------------

pytestmark = pytest.mark.integration


def pytest_configure(config: pytest.Config) -> None:
    """Register the E2E markers and attach a metrics collector to the config."""
    config.addinivalue_line("markers", "e2e: end-to-end backtest (requires server)")
    config.addinivalue_line("markers", "e2e_basic: basic function correctness test")
    config.addinivalue_line("markers", "e2e_capability: agent intelligence capability test")

    # Initialize session-scoped metrics collector
    from tests.e2e.capability_metrics import MetricsCollector

    config._e2e_metrics_collector = MetricsCollector()  # type: ignore[attr-defined]


def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None:
    """After all tests, generate capability analysis report if data was collected."""
    # getattr: the attribute is only set by pytest_configure above; if that hook
    # did not complete, finishing the session must not raise AttributeError.
    collector = getattr(session.config, "_e2e_metrics_collector", None)
    if collector is None or not collector.observations:
        return

    from tests.e2e.capability_metrics import MetricsAnalyzer, MetricsReporter

    analyzer = MetricsAnalyzer()
    report = analyzer.generate_report(collector)

    output_dir = os.path.join(os.path.dirname(__file__), "..", "..", "test-results", "e2e")
    paths = MetricsReporter.save_report(report, output_dir)

    # Print summary to console
    print("\n" + MetricsReporter.to_text(report))
    print(f"\nReport saved to: {paths['json']}")
    print(f"Text report: {paths['text']}")


# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

E2E_HOST = "127.0.0.1"
E2E_PORT = 18765  # dedicated port to avoid conflict with dev server
E2E_BASE_URL = f"http://{E2E_HOST}:{E2E_PORT}"
E2E_WS_URL = f"ws://{E2E_HOST}:{E2E_PORT}"
E2E_API_KEY = "ak_live_e2e_test_key_000000000000000000000000000000000000000000000000"

# ---------------------------------------------------------------------------
# Mock LLM Provider (deterministic responses for backtest)
# ---------------------------------------------------------------------------

MOCK_LLM_RESPONSES: dict[str, str] = {
    # Default / generic
    "default": '{"result": "mock response", "content": "This is a mock LLM response for e2e testing."}',
    # Content generation
    "content_writer": '{"result": "article generated", "content": "AI is transforming industries by enabling automation and intelligent decision-making."}',
    # Translation
    "translator": '{"result": "translation complete", "content": "This is the translated text."}',
    # Summarization
    "summarizer": '{"result": "summary generated", "content": "Key points: 1) Topic overview 2) Main findings 3) Conclusion."}',
    # Code generation
    "coder": '{"result": "code generated", "content": "def hello():\\n print(\\"Hello, World!\\")"}',
    # Analysis
    "analyst": '{"result": "analysis complete", "content": "The data shows a positive trend with 15% growth."}',
    # ReAct tool call
    "react_tool_call": '{"thought": "I need to search for information", "action": "web_search", "action_input": {"query": "test"}, "observation": "Search results found"}',
    # ReAct final answer
    "react_final": '{"thought": "I have enough information", "final_answer": "Based on my analysis, the answer is 42."}',
}


def _build_mock_env(tmp_path: Any) -> dict[str, str]:
    """Build environment variables for a server with MockLLMProvider.

    Args:
        tmp_path: accepted for call-site compatibility; not read by this function.

    Returns:
        A copy of the current process environment with the E2E overrides applied.
    """
    env = os.environ.copy()
    env.update(
        {
            "AGENTKIT_E2E_MODE": "1",
            "AGENTKIT_E2E_MOCK_RESPONSES": json.dumps(MOCK_LLM_RESPONSES),
            "AGENTKIT_API_KEY": E2E_API_KEY,
            "AGENTKIT_WS_TIMEOUT": "0",
            # Disable real LLM calls
            "OPENAI_API_KEY": "",
            "ANTHROPIC_API_KEY": "",
            "DEEPSEEK_API_KEY": "",
        }
    )
    return env


# ---------------------------------------------------------------------------
# Server lifecycle fixture
# ---------------------------------------------------------------------------


def _stop_process(proc: "subprocess.Popen[bytes]", timeout: float = 10.0) -> None:
    """Terminate *proc*, escalating to kill if it does not exit within *timeout*."""
    if proc.poll() is not None:
        return  # already exited
    proc.terminate()
    try:
        proc.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.wait()  # reap the killed process so it does not linger as a zombie


def _read_log_head(path: Any, limit: int = 2000) -> str:
    """Return up to the first *limit* characters of the log file at *path*."""
    return path.read_text(errors="replace")[:limit]


@pytest.fixture(scope="session")
def e2e_server(tmp_path_factory: pytest.TempPathFactory) -> Generator[str, None, None]:
    """Start a real AgentKit server for the entire E2E session.

    Returns the base URL (e.g. http://127.0.0.1:18765).
    The server uses MockLLMProvider so no real LLM calls are made.
    """
    tmp_path = tmp_path_factory.mktemp("e2e_server")

    # Generate a minimal agentkit.yaml for the test server
    config_dir = tmp_path / "config"
    config_dir.mkdir()
    config_file = config_dir / "agentkit.yaml"

    import yaml

    config_file.write_text(
        yaml.dump(
            {
                "server": {"host": E2E_HOST, "port": E2E_PORT},
                "llm": {"default_provider": "mock", "providers": {"mock": {"type": "mock"}}},
                "auth": {"enabled": True, "api_keys": [E2E_API_KEY]},
            }
        )
    )

    env = _build_mock_env(tmp_path)
    env["AGENTKIT_CONFIG"] = str(config_file)

    # Server output goes to files rather than subprocess.PIPE: nothing drains a
    # pipe while the tests run, so a full pipe buffer would block the server.
    stdout_path = tmp_path / "server.stdout.log"
    stderr_path = tmp_path / "server.stderr.log"

    # Start server as subprocess. The child receives its own copies of the file
    # handles, so the parent's copies can be closed as soon as Popen returns.
    with stdout_path.open("wb") as stdout_file, stderr_path.open("wb") as stderr_file:
        proc = subprocess.Popen(
            [
                sys.executable,
                "-m",
                "agentkit.cli.main",
                "serve",
                "--host",
                E2E_HOST,
                "--port",
                str(E2E_PORT),
            ],
            env=env,
            stdout=stdout_file,
            stderr=stderr_file,
            cwd=str(tmp_path),
        )

    # Wait for server to be ready (max 30s)
    base_url = E2E_BASE_URL
    deadline = time.monotonic() + 30
    ready = False
    while time.monotonic() < deadline:
        if proc.poll() is not None:
            break  # server process exited; further polling cannot succeed
        try:
            resp = httpx.get(f"{base_url}/api/v1/health", timeout=2)
        except httpx.TransportError:
            # Connection refused, read timeout, reset, ... - server not up yet.
            pass
        else:
            if resp.status_code == 200:
                ready = True
                break
        time.sleep(0.5)

    if not ready:
        _stop_process(proc, timeout=5)
        pytest.fail(
            f"E2E server failed to start within 30s.\n"
            f"exit code: {proc.returncode}\n"
            f"stdout: {_read_log_head(stdout_path)}\n"
            f"stderr: {_read_log_head(stderr_path)}"
        )

    try:
        yield base_url
    finally:
        # Teardown
        _stop_process(proc, timeout=10)


# ---------------------------------------------------------------------------
# API client fixture
# ---------------------------------------------------------------------------


@pytest.fixture(scope="session")
def api_client(e2e_server: str) -> Generator[httpx.Client, None, None]:
    """Synchronous httpx client configured for the E2E server.

    The client is closed when the session ends so its connection pool is released.
    """
    with httpx.Client(
        base_url=e2e_server,
        headers={"X-API-Key": E2E_API_KEY, "Content-Type": "application/json"},
        timeout=30,
    ) as client:
        yield client


# ---------------------------------------------------------------------------
# CLI runner (subprocess-based, OpenCLI pattern)
# ---------------------------------------------------------------------------


class CLIRunner:
    """Simulate user CLI operations via subprocess.

    This is the 'OpenCLI' pattern: invoke the real `agentkit` binary
    as a subprocess and capture its output, exactly as a user would.
    """

    def __init__(self, env: dict[str, str] | None = None, cwd: str | None = None):
        # A missing (or empty) env falls back to a copy of the current environment.
        self.env = env or os.environ.copy()
        self.cwd = cwd

    def _resolve_agentkit_cmd(self) -> list[str]:
        """Resolve the agentkit command to use.

        Prefer the installed `agentkit` script (handles Rich/Typer output correctly),
        fall back to `python -m agentkit.cli.main`.
        """
        agentkit_path = shutil.which("agentkit")
        if agentkit_path:
            return [agentkit_path]
        return [sys.executable, "-m", "agentkit.cli.main"]

    def run(self, args: list[str], timeout: int = 30) -> subprocess.CompletedProcess[str]:
        """Run an agentkit CLI command and return the result.

        Args:
            args: CLI arguments, e.g. ["version"] or ["task", "submit", ...]
            timeout: maximum seconds to wait

        Returns:
            CompletedProcess with stdout, stderr, returncode
        """
        cmd = [*self._resolve_agentkit_cmd(), *args]
        return subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=timeout,
            env=self.env,
            cwd=self.cwd,
        )

    def run_server_command(
        self, args: list[str], server_url: str, timeout: int = 30
    ) -> subprocess.CompletedProcess[str]:
        """Run a CLI command that requires --server-url."""
        full_args = [*args, "--server-url", server_url]
        return self.run(full_args, timeout=timeout)


@pytest.fixture
def cli_runner(tmp_path: Any) -> CLIRunner:
    """CLI runner with isolated environment."""
    env = os.environ.copy()
    env["AGENTKIT_CONFIG_DIR"] = str(tmp_path / "config")
    env["AGENTKIT_WS_TIMEOUT"] = "0"
    # Prevent onboarding prompts
    env["AGENTKIT_E2E_MODE"] = "1"
    return CLIRunner(env=env, cwd=str(tmp_path))


@pytest.fixture(scope="session")
def cli_runner_session(e2e_server: str) -> CLIRunner:
    """CLI runner configured to talk to the E2E server."""
    env = os.environ.copy()
    env["AGENTKIT_SERVER_URL"] = e2e_server
    env["AGENTKIT_API_KEY"] = E2E_API_KEY
    env["AGENTKIT_WS_TIMEOUT"] = "0"
    env["AGENTKIT_E2E_MODE"] = "1"
    return CLIRunner(env=env)


# ---------------------------------------------------------------------------
# WebSocket helper
# ---------------------------------------------------------------------------


class WSChatHelper:
    """Helper for WebSocket chat E2E tests."""

    def __init__(self, base_ws_url: str, api_key: str):
        self.base_ws_url = base_ws_url
        self.api_key = api_key

    async def connect_and_chat(
        self,
        session_id: str,
        messages: list[dict[str, str]],
        timeout: float = 10.0,
    ) -> list[dict[str, Any]]:
        """Connect to a chat WebSocket, send messages, collect responses.

        Each message is sent in turn; after each send, frames are collected until
        a "final_answer" or "error" frame arrives or a receive times out (a
        synthetic {"type": "timeout"} entry is recorded in that case).

        Args:
            session_id: chat session ID
            messages: list of {"type": "message", "content": "..."}
            timeout: max seconds to wait for each incoming frame

        Returns:
            list of all server-sent messages
        """
        try:
            import websockets
        except ImportError:
            pytest.skip("websockets package not installed")

        uri = f"{self.base_ws_url}/api/v1/chat/ws/{session_id}?api_key={self.api_key}"
        received: list[dict[str, Any]] = []

        async with websockets.connect(uri) as ws:
            # Wait for connected event
            msg = await asyncio.wait_for(ws.recv(), timeout=timeout)
            data = json.loads(msg)
            received.append(data)
            assert data.get("type") == "connected", f"Expected connected, got {data}"

            # Send user messages
            for user_msg in messages:
                await ws.send(json.dumps(user_msg))

                # Collect responses until final_answer or error
                while True:
                    try:
                        raw = await asyncio.wait_for(ws.recv(), timeout=timeout)
                    except asyncio.TimeoutError:
                        received.append({"type": "timeout"})
                        break
                    resp = json.loads(raw)
                    received.append(resp)
                    if resp.get("type") in ("final_answer", "error"):
                        break

        return received


@pytest.fixture(scope="session")
def ws_helper(e2e_server: str) -> WSChatHelper:
    """WebSocket chat helper for the E2E server."""
    ws_url = e2e_server.replace("http://", "ws://").replace("https://", "wss://")
    return WSChatHelper(base_ws_url=ws_url, api_key=E2E_API_KEY)


# ---------------------------------------------------------------------------
# Skill / Agent setup helpers
# ---------------------------------------------------------------------------


def register_skill_via_api(
    api_client: httpx.Client,
    name: str,
    keywords: list[str] | None = None,
    execution_mode: str = "direct",
    task_mode: str = "llm_generate",
) -> httpx.Response:
    """Register a skill via the API for E2E testing.

    Args:
        api_client: client used to POST to /api/v1/skills
        name: skill name; also used as the agent_type and in the prompt text
        keywords: intent keywords; defaults to [name] when omitted or empty
        execution_mode: only sent (together with max_steps=5) when not "direct"
        task_mode: value for the skill's task_mode field

    Returns:
        The raw HTTP response; the status code is not checked here.
    """
    config: dict[str, Any] = {
        "name": name,
        "agent_type": name,
        "task_mode": task_mode,
        "description": f"E2E test skill: {name}",
        "prompt": {
            "identity": f"You are a {name} assistant",
            "instructions": f"Perform {name} tasks",
            "output_format": "JSON",
        },
        "intent": {
            "keywords": keywords or [name],
            "description": f"{name} skill for e2e testing",
        },
    }
    if execution_mode != "direct":
        config["execution_mode"] = execution_mode
        config["max_steps"] = 5

    return api_client.post("/api/v1/skills", json={"config": config})


def create_session_via_api(api_client: httpx.Client, agent_name: str = "test") -> str:
    """Create a chat session and return the session ID."""
    resp = api_client.post("/api/v1/chat/sessions", json={"agent_name": agent_name})
    assert resp.status_code == 201, f"Failed to create session: {resp.text}"
    return resp.json()["session_id"]


# ---------------------------------------------------------------------------
# Metrics Collector fixture
# ---------------------------------------------------------------------------


@pytest.fixture(scope="session")
def metrics_collector(request: pytest.FixtureRequest):
    """Session-scoped metrics collector for capability analysis."""
    from tests.e2e.capability_metrics import MetricsCollector

    collector: MetricsCollector = request.config._e2e_metrics_collector  # type: ignore[attr-defined]
    return collector