fischer-agentkit/tests/e2e/conftest.py

428 lines
15 KiB
Python

"""E2E test fixtures: server lifecycle, CLI runner, API client, WebSocket helpers.
Design principles:
1. Start a real uvicorn server with MockLLMProvider once per session
2. CLI tests use subprocess to invoke `agentkit` commands (OpenCLI pattern)
3. API tests use httpx against the live server
4. WebSocket tests use the `websockets` library against the live server
5. All tests are idempotent and repeatable
"""
import asyncio
import json
import os
import shutil
import subprocess
import sys
import time
from typing import Any, Generator
import httpx
import pytest
# ---------------------------------------------------------------------------
# Markers
# ---------------------------------------------------------------------------
# Module-level marker declaration.
# NOTE(review): pytest reads `pytestmark` from modules it collects tests from;
# confirm it has the intended effect when declared in a conftest.py.
pytestmark = pytest.mark.integration
def pytest_configure(config: pytest.Config) -> None:
    """Register the E2E markers and attach a metrics collector to ``config``.

    The collector is stored on ``config._e2e_metrics_collector`` so that the
    ``metrics_collector`` fixture and ``pytest_sessionfinish`` can share it.
    """
    marker_lines = (
        "e2e: end-to-end backtest (requires server)",
        "e2e_basic: basic function correctness test",
        "e2e_capability: agent intelligence capability test",
    )
    for marker_line in marker_lines:
        config.addinivalue_line("markers", marker_line)

    # One collector instance for the whole test session.
    from tests.e2e.capability_metrics import MetricsCollector

    config._e2e_metrics_collector = MetricsCollector()  # type: ignore[attr-defined]
def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None:
    """After all tests, generate a capability analysis report if data was collected.

    Does nothing when no collector is attached to the config or when the
    collector holds no observations. Otherwise builds a report, optionally
    attaches L3 output-quality evaluations, saves the report under
    ``test-results/e2e`` (relative to two directories above this file) and
    prints a summary.

    Args:
        session: the finished pytest session; only ``session.config`` is read.
        exitstatus: pytest exit status (unused).
    """
    # Use getattr with a default: the attribute only exists if pytest_configure
    # ran to completion, and a missing attribute must not crash session teardown.
    collector = getattr(session.config, "_e2e_metrics_collector", None)
    if collector is None or not collector.observations:
        return

    from tests.e2e.capability_metrics import MetricsAnalyzer, MetricsReporter

    analyzer = MetricsAnalyzer()
    report = analyzer.generate_report(collector)

    # L3 Output Quality Evaluation (optional, requires LLM).
    # Deliberately best-effort: any failure here is reported as a warning and
    # the report generated above is still saved.
    try:
        from tests.e2e.test_capability_router_direct import _get_components

        router, _skill_registry, _intent_router = _get_components()
        llm_gateway = getattr(router, "_llm_gateway", None)
        if llm_gateway is not None:
            quality_evals = collector.evaluate_output_quality(llm_gateway)
            # Regenerate the report after the evaluation pass, then attach
            # the evaluations to it.
            report = analyzer.generate_report(collector)
            report.output_quality_evaluations = quality_evals
    except Exception as e:
        print(f"Warning: L3 output quality evaluation skipped: {e}")

    output_dir = os.path.join(os.path.dirname(__file__), "..", "..", "test-results", "e2e")
    paths = MetricsReporter.save_report(report, output_dir)

    # Print summary to console
    print("\n" + MetricsReporter.to_text(report))
    print(f"\nReport saved to: {paths['json']}")
    print(f"Text report: {paths['text']}")
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Loopback address used both for the server's --host and for client URLs.
E2E_HOST = "127.0.0.1"
E2E_PORT = 18765 # dedicated port to avoid conflict with dev server
# HTTP and WebSocket base URLs derived from the host and port above.
E2E_BASE_URL = f"http://{E2E_HOST}:{E2E_PORT}"
E2E_WS_URL = f"ws://{E2E_HOST}:{E2E_PORT}"
# API key written into the test server's auth config and sent by the test clients.
E2E_API_KEY = "ak_live_e2e_test_key_000000000000000000000000000000000000000000000000"
# ---------------------------------------------------------------------------
# Mock LLM Provider (deterministic responses for backtest)
# ---------------------------------------------------------------------------
# Canned LLM outputs, keyed by a short name. Each value is a JSON document
# encoded as a string. The whole mapping is serialized with json.dumps and
# handed to the server through the AGENTKIT_E2E_MOCK_RESPONSES environment
# variable (see _build_mock_env).
# NOTE(review): how the server picks a key for a given request is not visible
# in this file -- confirm against the mock provider implementation.
MOCK_LLM_RESPONSES: dict[str, str] = {
    # Default / generic
    "default": '{"result": "mock response", "content": "This is a mock LLM response for e2e testing."}',
    # Content generation
    "content_writer": '{"result": "article generated", "content": "AI is transforming industries by enabling automation and intelligent decision-making."}',
    # Translation
    "translator": '{"result": "translation complete", "content": "This is the translated text."}',
    # Summarization
    "summarizer": '{"result": "summary generated", "content": "Key points: 1) Topic overview 2) Main findings 3) Conclusion."}',
    # Code generation
    "coder": '{"result": "code generated", "content": "def hello():\\n print(\\"Hello, World!\\")"}',
    # Analysis
    "analyst": '{"result": "analysis complete", "content": "The data shows a positive trend with 15% growth."}',
    # ReAct tool call
    "react_tool_call": '{"thought": "I need to search for information", "action": "web_search", "action_input": {"query": "test"}, "observation": "Search results found"}',
    # ReAct final answer
    "react_final": '{"thought": "I have enough information", "final_answer": "Based on my analysis, the answer is 42."}',
}
def _build_mock_env(tmp_path: Any) -> dict[str, str]:
    """Return a copy of the current environment with the E2E overrides applied.

    The overrides switch on E2E mode, pass the mock responses as JSON, set
    the API key and WebSocket timeout, and blank out the real LLM provider
    keys.

    Args:
        tmp_path: accepted for the caller's convenience; not read here.
    """
    overrides = {
        "AGENTKIT_E2E_MODE": "1",
        "AGENTKIT_E2E_MOCK_RESPONSES": json.dumps(MOCK_LLM_RESPONSES),
        "AGENTKIT_API_KEY": E2E_API_KEY,
        "AGENTKIT_WS_TIMEOUT": "0",
        # Disable real LLM calls
        "OPENAI_API_KEY": "",
        "ANTHROPIC_API_KEY": "",
        "DEEPSEEK_API_KEY": "",
    }
    merged = os.environ.copy()
    for key, value in overrides.items():
        merged[key] = value
    return merged
# ---------------------------------------------------------------------------
# Server lifecycle fixture
# ---------------------------------------------------------------------------
def _stop_server_process(proc: subprocess.Popen) -> None:
    """Terminate ``proc`` if it is still running, escalate to kill, and reap it."""
    if proc.poll() is not None:
        return
    proc.terminate()
    try:
        proc.wait(timeout=10)
    except subprocess.TimeoutExpired:
        proc.kill()
        # Reap the killed child so it does not linger as a zombie.
        proc.wait()


def _read_log_tail(path: Any, limit: int = 2000) -> str:
    """Return the last ``limit`` characters of the log file at ``path`` ("" if unreadable)."""
    try:
        return path.read_text(encoding="utf-8", errors="replace")[-limit:]
    except OSError:
        return ""


@pytest.fixture(scope="session")
def e2e_server(tmp_path_factory: pytest.TempPathFactory) -> Generator[str, None, None]:
    """Start a real AgentKit server for the entire E2E session.

    Yields the base URL (e.g. http://127.0.0.1:18765).
    The server is configured with the ``mock`` LLM provider and blank real
    provider API keys, so no real LLM calls are intended.

    Server stdout/stderr are written to log files in the session temp
    directory rather than to pipes: nothing drains a pipe while the tests
    run, so a server that logs enough output would block once the pipe
    buffer fills.

    Fails the session (``pytest.fail``) if the health endpoint does not
    return 200 within 30 seconds or the server process exits first.
    """
    tmp_path = tmp_path_factory.mktemp("e2e_server")

    # Generate a minimal agentkit.yaml for the test server
    config_dir = tmp_path / "config"
    config_dir.mkdir()
    config_file = config_dir / "agentkit.yaml"

    import yaml

    config_file.write_text(
        yaml.dump(
            {
                "server": {"host": E2E_HOST, "port": E2E_PORT},
                "llm": {"default_provider": "mock", "providers": {"mock": {"type": "mock"}}},
                "auth": {"enabled": True, "api_keys": [E2E_API_KEY]},
            }
        )
    )

    env = _build_mock_env(tmp_path)
    env["AGENTKIT_CONFIG"] = str(config_file)

    stdout_log = tmp_path / "server.stdout.log"
    stderr_log = tmp_path / "server.stderr.log"
    base_url = E2E_BASE_URL

    with stdout_log.open("wb") as out_file, stderr_log.open("wb") as err_file:
        # Start server as subprocess
        proc = subprocess.Popen(
            [
                sys.executable,
                "-m",
                "agentkit.cli.main",
                "serve",
                "--host",
                E2E_HOST,
                "--port",
                str(E2E_PORT),
            ],
            env=env,
            stdout=out_file,
            stderr=err_file,
            cwd=str(tmp_path),
        )
        try:
            # Wait for server to be ready (max 30s); stop early if it died.
            deadline = time.monotonic() + 30
            ready = False
            while time.monotonic() < deadline and proc.poll() is None:
                try:
                    resp = httpx.get(f"{base_url}/api/v1/health", timeout=2)
                except httpx.TransportError:
                    # Not accepting connections yet, or too slow to answer;
                    # keep polling until the deadline.
                    pass
                else:
                    # Require our own process to be alive so a 200 from some
                    # other server already bound to the port is not accepted.
                    if resp.status_code == 200 and proc.poll() is None:
                        ready = True
                        break
                time.sleep(0.5)

            if not ready:
                exit_code = proc.poll()  # None when the server was still running
                _stop_server_process(proc)
                pytest.fail(
                    f"E2E server failed to start within 30s.\n"
                    f"exit code: {exit_code}\n"
                    f"stdout: {_read_log_tail(stdout_log)}\n"
                    f"stderr: {_read_log_tail(stderr_log)}"
                )

            yield base_url
        finally:
            # Teardown: always stop the server, including on startup failure.
            _stop_server_process(proc)
# ---------------------------------------------------------------------------
# API client fixture
# ---------------------------------------------------------------------------
@pytest.fixture(scope="session")
def api_client(e2e_server: str) -> Generator[httpx.Client, None, None]:
    """Synchronous httpx client configured for the E2E server.

    The client sends the E2E API key and a JSON content type on every
    request. It is closed when the session ends so its connection pool is
    released instead of being left open.
    """
    with httpx.Client(
        base_url=e2e_server,
        headers={"X-API-Key": E2E_API_KEY, "Content-Type": "application/json"},
        timeout=30,
    ) as client:
        yield client
# ---------------------------------------------------------------------------
# CLI runner (subprocess-based, OpenCLI pattern)
# ---------------------------------------------------------------------------
class CLIRunner:
"""Simulate user CLI operations via subprocess.
This is the 'OpenCLI' pattern: invoke the real `agentkit` binary
as a subprocess and capture its output, exactly as a user would.
"""
def __init__(self, env: dict[str, str] | None = None, cwd: str | None = None):
self.env = env or os.environ.copy()
self.cwd = cwd
def _resolve_agentkit_cmd(self) -> list[str]:
"""Resolve the agentkit command to use.
Prefer the installed `agentkit` script (handles Rich/Typer output correctly),
fall back to `python -m agentkit.cli.main`.
"""
agentkit_path = shutil.which("agentkit")
if agentkit_path:
return [agentkit_path]
return [sys.executable, "-m", "agentkit.cli.main"]
def run(self, args: list[str], timeout: int = 30) -> subprocess.CompletedProcess[str]:
"""Run an agentkit CLI command and return the result.
Args:
args: CLI arguments, e.g. ["version"] or ["task", "submit", ...]
timeout: maximum seconds to wait
Returns:
CompletedProcess with stdout, stderr, returncode
"""
cmd = [*self._resolve_agentkit_cmd(), *args]
return subprocess.run(
cmd,
capture_output=True,
text=True,
timeout=timeout,
env=self.env,
cwd=self.cwd,
)
def run_server_command(
self, args: list[str], server_url: str, timeout: int = 30
) -> subprocess.CompletedProcess[str]:
"""Run a CLI command that requires --server-url."""
full_args = [*args, "--server-url", server_url]
return self.run(full_args, timeout=timeout)
@pytest.fixture
def cli_runner(tmp_path: Any) -> CLIRunner:
    """Per-test CLI runner whose config dir and working dir live under ``tmp_path``."""
    isolated_env = os.environ.copy()
    isolated_env.update(
        {
            "AGENTKIT_CONFIG_DIR": str(tmp_path / "config"),
            "AGENTKIT_WS_TIMEOUT": "0",
            # Prevent onboarding prompts
            "AGENTKIT_E2E_MODE": "1",
        }
    )
    return CLIRunner(env=isolated_env, cwd=str(tmp_path))
@pytest.fixture(scope="session")
def cli_runner_session(e2e_server: str) -> CLIRunner:
    """Session-wide CLI runner whose environment points at the E2E server."""
    server_env = os.environ.copy()
    server_env.update(
        {
            "AGENTKIT_SERVER_URL": e2e_server,
            "AGENTKIT_API_KEY": E2E_API_KEY,
            "AGENTKIT_WS_TIMEOUT": "0",
            "AGENTKIT_E2E_MODE": "1",
        }
    )
    return CLIRunner(env=server_env)
# ---------------------------------------------------------------------------
# WebSocket helper
# ---------------------------------------------------------------------------
class WSChatHelper:
    """Drive the chat WebSocket endpoint from E2E tests."""

    def __init__(self, base_ws_url: str, api_key: str):
        """Remember the WebSocket base URL and the API key put in the query string."""
        self.api_key = api_key
        self.base_ws_url = base_ws_url

    async def connect_and_chat(
        self,
        session_id: str,
        messages: list[dict[str, str]],
        timeout: float = 10.0,
    ) -> list[dict[str, Any]]:
        """Open the chat socket, send ``messages`` and gather the server's replies.

        Args:
            session_id: chat session ID
            messages: list of {"type": "message", "content": "..."}
            timeout: max seconds to wait for each individual server message

        Returns:
            Every decoded server message in arrival order, starting with the
            ``connected`` event. If no message arrives within ``timeout``
            while collecting, a ``{"type": "timeout"}`` entry is appended
            and collection stops.

        The test is skipped when the ``websockets`` package is missing.
        """
        try:
            import websockets
        except ImportError:
            pytest.skip("websockets package not installed")

        endpoint = f"{self.base_ws_url}/api/v1/chat/ws/{session_id}?api_key={self.api_key}"
        transcript: list[dict[str, Any]] = []

        async with websockets.connect(endpoint) as ws:
            # The first frame must be the "connected" handshake event.
            handshake = json.loads(await asyncio.wait_for(ws.recv(), timeout=timeout))
            transcript.append(handshake)
            assert handshake.get("type") == "connected", f"Expected connected, got {handshake}"

            # Send every user message up front.
            # NOTE(review): replies are only read after all messages are sent,
            # and reading stops at the first final_answer/error -- confirm
            # this is the intended behaviour for multi-message calls.
            for outgoing in messages:
                await ws.send(json.dumps(outgoing))

            # Read until a terminal event arrives or the server goes quiet.
            while True:
                try:
                    frame = await asyncio.wait_for(ws.recv(), timeout=timeout)
                except asyncio.TimeoutError:
                    transcript.append({"type": "timeout"})
                    break
                event = json.loads(frame)
                transcript.append(event)
                if event.get("type") in ("final_answer", "error"):
                    break

        return transcript
@pytest.fixture(scope="session")
def ws_helper(e2e_server: str) -> WSChatHelper:
    """Build a WSChatHelper that targets the E2E server over ws:// or wss://."""
    # Swap the HTTP scheme for its WebSocket counterpart.
    ws_base = e2e_server.replace("http://", "ws://")
    ws_base = ws_base.replace("https://", "wss://")
    return WSChatHelper(base_ws_url=ws_base, api_key=E2E_API_KEY)
# ---------------------------------------------------------------------------
# Skill / Agent setup helpers
# ---------------------------------------------------------------------------
def register_skill_via_api(
    api_client: httpx.Client,
    name: str,
    keywords: list[str] | None = None,
    execution_mode: str = "direct",
    task_mode: str = "llm_generate",
) -> httpx.Response:
    """POST a skill definition to ``/api/v1/skills`` and return the raw response.

    Args:
        api_client: client used to send the request.
        name: skill name; also used as the ``agent_type`` and in the
            generated description and prompt text.
        keywords: intent keywords; falls back to ``[name]`` when empty or None.
        execution_mode: when not ``"direct"``, it is included in the config
            together with ``max_steps`` = 5.
        task_mode: value of the config's ``task_mode`` field.

    Returns:
        The unchecked HTTP response; callers assert on the status themselves.
    """
    prompt_section = {
        "identity": f"You are a {name} assistant",
        "instructions": f"Perform {name} tasks",
        "output_format": "JSON",
    }
    intent_section = {
        "keywords": keywords or [name],
        "description": f"{name} skill for e2e testing",
    }
    config: dict[str, Any] = {
        "name": name,
        "agent_type": name,
        "task_mode": task_mode,
        "description": f"E2E test skill: {name}",
        "prompt": prompt_section,
        "intent": intent_section,
    }
    # "direct" is left implicit; any other mode is spelled out with a step cap.
    if execution_mode != "direct":
        config.update(execution_mode=execution_mode, max_steps=5)
    return api_client.post("/api/v1/skills", json={"config": config})
def create_session_via_api(api_client: httpx.Client, agent_name: str = "test") -> str:
    """Create a chat session through the API and return its ``session_id``.

    Asserts that the server answers with HTTP 201; the response body is
    included in the assertion message otherwise.
    """
    response = api_client.post("/api/v1/chat/sessions", json={"agent_name": agent_name})
    assert response.status_code == 201, f"Failed to create session: {response.text}"
    payload = response.json()
    return payload["session_id"]
# ---------------------------------------------------------------------------
# Metrics Collector fixture
# ---------------------------------------------------------------------------
@pytest.fixture(scope="session")
def metrics_collector(request: pytest.FixtureRequest):
    """Expose the collector stored on the pytest config as a session fixture."""
    from tests.e2e.capability_metrics import MetricsCollector

    # Same instance that pytest_sessionfinish reads when building the report.
    shared: MetricsCollector = request.config._e2e_metrics_collector  # type: ignore[attr-defined]
    return shared