"""E2E test fixtures: server lifecycle, CLI runner, API client, WebSocket helpers.

Design principles:

1. Start a real uvicorn server with MockLLMProvider once per session
2. CLI tests use subprocess to invoke `agentkit` commands (OpenCLI pattern)
3. API tests use httpx against the live server
4. WebSocket tests use the `websockets` library against the live server
5. All tests are idempotent and repeatable
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
from typing import Any, Generator
|
|
|
|
import httpx
|
|
import pytest
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Markers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Module-level marker: tags tests defined in this module as "integration".
# NOTE(review): if this file is a conftest.py, a module-level ``pytestmark``
# only applies to tests collected from this module itself — confirm that it
# has the intended effect on the E2E test modules.
pytestmark = pytest.mark.integration
|
|
|
|
|
|
def pytest_configure(config: pytest.Config) -> None:
    """Register the E2E markers and attach a metrics collector to ``config``.

    The collector is stored on the config object as ``_e2e_metrics_collector``
    so that both the ``metrics_collector`` fixture and ``pytest_sessionfinish``
    can reach the same instance.

    Args:
        config: the pytest configuration object for the current run.
    """
    marker_lines = (
        "e2e: end-to-end backtest (requires server)",
        "e2e_basic: basic function correctness test",
        "e2e_capability: agent intelligence capability test",
    )
    for marker_line in marker_lines:
        config.addinivalue_line("markers", marker_line)

    # Imported lazily, after the markers are registered, exactly as before.
    from tests.e2e.capability_metrics import MetricsCollector

    # One collector instance shared by the whole session.
    config._e2e_metrics_collector = MetricsCollector()  # type: ignore[attr-defined]
|
|
|
|
|
|
def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None:
    """After all tests, generate the capability analysis report if data was collected.

    Does nothing when no collector is attached to the config or when the
    collector holds no observations. Otherwise it builds a report, optionally
    enriches it with an LLM-based output quality evaluation (best effort),
    saves it under ``<this dir>/../../test-results/e2e`` and prints a summary.

    Args:
        session: the finished pytest session; its ``config`` may carry the
            ``_e2e_metrics_collector`` attribute set by ``pytest_configure``.
        exitstatus: the session exit status (unused).
    """
    # getattr with a default: the attribute is absent if pytest_configure did
    # not run to completion, and a plain attribute access would then raise
    # AttributeError here instead of reaching the None guard below.
    collector = getattr(session.config, "_e2e_metrics_collector", None)
    if collector is None or not collector.observations:
        return

    from tests.e2e.capability_metrics import MetricsAnalyzer, MetricsReporter

    analyzer = MetricsAnalyzer()
    # Baseline report; kept as-is if the optional L3 evaluation below fails.
    report = analyzer.generate_report(collector)

    # L3 Output Quality Evaluation (optional, requires LLM)
    try:
        from tests.e2e.test_capability_router_direct import _get_components

        # Only the router is needed; the other two components are ignored.
        router, _skill_registry, _intent_router = _get_components()
        llm_gateway = getattr(router, "_llm_gateway", None)
        if llm_gateway is not None:
            quality_evals = collector.evaluate_output_quality(llm_gateway)
            # Regenerate the report after the evaluation has run.
            report = analyzer.generate_report(collector)
            # Attach quality evaluations to report
            report.output_quality_evaluations = quality_evals
    except Exception as e:
        # Deliberate best effort: the evaluation is optional and must never
        # prevent the baseline report from being written.
        print(f"Warning: L3 output quality evaluation skipped: {e}")

    output_dir = os.path.join(os.path.dirname(__file__), "..", "..", "test-results", "e2e")
    paths = MetricsReporter.save_report(report, output_dir)

    # Print summary to console
    print("\n" + MetricsReporter.to_text(report))
    print(f"\nReport saved to: {paths['json']}")
    print(f"Text report: {paths['text']}")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Constants
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Address the E2E server binds to and that every client fixture targets.
E2E_HOST = "127.0.0.1"
E2E_PORT = 18765  # dedicated port to avoid conflict with dev server

# host:port shared by the HTTP and WebSocket URLs below.
_E2E_NETLOC = f"{E2E_HOST}:{E2E_PORT}"
E2E_BASE_URL = "http://" + _E2E_NETLOC
E2E_WS_URL = "ws://" + _E2E_NETLOC

# Fixed API key shared by the test server configuration and the test clients.
E2E_API_KEY = "ak_live_e2e_test_key_000000000000000000000000000000000000000000000000"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Mock LLM Provider (deterministic responses for backtest)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Canned LLM outputs for the E2E server. Each value is a JSON document encoded
# as a string; the whole mapping is serialized into the
# AGENTKIT_E2E_MOCK_RESPONSES environment variable by _build_mock_env().
# NOTE(review): the keys look like skill/agent names plus ReAct phases —
# confirm against the mock provider how a key is selected.
MOCK_LLM_RESPONSES: dict[str, str] = {
    # Default / generic
    "default": '{"result": "mock response", "content": "This is a mock LLM response for e2e testing."}',
    # Content generation
    "content_writer": '{"result": "article generated", "content": "AI is transforming industries by enabling automation and intelligent decision-making."}',
    # Translation
    "translator": '{"result": "translation complete", "content": "This is the translated text."}',
    # Summarization
    "summarizer": '{"result": "summary generated", "content": "Key points: 1) Topic overview 2) Main findings 3) Conclusion."}',
    # Code generation
    "coder": '{"result": "code generated", "content": "def hello():\\n print(\\"Hello, World!\\")"}',
    # Analysis
    "analyst": '{"result": "analysis complete", "content": "The data shows a positive trend with 15% growth."}',
    # ReAct tool call
    "react_tool_call": '{"thought": "I need to search for information", "action": "web_search", "action_input": {"query": "test"}, "observation": "Search results found"}',
    # ReAct final answer
    "react_final": '{"thought": "I have enough information", "final_answer": "Based on my analysis, the answer is 42."}',
}
|
|
|
|
|
|
def _build_mock_env(tmp_path: Any) -> dict[str, str]:
    """Return the process environment for a server backed by MockLLMProvider.

    The result is a copy of the current environment with the E2E settings
    layered on top; ``os.environ`` itself is not modified.

    Args:
        tmp_path: accepted for interface compatibility; not used by this
            function.

    Returns:
        A new environment mapping suitable for ``subprocess``.
    """
    e2e_overrides = {
        "AGENTKIT_E2E_MODE": "1",
        "AGENTKIT_E2E_MOCK_RESPONSES": json.dumps(MOCK_LLM_RESPONSES),
        "AGENTKIT_API_KEY": E2E_API_KEY,
        "AGENTKIT_WS_TIMEOUT": "0",
        # Disable real LLM calls
        "OPENAI_API_KEY": "",
        "ANTHROPIC_API_KEY": "",
        "DEEPSEEK_API_KEY": "",
    }
    # Later entries win, so the overrides replace any inherited values.
    return {**os.environ, **e2e_overrides}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Server lifecycle fixture
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _tail_of_log(path: Any, limit: int = 2000) -> str:
    """Return the last ``limit`` characters of a log file, or "" if unreadable."""
    try:
        return path.read_text(errors="replace")[-limit:]
    except OSError:
        return ""


def _stop_server_process(proc: "subprocess.Popen[bytes]") -> None:
    """Terminate ``proc`` if it is still running, escalating to kill, and reap it."""
    if proc.poll() is not None:
        return
    proc.terminate()
    try:
        proc.wait(timeout=10)
    except subprocess.TimeoutExpired:
        proc.kill()
        # Reap the killed process so it does not linger as a zombie.
        proc.wait()


@pytest.fixture(scope="session")
def e2e_server(tmp_path_factory: pytest.TempPathFactory) -> Generator[str, None, None]:
    """Start a real AgentKit server for the entire E2E session.

    Returns the base URL (e.g. http://127.0.0.1:18765).
    The server uses MockLLMProvider so no real LLM calls are made.

    The server's stdout/stderr are written to ``server.stdout.log`` and
    ``server.stderr.log`` in the fixture's temporary directory. The fixture
    fails the session (via ``pytest.fail``) if the server exits during startup
    or does not answer the health endpoint with 200 within 30 seconds.
    """
    import yaml

    tmp_path = tmp_path_factory.mktemp("e2e_server")

    # Generate a minimal agentkit.yaml for the test server
    config_dir = tmp_path / "config"
    config_dir.mkdir()
    config_file = config_dir / "agentkit.yaml"
    config_file.write_text(
        yaml.dump(
            {
                "server": {"host": E2E_HOST, "port": E2E_PORT},
                "llm": {"default_provider": "mock", "providers": {"mock": {"type": "mock"}}},
                "auth": {"enabled": True, "api_keys": [E2E_API_KEY]},
            }
        )
    )

    env = _build_mock_env(tmp_path)
    env["AGENTKIT_CONFIG"] = str(config_file)

    stdout_path = tmp_path / "server.stdout.log"
    stderr_path = tmp_path / "server.stderr.log"

    # Log to files rather than subprocess.PIPE: nothing reads the pipes while
    # the tests run, so a chatty server would block once the OS pipe buffer
    # fills up.
    with open(stdout_path, "wb") as stdout_log, open(stderr_path, "wb") as stderr_log:
        # Start server as subprocess
        proc = subprocess.Popen(
            [
                sys.executable,
                "-m",
                "agentkit.cli.main",
                "serve",
                "--host",
                E2E_HOST,
                "--port",
                str(E2E_PORT),
            ],
            env=env,
            stdout=stdout_log,
            stderr=stderr_log,
            cwd=str(tmp_path),
        )
        try:
            # Wait for server to be ready (max 30s)
            base_url = E2E_BASE_URL
            deadline = time.monotonic() + 30
            ready = False
            while time.monotonic() < deadline:
                if proc.poll() is not None:
                    # The server died; no point in polling until the deadline.
                    break
                try:
                    resp = httpx.get(f"{base_url}/api/v1/health", timeout=2)
                except httpx.TransportError:
                    # Not accepting connections yet, or too slow to answer:
                    # keep polling instead of aborting the fixture.
                    pass
                else:
                    if resp.status_code == 200:
                        ready = True
                        break
                time.sleep(0.5)

            if not ready:
                exit_code = proc.poll()
                _stop_server_process(proc)
                if exit_code is None:
                    headline = "E2E server failed to start within 30s."
                else:
                    headline = f"E2E server exited during startup (exit code {exit_code})."
                pytest.fail(
                    f"{headline}\n"
                    f"stdout: {_tail_of_log(stdout_path)}\n"
                    f"stderr: {_tail_of_log(stderr_path)}"
                )

            yield base_url
        finally:
            # Teardown (also runs when startup fails): never leave the server behind.
            _stop_server_process(proc)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# API client fixture
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.fixture(scope="session")
def api_client(e2e_server: str) -> Generator[httpx.Client, None, None]:
    """Synchronous httpx client configured for the E2E server.

    The client sends the E2E API key in the ``X-API-Key`` header and uses a
    30 second timeout. It is closed when the session ends so its connection
    pool is released instead of leaking.

    Args:
        e2e_server: base URL of the running E2E server.

    Yields:
        The shared ``httpx.Client``.
    """
    with httpx.Client(
        base_url=e2e_server,
        headers={"X-API-Key": E2E_API_KEY, "Content-Type": "application/json"},
        timeout=30,
    ) as client:
        yield client
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CLI runner (subprocess-based, OpenCLI pattern)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class CLIRunner:
|
|
"""Simulate user CLI operations via subprocess.
|
|
|
|
This is the 'OpenCLI' pattern: invoke the real `agentkit` binary
|
|
as a subprocess and capture its output, exactly as a user would.
|
|
"""
|
|
|
|
def __init__(self, env: dict[str, str] | None = None, cwd: str | None = None):
|
|
self.env = env or os.environ.copy()
|
|
self.cwd = cwd
|
|
|
|
def _resolve_agentkit_cmd(self) -> list[str]:
|
|
"""Resolve the agentkit command to use.
|
|
|
|
Prefer the installed `agentkit` script (handles Rich/Typer output correctly),
|
|
fall back to `python -m agentkit.cli.main`.
|
|
"""
|
|
agentkit_path = shutil.which("agentkit")
|
|
if agentkit_path:
|
|
return [agentkit_path]
|
|
return [sys.executable, "-m", "agentkit.cli.main"]
|
|
|
|
def run(self, args: list[str], timeout: int = 30) -> subprocess.CompletedProcess[str]:
|
|
"""Run an agentkit CLI command and return the result.
|
|
|
|
Args:
|
|
args: CLI arguments, e.g. ["version"] or ["task", "submit", ...]
|
|
timeout: maximum seconds to wait
|
|
|
|
Returns:
|
|
CompletedProcess with stdout, stderr, returncode
|
|
"""
|
|
cmd = [*self._resolve_agentkit_cmd(), *args]
|
|
return subprocess.run(
|
|
cmd,
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=timeout,
|
|
env=self.env,
|
|
cwd=self.cwd,
|
|
)
|
|
|
|
def run_server_command(
|
|
self, args: list[str], server_url: str, timeout: int = 30
|
|
) -> subprocess.CompletedProcess[str]:
|
|
"""Run a CLI command that requires --server-url."""
|
|
full_args = [*args, "--server-url", server_url]
|
|
return self.run(full_args, timeout=timeout)
|
|
|
|
|
|
@pytest.fixture
def cli_runner(tmp_path: Any) -> CLIRunner:
    """Provide a CLI runner whose config dir and working dir live under ``tmp_path``.

    The environment is a copy of the current one with the AgentKit settings
    below layered on top.
    """
    isolated_env = {
        **os.environ,
        "AGENTKIT_CONFIG_DIR": str(tmp_path / "config"),
        "AGENTKIT_WS_TIMEOUT": "0",
        # Prevent onboarding prompts
        "AGENTKIT_E2E_MODE": "1",
    }
    return CLIRunner(env=isolated_env, cwd=str(tmp_path))
|
|
|
|
|
|
@pytest.fixture(scope="session")
def cli_runner_session(e2e_server: str) -> CLIRunner:
    """Provide a session-wide CLI runner pointed at the running E2E server.

    The environment is a copy of the current one plus the server URL, the E2E
    API key and the E2E mode switches. No working directory is set.
    """
    server_env = {
        **os.environ,
        "AGENTKIT_SERVER_URL": e2e_server,
        "AGENTKIT_API_KEY": E2E_API_KEY,
        "AGENTKIT_WS_TIMEOUT": "0",
        "AGENTKIT_E2E_MODE": "1",
    }
    return CLIRunner(env=server_env)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# WebSocket helper
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class WSChatHelper:
    """Drive a chat WebSocket conversation for E2E tests."""

    def __init__(self, base_ws_url: str, api_key: str):
        """Remember the WebSocket base URL and the API key used to connect."""
        self.api_key = api_key
        self.base_ws_url = base_ws_url

    async def connect_and_chat(
        self,
        session_id: str,
        messages: list[dict[str, str]],
        timeout: float = 10.0,
    ) -> list[dict[str, Any]]:
        """Open the chat socket, send every message, and gather the replies.

        The test is skipped if the ``websockets`` package is not installed.
        The first frame must have type "connected". Collection stops at the
        first frame of type "final_answer" or "error", or when no frame
        arrives within ``timeout`` seconds, in which case a synthetic
        ``{"type": "timeout"}`` entry is appended.

        Args:
            session_id: chat session ID
            messages: list of {"type": "message", "content": "..."}
            timeout: max seconds to wait for each incoming frame

        Returns:
            list of all server-sent messages, in arrival order
        """
        try:
            import websockets
        except ImportError:
            pytest.skip("websockets package not installed")

        received: list[dict[str, Any]] = []
        uri = f"{self.base_ws_url}/api/v1/chat/ws/{session_id}?api_key={self.api_key}"
        terminal_types = ("final_answer", "error")

        async with websockets.connect(uri) as ws:
            # Wait for connected event
            greeting = json.loads(await asyncio.wait_for(ws.recv(), timeout=timeout))
            received.append(greeting)
            assert greeting.get("type") == "connected", f"Expected connected, got {greeting}"

            # Send user messages
            for outgoing in messages:
                await ws.send(json.dumps(outgoing))

            # Collect responses until final_answer or error
            finished = False
            while not finished:
                try:
                    frame = json.loads(await asyncio.wait_for(ws.recv(), timeout=timeout))
                except asyncio.TimeoutError:
                    received.append({"type": "timeout"})
                    finished = True
                else:
                    received.append(frame)
                    finished = frame.get("type") in terminal_types

        return received
|
|
|
|
|
|
@pytest.fixture(scope="session")
def ws_helper(e2e_server: str) -> WSChatHelper:
    """Provide a WSChatHelper bound to the E2E server's WebSocket endpoint.

    The HTTP(S) base URL is converted to the matching ws:// or wss:// URL.
    """
    plain_swapped = e2e_server.replace("http://", "ws://")
    socket_url = plain_swapped.replace("https://", "wss://")
    return WSChatHelper(base_ws_url=socket_url, api_key=E2E_API_KEY)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Skill / Agent setup helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def register_skill_via_api(
    api_client: httpx.Client,
    name: str,
    keywords: list[str] | None = None,
    execution_mode: str = "direct",
    task_mode: str = "llm_generate",
) -> httpx.Response:
    """POST a skill definition to ``/api/v1/skills`` for E2E testing.

    Args:
        api_client: client used to send the request.
        name: skill name; also used as the agent type and in the generated
            description, prompt and intent texts.
        keywords: intent keywords; falls back to ``[name]`` when omitted or empty.
        execution_mode: when not "direct", it is sent along with ``max_steps=5``.
        task_mode: value of the ``task_mode`` field.

    Returns:
        The raw HTTP response; the status code is not checked here.
    """
    prompt_section = {
        "identity": f"You are a {name} assistant",
        "instructions": f"Perform {name} tasks",
        "output_format": "JSON",
    }
    intent_section = {
        "keywords": keywords or [name],
        "description": f"{name} skill for e2e testing",
    }
    config: dict[str, Any] = {
        "name": name,
        "agent_type": name,
        "task_mode": task_mode,
        "description": f"E2E test skill: {name}",
        "prompt": prompt_section,
        "intent": intent_section,
    }

    # Direct execution is sent without these two fields.
    if execution_mode != "direct":
        config.update(execution_mode=execution_mode, max_steps=5)

    return api_client.post("/api/v1/skills", json={"config": config})
|
|
|
|
|
|
def create_session_via_api(api_client: httpx.Client, agent_name: str = "test") -> str:
    """Create a chat session through the API and return its ID.

    Args:
        api_client: client used to send the request.
        agent_name: agent name sent in the request body.

    Returns:
        The ``session_id`` field of the JSON response.

    Raises:
        AssertionError: if the server does not answer with status 201.
    """
    response = api_client.post("/api/v1/chat/sessions", json={"agent_name": agent_name})
    created = response.status_code == 201
    assert created, f"Failed to create session: {response.text}"
    payload = response.json()
    return payload["session_id"]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Metrics Collector fixture
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.fixture(scope="session")
def metrics_collector(request: pytest.FixtureRequest):
    """Expose the session-wide metrics collector used for capability analysis.

    The collector is read from the ``_e2e_metrics_collector`` attribute of the
    pytest config, which ``pytest_configure`` sets.
    """
    from tests.e2e.capability_metrics import MetricsCollector

    pytest_config = request.config
    shared_collector: MetricsCollector = pytest_config._e2e_metrics_collector  # type: ignore[attr-defined]
    return shared_collector
|