fischer-agentkit/tests/unit/server/test_evolution_dashboard.py

519 lines
18 KiB
Python

"""Tests for Evolution Dashboard API routes"""
from __future__ import annotations
import json
from datetime import datetime, timezone
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from fastapi.testclient import TestClient
from agentkit.llm.gateway import LLMGateway
from agentkit.server.app import create_app
from agentkit.server.routes.evolution_dashboard import (
DashboardExperience,
DashboardOptimization,
_experiences,
_optimizations,
)
from agentkit.skills.registry import SkillRegistry
from agentkit.tools.registry import ToolRegistry
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def mock_llm_gateway():
    """Provide the LLM gateway that is handed to ``create_app``.

    Despite the ``mock_`` prefix, this constructs an ``LLMGateway`` with its
    default constructor arguments; nothing is patched in this fixture.
    """
    gateway = LLMGateway()
    return gateway
@pytest.fixture
def skill_registry():
    """Provide a ``SkillRegistry`` built with default constructor arguments."""
    registry = SkillRegistry()
    return registry
@pytest.fixture
def tool_registry():
    """Provide a ``ToolRegistry`` built with default constructor arguments."""
    registry = ToolRegistry()
    return registry
@pytest.fixture
def app(mock_llm_gateway, skill_registry, tool_registry):
    """Build the application under test from the three dependency fixtures."""
    dependencies = {
        "llm_gateway": mock_llm_gateway,
        "skill_registry": skill_registry,
        "tool_registry": tool_registry,
    }
    return create_app(**dependencies)
@pytest.fixture
def client(app):
    """Wrap the application fixture in a ``TestClient`` for in-process requests."""
    test_client = TestClient(app)
    return test_client
@pytest.fixture(autouse=True)
def clear_in_memory_stores():
    """Empty the module-level dashboard stores before and after every test."""

    def _reset():
        # Same order as the stores are imported: experiences, then optimizations.
        for store in (_experiences, _optimizations):
            store.clear()

    _reset()
    yield
    _reset()
def _add_experience(
    task_type: str = "code_review",
    goal: str = "Review PR #123",
    outcome: str = "success",
    duration: float = 30.0,
):
    """Append a ``DashboardExperience`` to ``_experiences`` and return it.

    The record id is ``exp-<n>``, where ``n`` is the size of the store before
    the append. ``duration`` is stored as ``duration_seconds`` and
    ``created_at`` is the current UTC time.
    """
    next_index = len(_experiences)
    record = DashboardExperience(
        id=f"exp-{next_index}",
        task_type=task_type,
        goal=goal,
        outcome=outcome,
        duration_seconds=duration,
        created_at=datetime.now(timezone.utc),
    )
    _experiences.append(record)
    return record
def _add_optimization(
    task_type: str = "code_review",
    previous_path: list[str] | None = None,
    current_path: list[str] | None = None,
    improvement: float = 0.15,
):
    """Append a ``DashboardOptimization`` to ``_optimizations`` and return it.

    Args:
        task_type: Task type recorded on the optimization.
        previous_path: Steps before optimization; ``None`` selects
            ``["step1", "step2", "step3"]``.
        current_path: Steps after optimization; ``None`` selects
            ``["step1", "step3"]``.
        improvement: Improvement value recorded on the optimization.

    Returns:
        The newly created record. Its id is ``opt-<n>``, where ``n`` is the
        size of the store before the append, and ``updated_at`` is the
        current UTC time.
    """
    # Compare against None instead of relying on truthiness so that an
    # explicitly passed empty path is stored as given rather than being
    # silently replaced by the default.
    if previous_path is None:
        previous_path = ["step1", "step2", "step3"]
    if current_path is None:
        current_path = ["step1", "step3"]

    opt = DashboardOptimization(
        id=f"opt-{len(_optimizations)}",
        task_type=task_type,
        previous_path=previous_path,
        current_path=current_path,
        improvement=improvement,
        updated_at=datetime.now(timezone.utc),
    )
    _optimizations.append(opt)
    return opt
# ---------------------------------------------------------------------------
# GET /evolution-dashboard/experiences
# ---------------------------------------------------------------------------
class TestListExperiences:
    """Tests for ``GET /api/v1/evolution-dashboard/experiences``."""

    def test_list_experiences_empty(self, client):
        """With nothing stored, the list is empty and the total is zero."""
        response = client.get("/api/v1/evolution-dashboard/experiences")
        assert response.status_code == 200
        data = response.json()
        assert "experiences" in data
        assert "total" in data
        assert data["total"] == 0
        assert data["experiences"] == []

    def test_list_experiences_with_data(self, client):
        """Two stored experiences are both returned and counted."""
        _add_experience(goal="Review PR #1", outcome="success")
        _add_experience(goal="Review PR #2", outcome="failure")
        response = client.get("/api/v1/evolution-dashboard/experiences")
        assert response.status_code == 200
        data = response.json()
        assert data["total"] == 2
        assert len(data["experiences"]) == 2

    def test_list_experiences_filter_by_task_type(self, client):
        """The ``task_type`` query parameter keeps only matching experiences."""
        _add_experience(task_type="code_review", goal="Review PR")
        _add_experience(task_type="data_analysis", goal="Analyze data")
        response = client.get(
            "/api/v1/evolution-dashboard/experiences?task_type=code_review"
        )
        assert response.status_code == 200
        data = response.json()
        assert data["total"] == 1
        assert data["experiences"][0]["task_type"] == "code_review"

    def test_list_experiences_filter_by_outcome(self, client):
        """The ``outcome`` query parameter keeps only matching experiences."""
        _add_experience(outcome="success", goal="Success task")
        _add_experience(outcome="failure", goal="Failed task")
        response = client.get(
            "/api/v1/evolution-dashboard/experiences?outcome=success"
        )
        assert response.status_code == 200
        data = response.json()
        assert data["total"] == 1
        assert data["experiences"][0]["outcome"] == "success"

    def test_list_experiences_limit(self, client):
        """``limit=3`` caps the returned list at three of the ten stored items."""
        for i in range(10):
            _add_experience(goal=f"Task {i}")
        response = client.get(
            "/api/v1/evolution-dashboard/experiences?limit=3"
        )
        assert response.status_code == 200
        data = response.json()
        assert len(data["experiences"]) == 3

    def test_experience_structure(self, client):
        """Each serialized experience exposes the expected set of keys."""
        _add_experience()
        response = client.get("/api/v1/evolution-dashboard/experiences")
        # Check the status first, as the sibling tests do, so a failing
        # endpoint is reported as such instead of as a KeyError/IndexError.
        assert response.status_code == 200
        data = response.json()
        exp = data["experiences"][0]
        expected_keys = (
            "id",
            "task_type",
            "goal",
            "outcome",
            "duration",
            "created_at",
        )
        for key in expected_keys:
            assert key in exp, f"missing key: {key}"
# ---------------------------------------------------------------------------
# GET /evolution-dashboard/metrics
# ---------------------------------------------------------------------------
class TestGetMetrics:
    """Tests for ``GET /api/v1/evolution-dashboard/metrics``."""

    def test_metrics_empty(self, client):
        """With nothing stored, totals and success rate are zero."""
        response = client.get("/api/v1/evolution-dashboard/metrics")
        assert response.status_code == 200
        data = response.json()
        assert "metrics" in data
        assert "trends" in data
        metrics = data["metrics"]
        assert metrics["total_tasks"] == 0
        assert metrics["success_rate"] == 0.0

    def test_metrics_with_data(self, client):
        """Two successes and one failure give a 2/3 rate and a 20s average."""
        _add_experience(outcome="success", duration=10.0)
        _add_experience(outcome="success", duration=20.0)
        _add_experience(outcome="failure", duration=30.0)
        response = client.get("/api/v1/evolution-dashboard/metrics")
        assert response.status_code == 200
        data = response.json()
        metrics = data["metrics"]
        assert metrics["total_tasks"] == 3
        assert metrics["success_rate"] == pytest.approx(2 / 3, abs=0.01)
        assert metrics["avg_duration"] == pytest.approx(20.0, abs=0.1)

    def test_metrics_period_7d(self, client):
        """``period=7d`` yields seven trend entries."""
        response = client.get("/api/v1/evolution-dashboard/metrics?period=7d")
        assert response.status_code == 200
        data = response.json()
        assert len(data["trends"]) == 7

    def test_metrics_period_30d(self, client):
        """``period=30d`` yields thirty trend entries."""
        response = client.get("/api/v1/evolution-dashboard/metrics?period=30d")
        assert response.status_code == 200
        data = response.json()
        assert len(data["trends"]) == 30

    def test_metrics_trends_structure(self, client):
        """Every trend entry exposes the expected set of keys."""
        response = client.get("/api/v1/evolution-dashboard/metrics?period=7d")
        assert response.status_code == 200
        data = response.json()
        # Guard against a vacuous pass: with an empty list the loop below
        # would never execute a single assertion.
        assert data["trends"]
        for trend in data["trends"]:
            assert "date" in trend
            assert "success_rate" in trend
            assert "avg_duration" in trend
            assert "retry_rate" in trend

    def test_metrics_period_all(self, client):
        """``period=all`` yields thirty trend entries."""
        response = client.get("/api/v1/evolution-dashboard/metrics?period=all")
        assert response.status_code == 200
        data = response.json()
        assert len(data["trends"]) == 30
# ---------------------------------------------------------------------------
# GET /evolution-dashboard/pitfalls
# ---------------------------------------------------------------------------
class TestCheckPitfalls:
    """Tests for ``GET /api/v1/evolution-dashboard/pitfalls``."""

    def test_pitfalls_no_detector(self, client):
        """When pitfall_detector is not configured, return empty warnings"""
        response = client.get(
            "/api/v1/evolution-dashboard/pitfalls?task_type=code_review&steps=step1,step2"
        )
        assert response.status_code == 200
        data = response.json()
        assert "warnings" in data
        assert data["warnings"] == []

    def test_pitfalls_no_steps(self, client):
        """When no steps provided, return empty warnings"""
        response = client.get(
            "/api/v1/evolution-dashboard/pitfalls?task_type=code_review"
        )
        assert response.status_code == 200
        data = response.json()
        assert data["warnings"] == []

    def test_pitfalls_with_steps(self, client):
        """When steps are provided but no detector, return empty warnings"""
        response = client.get(
            "/api/v1/evolution-dashboard/pitfalls?task_type=code_review&steps=analyze,review,approve"
        )
        assert response.status_code == 200
        data = response.json()
        assert "warnings" in data
        # Assert what the docstring promises; checking only for the key would
        # let a non-empty warning list slip through unnoticed.
        assert data["warnings"] == []

    def test_pitfalls_missing_task_type(self, client):
        """When task_type is missing, should return 422"""
        response = client.get("/api/v1/evolution-dashboard/pitfalls")
        assert response.status_code == 422
# ---------------------------------------------------------------------------
# GET /evolution-dashboard/path-optimizations
# ---------------------------------------------------------------------------
class TestListPathOptimizations:
    """Tests for ``GET /api/v1/evolution-dashboard/path-optimizations``."""

    def test_optimizations_empty(self, client):
        """With nothing stored, the optimization list is empty."""
        response = client.get("/api/v1/evolution-dashboard/path-optimizations")
        assert response.status_code == 200
        data = response.json()
        assert "optimizations" in data
        assert data["optimizations"] == []

    def test_optimizations_with_data(self, client):
        """A stored optimization is returned with its fields intact."""
        _add_optimization(
            task_type="code_review",
            previous_path=["analyze", "review", "approve"],
            current_path=["analyze", "approve"],
            improvement=0.2,
        )
        response = client.get("/api/v1/evolution-dashboard/path-optimizations")
        assert response.status_code == 200
        data = response.json()
        assert len(data["optimizations"]) == 1
        opt = data["optimizations"][0]
        assert opt["task_type"] == "code_review"
        assert opt["improvement"] == 0.2
        assert opt["previous_path"] == ["analyze", "review", "approve"]
        assert opt["current_path"] == ["analyze", "approve"]

    def test_optimizations_filter_by_task_type(self, client):
        """The ``task_type`` query parameter keeps only matching entries."""
        _add_optimization(task_type="code_review")
        _add_optimization(task_type="data_analysis")
        response = client.get(
            "/api/v1/evolution-dashboard/path-optimizations?task_type=code_review"
        )
        assert response.status_code == 200
        data = response.json()
        assert len(data["optimizations"]) == 1
        assert data["optimizations"][0]["task_type"] == "code_review"

    def test_optimizations_limit(self, client):
        """``limit=3`` returns at most three of the ten stored entries."""
        for i in range(10):
            _add_optimization(task_type=f"task_{i}")
        response = client.get(
            "/api/v1/evolution-dashboard/path-optimizations?limit=3"
        )
        assert response.status_code == 200
        data = response.json()
        # NOTE(review): this is only an upper bound, so an empty result also
        # passes. The experiences limit test asserts == 3; confirm whether
        # the same exact count is intended here.
        assert len(data["optimizations"]) <= 3

    def test_optimization_structure(self, client):
        """Each serialized optimization exposes the expected set of keys."""
        _add_optimization()
        response = client.get("/api/v1/evolution-dashboard/path-optimizations")
        # Check the status first, as the sibling tests do, so a failing
        # endpoint is reported as such instead of as a KeyError/IndexError.
        assert response.status_code == 200
        data = response.json()
        opt = data["optimizations"][0]
        expected_keys = (
            "id",
            "task_type",
            "previous_path",
            "current_path",
            "improvement",
            "updated_at",
        )
        for key in expected_keys:
            assert key in opt, f"missing key: {key}"
# ---------------------------------------------------------------------------
# WebSocket /evolution-dashboard/ws
# ---------------------------------------------------------------------------
class TestEvolutionDashboardWebSocket:
    """Tests for the ``/api/v1/evolution-dashboard/ws`` WebSocket endpoint."""

    def test_ws_connect(self, client):
        """The first message after connecting has type ``connected``."""
        with client.websocket_connect("/api/v1/evolution-dashboard/ws") as socket:
            greeting = socket.receive_json()
            assert greeting["type"] == "connected"

    def test_ws_ping_pong(self, client):
        """A ``ping`` message is answered with a ``pong`` message."""
        with client.websocket_connect("/api/v1/evolution-dashboard/ws") as socket:
            # The greeting arrives before anything we send is answered.
            greeting = socket.receive_json()
            assert greeting["type"] == "connected"

            ping_message = {"type": "ping"}
            socket.send_json(ping_message)
            reply = socket.receive_json()
            assert reply["type"] == "pong"

    def test_ws_subscribe(self, client):
        """A ``subscribe`` message is answered with a ``subscribed`` message."""
        with client.websocket_connect("/api/v1/evolution-dashboard/ws") as socket:
            greeting = socket.receive_json()
            assert greeting["type"] == "connected"

            subscribe_message = {"type": "subscribe", "channels": ["experiences"]}
            socket.send_json(subscribe_message)
            reply = socket.receive_json()
            assert reply["type"] == "subscribed"
# ---------------------------------------------------------------------------
# With experience_store configured
# ---------------------------------------------------------------------------
class TestWithExperienceStore:
    """Endpoint tests with collaborators injected through ``app.state``.

    Each test assigns a mock to an ``app.state`` attribute and resets that
    attribute to ``None`` in a ``finally`` clause, so the reset also runs
    when an assertion fails.
    """

    def test_experiences_with_store(self, app, client):
        """Test that experiences endpoint works when experience_store is configured"""
        mock_store = AsyncMock()
        mock_store.search = AsyncMock(return_value=[])
        app.state.experience_store = mock_store
        try:
            response = client.get("/api/v1/evolution-dashboard/experiences")
            assert response.status_code == 200
            data = response.json()
            assert "experiences" in data
        finally:
            # Clean up
            app.state.experience_store = None

    def test_metrics_with_store(self, app, client):
        """Test that metrics endpoint works when experience_store is configured"""
        from agentkit.evolution.experience_schema import EvolutionMetrics

        mock_store = AsyncMock()
        mock_metrics = EvolutionMetrics(
            task_type="code_review",
            time_window="7d",
            completion_rate=0.85,
            avg_duration=25.0,
            retry_rate=0.1,
            sample_count=100,
            window_start=datetime.now(timezone.utc),
            window_end=datetime.now(timezone.utc),
        )
        mock_store.get_metrics = AsyncMock(return_value=[mock_metrics])
        app.state.experience_store = mock_store
        try:
            response = client.get("/api/v1/evolution-dashboard/metrics?period=7d")
            assert response.status_code == 200
            data = response.json()
            assert data["metrics"]["total_tasks"] == 100
            assert data["metrics"]["success_rate"] == 0.85
        finally:
            # Clean up
            app.state.experience_store = None

    def test_pitfalls_with_detector(self, app, client):
        """Test that pitfalls endpoint works when pitfall_detector is configured"""
        from agentkit.evolution.pitfall_detector import PitfallWarning, WarningLevel

        mock_detector = AsyncMock()
        mock_detector.check_pitfalls = AsyncMock(
            return_value=[
                PitfallWarning(
                    step_name="deploy",
                    warning_level=WarningLevel.HIGH,
                    failure_rate=0.6,
                    historical_failures=["timeout", "config error"],
                    suggestion="该步骤历史失败率高达 60%,建议重点关注",
                )
            ]
        )
        app.state.pitfall_detector = mock_detector
        try:
            response = client.get(
                "/api/v1/evolution-dashboard/pitfalls?task_type=deployment&steps=build,deploy,verify"
            )
            assert response.status_code == 200
            data = response.json()
            assert len(data["warnings"]) == 1
            assert data["warnings"][0]["step"] == "deploy"
            assert data["warnings"][0]["risk_level"] == "high"
            assert data["warnings"][0]["historical_failure_rate"] == 0.6
        finally:
            # Clean up
            app.state.pitfall_detector = None

    def test_optimizations_with_optimizer(self, app, client):
        """Test that path-optimizations endpoint works when path_optimizer is configured"""
        from agentkit.evolution.path_optimizer import ExecutionPath

        mock_optimizer = MagicMock()
        recommended_path = ExecutionPath(
            path_id="path-001",
            task_type="code_review",
            steps=["analyze", "approve"],
            total_duration=15.0,
            success_rate=0.9,
            sample_count=10,
            is_recommended=True,
            created_at=datetime.now(timezone.utc),
        )
        mock_optimizer.get_recommended_path = MagicMock(return_value=recommended_path)
        mock_optimizer._recommended_paths = {"code_review": recommended_path}
        app.state.path_optimizer = mock_optimizer
        try:
            response = client.get("/api/v1/evolution-dashboard/path-optimizations")
            assert response.status_code == 200
            data = response.json()
            assert len(data["optimizations"]) >= 1
        finally:
            # Clean up
            app.state.path_optimizer = None
# ---------------------------------------------------------------------------
# Without experience_store configured (graceful degradation)
# ---------------------------------------------------------------------------
class TestWithoutExperienceStore:
    """Endpoint tests against the app exactly as the ``app`` fixture builds it."""

    def test_experiences_graceful(self, client):
        """When experience_store is None, should still return valid response"""
        resp = client.get("/api/v1/evolution-dashboard/experiences")
        assert resp.status_code == 200
        payload = resp.json()
        assert "experiences" in payload

    def test_metrics_graceful(self, client):
        """When experience_store is None, should still return valid response"""
        resp = client.get("/api/v1/evolution-dashboard/metrics")
        assert resp.status_code == 200
        payload = resp.json()
        assert "metrics" in payload
        assert "trends" in payload

    def test_pitfalls_graceful(self, client):
        """When pitfall_detector is None, should return empty warnings"""
        resp = client.get(
            "/api/v1/evolution-dashboard/pitfalls?task_type=test&steps=step1"
        )
        assert resp.status_code == 200
        warnings = resp.json()["warnings"]
        assert warnings == []

    def test_optimizations_graceful(self, client):
        """When path_optimizer is None, should still return valid response"""
        resp = client.get("/api/v1/evolution-dashboard/path-optimizations")
        assert resp.status_code == 200
        payload = resp.json()
        assert "optimizations" in payload