fischer-agentkit/scripts/run_e2e.sh

329 lines
10 KiB
Bash
Executable File

#!/usr/bin/env bash
# =============================================================================
# Fischer AgentKit — E2E Backtest Runner
# =============================================================================
#
# Usage:
# ./scripts/run_e2e.sh # Run all E2E tests
# ./scripts/run_e2e.sh --basic # Run basic function tests only
# ./scripts/run_e2e.sh --capability # Run agent capability tests only
# ./scripts/run_e2e.sh --cli # Run CLI tests only
# ./scripts/run_e2e.sh --api # Run API tests only
# ./scripts/run_e2e.sh --ws # Run WebSocket tests only
# ./scripts/run_e2e.sh --routing # Run routing intelligence tests
# ./scripts/run_e2e.sh --react # Run ReAct intelligence tests
# ./scripts/run_e2e.sh --team # Run team collaboration tests
# ./scripts/run_e2e.sh --report # Generate HTML report
# ./scripts/run_e2e.sh --analyze # Run capability tests + generate analysis report
# ./scripts/run_e2e.sh --direct # Run router direct backtest only (no HTTP)
# ./scripts/run_e2e.sh --alignment # Run alignment guard tests only
# ./scripts/run_e2e.sh --full # Run all: API + direct + alignment
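# ./scripts/run_e2e.sh --baseline # Compare with last baseline report (saves one if none exists)
# ./scripts/run_e2e.sh --fast # Stop at first failure (-x) with --timeout=30
# ./scripts/run_e2e.sh <pytest args> # Any other argument is passed through to pytest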
#
# Environment:
# E2E_PORT - Server port (default: 18765)
# E2E_API_KEY - API key for auth (default: ak_live_e2e_test_key_...)
# SKIP_SERVER - Set to "1" to skip server startup (use existing)
# =============================================================================
# Strict mode: abort on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -euo pipefail

# ── Configuration ────────────────────────────────────────────────────────────
# Paths are derived from this script's own location, so the runner behaves the
# same no matter which directory it is invoked from.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
REPORT_DIR="${PROJECT_ROOT}/test-results/e2e"

# Caller-overridable settings: a value from the environment wins, otherwise
# (unset or empty) the default below is assigned.
: "${E2E_PORT:=18765}"
: "${E2E_API_KEY:=ak_live_e2e_test_key_000000000000000000000000000000000000000000000000}"
: "${SKIP_SERVER:=0}"

# Everything below uses paths relative to the project root.
cd "$PROJECT_ROOT"

# ── Colors ───────────────────────────────────────────────────────────────────
# Stored as literal backslash sequences; the log helpers expand them on output.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# ── Helper Functions ─────────────────────────────────────────────────────────
# Shared formatter for the log helpers below.
#   $1    - color escape sequence for the tag
#   $2    - tag text (printed inside square brackets)
#   $3... - message words, joined into one line
# Backslash escapes in both the tag and the message are expanded on output.
_e2e_log() {
  local color=$1
  local tag=$2
  shift 2
  echo -e "${color}[${tag}]${NC} $*"
}

# Severity-tagged, colored one-line messages on stdout.
info() { _e2e_log "${BLUE}" "INFO" "$@"; }
ok() { _e2e_log "${GREEN}" "OK" "$@"; }
warn() { _e2e_log "${YELLOW}" "WARN" "$@"; }
fail() { _e2e_log "${RED}" "FAIL" "$@"; }
# Verify that the external commands this runner relies on are installed.
#
# python3 is always required (it launches the server and pytest). curl is
# required for the server health probes; it is not demanded when
# SKIP_SERVER_FLAG=1, i.e. for a test selection documented as needing no HTTP
# server.
#
# Globals:  SKIP_SERVER_FLAG (read; treated as 0 when unset)
# Outputs:  one "Missing dependency" line per absent command, via fail
# Exits:    with status 1 if anything is missing; returns 0 otherwise
check_deps() {
  local -a required=(python3)
  local cmd
  local missing=0

  if [[ "${SKIP_SERVER_FLAG:-0}" != "1" ]]; then
    required+=(curl)
  fi

  # Report every missing command before giving up, not just the first one.
  for cmd in "${required[@]}"; do
    if ! command -v "$cmd" &>/dev/null; then
      fail "Missing dependency: $cmd"
      missing=1
    fi
  done

  if [[ "$missing" -eq 1 ]]; then
    exit 1
  fi
}
# Poll the health endpoint until the server answers or the attempts run out.
#
# Makes up to 60 probes, sleeping 0.5s between them. Each probe is capped at
# 2 seconds so a server that accepts the connection but never replies cannot
# hang the runner. If SERVER_PID is set and that process has already exited,
# polling stops immediately instead of waiting out the whole window.
#
# NOTE(review): curl is called without -f, so any HTTP response (including an
# error status) counts as "ready" — confirm that this is intended.
#
# Globals:  E2E_PORT (read), SERVER_PID (read, optional)
# Returns:  0 once a probe succeeds, 1 on timeout or early server exit
wait_for_server() {
  local max_attempts=60
  local attempt=0
  local health_url="http://127.0.0.1:$E2E_PORT/api/v1/health"

  info "Waiting for server on port $E2E_PORT..."
  while [ "$attempt" -lt "$max_attempts" ]; do
    if curl -s --max-time 2 "$health_url" &>/dev/null; then
      ok "Server is ready on port $E2E_PORT"
      return 0
    fi

    # kill -0 sends no signal; it only reports whether the process still exists.
    if [ -n "${SERVER_PID:-}" ] && ! kill -0 "$SERVER_PID" 2>/dev/null; then
      fail "Server process (PID: $SERVER_PID) exited before becoming healthy"
      return 1
    fi

    attempt=$((attempt + 1))
    sleep 0.5
  done

  fail "Server failed to start within 30 seconds"
  return 1
}
# Make an E2E server available on E2E_PORT.
#
# With SKIP_SERVER=1 nothing is launched: the health endpoint of the already
# running server is probed once and the result decides the return status.
# Otherwise the AgentKit server is started in the background with the E2E
# environment exported, its PID is stored in SERVER_PID, and the function
# waits for it to become ready (killing it again if that never happens).
#
# Globals:  SKIP_SERVER, E2E_PORT, E2E_API_KEY (read); SERVER_PID (written)
# Returns:  0 when a server is usable, 1 otherwise
start_server() {
  local health_url="http://127.0.0.1:$E2E_PORT/api/v1/health"

  if [[ "$SKIP_SERVER" == "1" ]]; then
    info "SKIP_SERVER=1, using existing server on port $E2E_PORT"
    if ! curl -s "$health_url" &>/dev/null; then
      fail "Existing server is not responding"
      return 1
    fi
    ok "Existing server is healthy"
    return 0
  fi

  info "Starting AgentKit E2E server on port $E2E_PORT..."
  export AGENTKIT_E2E_MODE=1
  export AGENTKIT_WS_TIMEOUT=0
  export AGENTKIT_API_KEY="$E2E_API_KEY"

  # Launch in the background and remember the PID for later cleanup.
  python3 -m agentkit.cli.main serve --host 127.0.0.1 --port "$E2E_PORT" &
  SERVER_PID=$!

  if ! wait_for_server; then
    # Best effort: the process may already be gone.
    kill "$SERVER_PID" 2>/dev/null || true
    return 1
  fi
  return 0
}
# Stop the server started by this script, if there is one.
#
# With SKIP_SERVER=1 the server is not owned by this script and is left alone.
# Otherwise the process recorded in SERVER_PID is signalled and reaped, and
# SERVER_PID is cleared afterwards. Clearing it makes the function idempotent:
# it is installed as a trap handler and can fire more than once (for example
# on a signal and again on exit), and a second call must not re-signal or
# re-report a PID that has already been reaped.
#
# Globals:  SKIP_SERVER (read), SERVER_PID (read, cleared)
# Returns:  0 always
stop_server() {
  if [[ "$SKIP_SERVER" == "1" ]]; then
    info "SKIP_SERVER=1, not stopping server"
    return 0
  fi

  if [[ -z "${SERVER_PID:-}" ]]; then
    return 0
  fi

  info "Stopping E2E server (PID: $SERVER_PID)..."
  # Best effort: the process may have exited on its own already.
  kill "$SERVER_PID" 2>/dev/null || true
  wait "$SERVER_PID" 2>/dev/null || true
  SERVER_PID=""
  ok "Server stopped"
}
# ── Test Selection ───────────────────────────────────────────────────────────
# Defaults: run everything under tests/e2e/ verbosely, with a 120s timeout,
# short tracebacks and uncaptured output.
PYTEST_ARGS=(--timeout=120 -v --tb=short -s)
TEST_TARGET="tests/e2e/"
GENERATE_REPORT=0
ANALYZE=0
SKIP_SERVER_FLAG=0
BASELINE_COMPARE=0

# Each recognised flag either narrows the pytest selection (marker or file),
# or switches on a post-run step. Later flags override earlier ones where they
# write the same variable. Anything unrecognised is handed to pytest verbatim.
while (( $# > 0 )); do
  case "$1" in
    # Marker-based selections.
    --basic)
      PYTEST_ARGS+=(-m e2e_basic)
      ;;
    --capability | --full)
      # --full currently applies the same marker as --capability.
      PYTEST_ARGS+=(-m e2e_capability)
      ;;

    # Single-file selections.
    --cli) TEST_TARGET="tests/e2e/test_basic_cli.py" ;;
    --api) TEST_TARGET="tests/e2e/test_basic_api.py" ;;
    --ws) TEST_TARGET="tests/e2e/test_basic_websocket.py" ;;
    --routing) TEST_TARGET="tests/e2e/test_capability_routing.py" ;;
    --react) TEST_TARGET="tests/e2e/test_capability_react.py" ;;
    --team) TEST_TARGET="tests/e2e/test_capability_team.py" ;;

    # Single-file selections that need no HTTP server.
    --direct)
      TEST_TARGET="tests/e2e/test_capability_router_direct.py"
      SKIP_SERVER_FLAG=1
      ;;
    --alignment)
      TEST_TARGET="tests/e2e/test_capability_alignment.py"
      SKIP_SERVER_FLAG=1
      ;;

    # Post-run steps and run modifiers.
    --baseline)
      BASELINE_COMPARE=1
      ;;
    --report)
      GENERATE_REPORT=1
      ;;
    --analyze)
      ANALYZE=1
      PYTEST_ARGS+=(-m e2e_capability)
      ;;
    --fast)
      PYTEST_ARGS+=(-x --timeout=30)
      ;;

    --help | -h)
      echo "Usage: $0 [--basic|--capability|--cli|--api|--ws|--routing|--react|--team|--direct|--alignment|--full|--baseline|--report|--analyze|--fast]"
      exit 0
      ;;

    *)
      PYTEST_ARGS+=("$1")
      ;;
  esac
  shift
done

# Report mode: make sure the output directory exists and ask pytest for a
# self-contained HTML report plus a JUnit XML file.
if [[ "$GENERATE_REPORT" -eq 1 ]]; then
  mkdir -p "$REPORT_DIR"
  PYTEST_ARGS+=("--html=$REPORT_DIR/e2e_report.html")
  PYTEST_ARGS+=("--self-contained-html")
  PYTEST_ARGS+=("--junitxml=$REPORT_DIR/e2e_junit.xml")
fi

if [[ "$ANALYZE" -eq 1 ]]; then
  info "Analysis mode: will generate capability report with recall/F1/overfitting analysis"
fi

# --direct / --alignment force SKIP_SERVER on, whatever the environment said.
if [[ "$SKIP_SERVER_FLAG" -eq 1 ]]; then
  SKIP_SERVER=1
fi
# ── Main ─────────────────────────────────────────────────────────────────────
# Banner: show what is about to run.
info "Fischer AgentKit E2E Backtest Runner"
info "====================================="
info "Project: $PROJECT_ROOT"
info "Port: $E2E_PORT"
info "Target: $TEST_TARGET"
info ""
check_deps

# Cleanup runs from the EXIT trap only. INT and TERM are turned into a real
# exit (with the conventional 128+signal status), which in turn fires the EXIT
# trap. Without the explicit exit, a signal would run the cleanup and then let
# the script carry on as if nothing had happened.
trap stop_server EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# --direct / --alignment select tests that need no HTTP server, so for those
# neither a server launch nor a health probe of an existing server is wanted.
if [ "$SKIP_SERVER_FLAG" -eq 1 ]; then
  info "Selected tests need no HTTP server; skipping server startup"
elif ! start_server; then
  fail "Could not start E2E server"
  exit 1
fi
info ""
info "Running E2E tests..."
info "===================="
info ""

# Connection details for the test suite, exported into pytest's environment.
export AGENTKIT_SERVER_URL="http://127.0.0.1:$E2E_PORT"
export AGENTKIT_API_KEY="$E2E_API_KEY"

# Run pytest as an if-condition so a failing run is recorded in EXIT_CODE
# instead of aborting the script under `set -e`.
if python3 -m pytest "$TEST_TARGET" "${PYTEST_ARGS[@]}"; then
  EXIT_CODE=0
else
  EXIT_CODE=$?
fi

echo ""
case "$EXIT_CODE" in
  0) ok "All E2E tests passed!" ;;
  *) fail "Some E2E tests failed (exit code: $EXIT_CODE)" ;;
esac
# Point the user at the HTML report when one was requested.
if [[ "$GENERATE_REPORT" -eq 1 ]]; then
  info "Report generated at: $REPORT_DIR/e2e_report.html"
fi

# In analysis mode, print the capability report if it exists on disk.
if [[ "$ANALYZE" -eq 1 ]]; then
  CAPABILITY_REPORT="$PROJECT_ROOT/test-results/e2e/capability_report.txt"
  if [[ ! -f "$CAPABILITY_REPORT" ]]; then
    warn "Capability report not found (may need capability tests to run first)"
  else
    info "Capability analysis report:"
    echo ""
    cat "$CAPABILITY_REPORT"
  fi
fi
# Baseline comparison (--baseline).
#   * current and baseline reports both present: print each metric as
#     "baseline → current" with the direction and size of the change;
#   * only the current report present: store it as the new baseline;
#   * no current report: tell the user how to produce one.
#
# The report paths are passed to Python as arguments rather than pasted into
# the program text, so a path containing a quote cannot break (or alter) the
# Python code. The reports are read as UTF-8 regardless of the locale. A
# failure of the comparison itself is only warned about, so the script still
# finishes with the test run's exit status.
if [[ "$BASELINE_COMPARE" -eq 1 ]]; then
  CURRENT_REPORT="$PROJECT_ROOT/test-results/e2e/capability_report.json"
  BASELINE_REPORT="$PROJECT_ROOT/test-results/e2e/baseline_capability_report.json"
  if [[ -f "$CURRENT_REPORT" && -f "$BASELINE_REPORT" ]]; then
    info "Baseline comparison:"
    python3 - "$CURRENT_REPORT" "$BASELINE_REPORT" <<'PY' || warn "Baseline comparison failed"
import json
import sys


def load_metrics(path):
    with open(path, encoding='utf-8') as f:
        return json.load(f)


cur = load_metrics(sys.argv[1])
base = load_metrics(sys.argv[2])
metrics = [
    ('overall_skill_recall', '技能路由召回率'),
    ('overall_skill_precision', '技能路由精确率'),
    ('overall_skill_f1', '技能路由F1'),
    ('overall_execution_mode_accuracy', '执行模式准确率'),
    ('overall_task_success_rate', '任务成功率'),
    ('overfitting_score', '过拟合分数'),
]
print()
for key, label in metrics:
    # A metric missing from either report is treated as 0.
    c = cur.get(key, 0)
    b = base.get(key, 0)
    delta = c - b
    arrow = '↑' if delta > 0 else ('↓' if delta < 0 else '→')
    print(f' {label}: {b:.2%} → {c:.2%} {arrow} {delta:+.2%}')
print()
PY
  elif [[ -f "$CURRENT_REPORT" ]]; then
    info "No baseline report found. Saving current report as baseline."
    cp "$CURRENT_REPORT" "$BASELINE_REPORT"
    info "Baseline saved to: $BASELINE_REPORT"
  else
    warn "No current report found. Run with --analyze first."
  fi
fi
# Finish with the status recorded from the pytest run, so callers (e.g. CI)
# see test failures; the EXIT trap performs server cleanup on the way out.
exit $EXIT_CODE