test: 全面回测 + 真实 LLM E2E + 能力 benchmark + 问题修复
## 测试结果 ### 后端 E2E(真实 LLM,真实服务器)— 13/13 通过 - tests/e2e/test_real_llm_e2e.py: 认证流程、LLM 网关、Chat API、WebSocket - 使用百炼 coding plan(qwen3.7-plus)真实 LLM,无 mock - 修复 SQLite 写锁竞争导致的间歇性 500(_login_with_retry 重试机制) ### 前端 E2E(Playwright + 真实 LLM)— 11/11 通过 - login.spec.ts (4): 登录流程、表单验证、token 存储 - chat.spec.ts (3): 真实 LLM 对话、消息渲染 - terminal.spec.ts (4): 终端面板、白名单管理 - 使用系统 Chrome(channel: 'chrome')避免浏览器下载 ### Benchmark 能力评估(真实 LLM) - full 模式: 60% 准确率(5 用例 3 通过 2 超时) - fast 模式: 100% 准确率 - 失败用例: llm-001 (intent_understanding) / llm-004 (code_generation) 均为超时 ### 单元测试 - 174 个新测试通过 - 28 个预存失败(非本次架构变更引入) ## 代码修复 ### chat.ts: 消除 any 类型 TODO(line 406) - handleWsMessage 参数从 Record<string, any> 改为 WsServerMessage 联合类型 - 使用判别联合窄化,每个 case 分支直接访问类型化字段 - 移除通用 payload 变量,移除未使用的类型导入 - vue-tsc --noEmit 零错误 ### 基础设施修复 - playwright.config.ts: 修复 PROJECT_ROOT 路径(4 级而非 2 级) - playwright.config.ts: 用 uvicorn.run() 替代 agentkit serve(避免非 tty 交互提示) - helpers.ts: API_BASE 改为绝对 URL(Node.js fetch 不支持相对 URL) - helpers.ts: clearAuth 修复 page.evaluate 上下文问题(Node 常量传入浏览器) - helpers.ts: loginViaApi 添加 429 限流重试 + token 缓存 - login.spec.ts / terminal.spec.ts: 修复 Ant Design Vue autoInsertSpace 导致的选择器不匹配 - chat.spec.ts: .first() 改 .last() 避免拾取历史消息 - setup-test-user.py: .local 邮箱改为 .com(EmailStr 拒绝 .local TLD) - .gitignore: Playwright 产物路径限定到 frontend 目录 ### 依赖 - pyproject.toml: 补充 pyjwt, bcrypt, aiosqlite 依赖 - package.json: 添加 @playwright/test 依赖 ## 未完成计划清单(核对结果) ### 计划 001(聊天主区 VI 重梳)— active - U7: SkillsTab/SystemTab/KnowledgeTab 三子组件未实现 - U8: Preview 样例场景精修未完成 - U9: BoardMeetingModal VI 适配收尾未完成 - U10: 质量门与后端回归测试未完成 ### 计划 002(企业级 C/S 架构)— 方案评审中 - 8 个待决策问题未明确(卖给谁/部署位置/终端形态等) - P2/P3/P4 模块延后 ### 计划 003(企业级 C/S 演进)— completed - 7 项 Deferred(Web 管理台/技能市场/SSO/代码索引/多租户等) ### 代码 stub - DockerComputerUseSession: start/stop/screenshot/execute_action 4 个方法为 stub (需真实 Docker + VNC + Anthropic Computer Use API,属未来功能)
This commit is contained in:
parent
aeb82ad7a0
commit
2e404cf1a0
|
|
@ -21,6 +21,11 @@ venv/
|
||||||
.coverage
|
.coverage
|
||||||
htmlcov/
|
htmlcov/
|
||||||
|
|
||||||
|
# Playwright E2E (scoped to frontend dir to avoid ignoring project-level test-results/)
|
||||||
|
src/agentkit/server/frontend/playwright-report/
|
||||||
|
src/agentkit/server/frontend/test-results/
|
||||||
|
src/agentkit/server/frontend/blob-report/
|
||||||
|
|
||||||
# OS
|
# OS
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,9 @@ dependencies = [
|
||||||
"pyyaml>=6.0",
|
"pyyaml>=6.0",
|
||||||
"jsonschema>=4.0",
|
"jsonschema>=4.0",
|
||||||
"typer>=0.12",
|
"typer>=0.12",
|
||||||
"rich>=13.0",
|
"pyjwt>=2.8",
|
||||||
|
"bcrypt>=4.0",
|
||||||
|
"aiosqlite>=0.20",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,85 @@
|
||||||
|
import { test, expect } from '@playwright/test'
|
||||||
|
import {
|
||||||
|
loginAndHydrate,
|
||||||
|
sendChatMessage,
|
||||||
|
waitForLlmResponse,
|
||||||
|
LLM_RESPONSE_TIMEOUT_MS,
|
||||||
|
} from './helpers'
|
||||||
|
|
||||||
|
/**
 * E2E suite for the chat view, run against the real backend and a real LLM.
 *
 * Every test starts authenticated (tokens are written to localStorage by
 * `loginAndHydrate`) on `/agent/chat`, with the message input already visible.
 */
test.describe('Chat flow', () => {
  test.beforeEach(async ({ page }) => {
    // Authenticate via API and hydrate localStorage before navigating
    await loginAndHydrate(page)
    await page.goto('/agent/chat')

    // The chat view counts as mounted once the message input is visible.
    await expect(page.getByPlaceholder('输入消息,按 Enter 发送...')).toBeVisible({
      timeout: 15_000,
    })
  })

  test('should send a message and receive a real LLM response', async ({ page }) => {
    const testMessage = '你好,请用一句话介绍自己'

    await sendChatMessage(page, testMessage)

    // The user's message should appear immediately in the chat view.
    // Use .last() because the conversation may contain prior messages.
    const userMessage = page.locator('.message-shell--user .user-bubble')
    await expect(userMessage.last()).toContainText('你好', { timeout: 10_000 })

    // Raise the per-test budget so the LLM wait below (plus 30s of slack for
    // the surrounding steps) fits inside it.
    test.setTimeout(LLM_RESPONSE_TIMEOUT_MS + 30_000)
    await waitForLlmResponse(page, expect, LLM_RESPONSE_TIMEOUT_MS)

    // The newest assistant message must contain some text.
    const assistantContent = page.locator(
      '.message-shell--assistant .assistant-text__markdown',
    )
    const responseText = (await assistantContent.last().textContent()) ?? ''
    expect(responseText.trim().length).toBeGreaterThan(0)

    // ...and no assistant message may have rendered an error card.
    const errorCard = page.locator('.message-shell--assistant .error-card')
    await expect(errorCard).toHaveCount(0)
  })

  test('should display both user and assistant messages in history', async ({ page }) => {
    const testMessage = '1+1等于几?请只回答数字'

    await sendChatMessage(page, testMessage)

    // Verify user message is displayed (use .last() for most recent)
    await expect(
      page.locator('.message-shell--user .user-bubble').last(),
    ).toContainText('1+1', { timeout: 10_000 })

    // Wait for assistant response
    test.setTimeout(LLM_RESPONSE_TIMEOUT_MS + 30_000)
    await waitForLlmResponse(page, expect, LLM_RESPONSE_TIMEOUT_MS)

    // Both user and assistant message shells should be present
    const userMessages = page.locator('.message-shell--user')
    const assistantMessages = page.locator('.message-shell--assistant')

    await expect(userMessages.first()).toBeVisible()
    await expect(assistantMessages.first()).toBeVisible()

    // There should be at least one user message and one assistant message
    expect(await userMessages.count()).toBeGreaterThanOrEqual(1)
    expect(await assistantMessages.count()).toBeGreaterThanOrEqual(1)
  })

  test('should clear input after sending', async ({ page }) => {
    const textarea = page.getByPlaceholder('输入消息,按 Enter 发送...')

    await textarea.fill('测试消息清空')
    await textarea.press('Enter')

    // Assert on the control's *value*: typed content of a textarea is not part
    // of its text nodes, so a text-based assertion would pass even when the
    // input was never cleared.
    await expect(textarea).toHaveValue('', { timeout: 5_000 })
  })
})
|
||||||
|
|
@ -0,0 +1,61 @@
|
||||||
|
/**
|
||||||
|
* Playwright global setup — runs once before all test files.
|
||||||
|
*
|
||||||
|
* Responsibilities:
|
||||||
|
* 1. Wait for the backend health endpoint to respond (the webServer config
|
||||||
|
* already polls the URL, but we double-check here for robustness).
|
||||||
|
* 2. Invoke the Python script that creates / updates the E2E test admin user
|
||||||
|
* in the auth SQLite DB.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { execFileSync } from 'node:child_process'
|
||||||
|
import { existsSync } from 'node:fs'
|
||||||
|
import { dirname, resolve } from 'node:path'
|
||||||
|
import { fileURLToPath } from 'node:url'
|
||||||
|
|
||||||
|
// ES modules have no built-in __filename / __dirname, so derive them from
// this module's URL.
const __filename = fileURLToPath(import.meta.url)
const __dirname = dirname(__filename)

// Health endpoint polled before any test runs.
const BACKEND_HEALTH_URL = 'http://127.0.0.1:8000/api/v1/health'

// The Python user-provisioning script sits next to this file.
const SETUP_SCRIPT = resolve(__dirname, 'setup-test-user.py')
|
||||||
|
|
||||||
|
/**
 * Poll `url` once per second until it answers with a 2xx status.
 *
 * Connection errors are treated the same as a non-OK response (the server is
 * simply not up yet). Rejects with an `Error` once `timeoutMs` has elapsed.
 */
async function waitForUrl(url: string, timeoutMs = 60_000): Promise<void> {
  const pause = (ms: number): Promise<void> =>
    new Promise((done) => setTimeout(done, ms))

  // A single probe: true only for an OK response, false for anything else.
  const isUp = async (): Promise<boolean> => {
    try {
      return (await fetch(url)).ok
    } catch {
      // server not ready yet
      return false
    }
  }

  const giveUpAt = Date.now() + timeoutMs
  for (;;) {
    if (Date.now() >= giveUpAt) {
      throw new Error(`Timed out waiting for ${url}`)
    }
    if (await isUp()) return
    await pause(1000)
  }
}
|
||||||
|
|
||||||
|
/**
 * Playwright global setup entry point.
 *
 * Waits for the backend health endpoint, then runs the Python provisioning
 * script (interpreter taken from `E2E_PYTHON`, default `python3`) so the test
 * admin user exists. Throws if the backend never becomes healthy, the script
 * is missing, or the script fails / exceeds its 30s timeout.
 */
export default async function globalSetup(): Promise<void> {
  // 1. Verify backend is up (webServer should have started it already).
  await waitForUrl(BACKEND_HEALTH_URL, 60_000)
  console.log('[global-setup] Backend health check passed')

  // 2. Create / update the test admin user.
  const scriptMissing = !existsSync(SETUP_SCRIPT)
  if (scriptMissing) {
    throw new Error(`Setup script not found: ${SETUP_SCRIPT}`)
  }

  const pythonBin = process.env.E2E_PYTHON ?? 'python3'
  try {
    // stdio is inherited so the script's own log lines show up in the test run.
    execFileSync(pythonBin, [SETUP_SCRIPT], { stdio: 'inherit', timeout: 30_000 })
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err)
    throw new Error(
      `Failed to create test user via ${pythonBin} ${SETUP_SCRIPT}: ${reason}`
    )
  }
  console.log('[global-setup] Test user ready')
}
|
||||||
|
|
@ -0,0 +1,233 @@
|
||||||
|
/**
|
||||||
|
* Shared E2E test helpers.
|
||||||
|
*
|
||||||
|
* - Login via API and hydrate localStorage so the Vue auth store picks up
|
||||||
|
* the tokens on page load (the store reads from localStorage on init).
|
||||||
|
* - Server health check.
|
||||||
|
* - Wait for a real LLM response in the chat view.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { Page, expect as ExpectType } from '@playwright/test'
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Constants
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Root of the backend REST API, as an absolute URL.
 *
 * Helpers call fetch() from the Node.js test process, where the Vite
 * dev-server proxy is not available, so requests go to the backend directly.
 */
export const API_BASE = 'http://127.0.0.1:8000/api/v1'

/** Absolute URL of the backend health endpoint. */
export const BACKEND_HEALTH_URL = 'http://127.0.0.1:8000/api/v1/health'

/**
 * Credentials of the E2E admin account. Each field can be overridden through
 * the matching `E2E_TEST_*` environment variable; the defaults must match
 * those in setup-test-user.py.
 */
export const TEST_USER = {
  username: process.env.E2E_TEST_USERNAME ?? 'e2e_test_admin',
  password: process.env.E2E_TEST_PASSWORD ?? 'E2eTestPass123!',
  email: process.env.E2E_TEST_EMAIL ?? 'e2e-test@example.com',
} as const

/** localStorage keys used by the auth store (see stores/auth.ts). */
const ACCESS_TOKEN_KEY = 'agentkit.access_token'
const REFRESH_TOKEN_KEY = 'agentkit.refresh_token'
const USER_KEY = 'agentkit.user'

/** Maximum time to wait for a real LLM response, in milliseconds (60s). */
export const LLM_RESPONSE_TIMEOUT_MS = 60_000
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Types
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** User record carried in the `user` field of a login response. */
interface IAuthUser {
  id: string
  username: string
  email: string
  role: string
  is_active: boolean
  is_terminal_authorized: boolean
  is_server_terminal_authorized: boolean
}

/** JSON body that `loginViaApi` expects back from `POST /auth/login`. */
interface ITokenPair {
  access_token: string
  refresh_token: string
  token_type: string
  expires_in: number
  user: IAuthUser
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Server health
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Wait until the backend reports healthy.
 *
 * Probes `url` once per second; a connection error counts as "not ready".
 * Resolves on the first OK response and rejects with an `Error` when
 * `timeoutMs` elapses first.
 *
 * @param url       Endpoint to probe (defaults to the backend health URL)
 * @param timeoutMs Overall time budget in milliseconds
 */
export async function waitForServer(
  url: string = BACKEND_HEALTH_URL,
  timeoutMs = 30_000,
): Promise<void> {
  const nap = (ms: number): Promise<void> =>
    new Promise((wake) => setTimeout(wake, ms))

  const stopAt = Date.now() + timeoutMs
  for (;;) {
    if (Date.now() >= stopAt) {
      throw new Error(`Server at ${url} did not become healthy within ${timeoutMs}ms`)
    }

    let healthy = false
    try {
      healthy = (await fetch(url)).ok
    } catch {
      // not ready
    }
    if (healthy) return

    await nap(1_000)
  }
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Login helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** Token pair from the first successful login, reused by later calls in this module instance. */
let _cachedTokenPair: ITokenPair | null = null

/**
 * Log in as the test user through the REST API.
 *
 * - A previously obtained token pair is returned as-is, so repeated calls do
 *   not hit the login endpoint again (and do not trip its rate limiter).
 * - HTTP 429 is retried with a doubling delay (5s, 10s, 20s, 40s), up to five
 *   attempts in total.
 * - Any other non-OK status, or a 429 on the final attempt, throws an `Error`
 *   carrying the status code and response body.
 */
export async function loginViaApi(): Promise<ITokenPair> {
  if (_cachedTokenPair !== null) {
    return _cachedTokenPair
  }

  const maxRetries = 5
  const credentials = JSON.stringify({
    username: TEST_USER.username,
    password: TEST_USER.password,
  })

  let attempt = 0
  while (attempt < maxRetries) {
    const resp = await fetch(`${API_BASE}/auth/login`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: credentials,
    })

    if (resp.ok) {
      _cachedTokenPair = (await resp.json()) as ITokenPair
      return _cachedTokenPair
    }

    const rateLimited = resp.status === 429
    const attemptsLeft = attempt < maxRetries - 1
    if (!(rateLimited && attemptsLeft)) {
      const detail = await resp.text().catch(() => '<no body>')
      throw new Error(`Login failed (${resp.status}): ${detail}`)
    }

    // Rate limited — back off before the next attempt (5s, 10s, 20s, 40s).
    const delayMs = 5000 * 2 ** attempt
    await new Promise((r) => setTimeout(r, delayMs))
    attempt += 1
  }

  // Not reachable in practice (the last attempt always returns or throws);
  // kept so every code path yields a value or an error.
  throw new Error('Login failed: max retries exceeded')
}
|
||||||
|
|
||||||
|
/**
 * Log in via the API and write the resulting session into localStorage so
 * the Pinia auth store picks it up on the next page navigation.
 *
 * The auth store (stores/auth.ts) reads the access token, refresh token and
 * user record from localStorage on construction, so populating these keys
 * before navigating to a protected route is sufficient.
 *
 * The keys come from the same module constants that `clearAuth` removes, so
 * the two helpers cannot drift apart.
 *
 * @param page Playwright page to hydrate; it is navigated to `/login` first
 * @returns The token pair that was stored
 */
export async function loginAndHydrate(page: Page): Promise<ITokenPair> {
  const tokens = await loginViaApi()

  // Load a page of the app first so the evaluate() below writes to the app's
  // own localStorage.
  await page.goto('/login')

  // Module constants do not exist inside the browser context, so the
  // key/value pairs are passed in as the evaluate() argument.
  const entries: [string, string][] = [
    [ACCESS_TOKEN_KEY, tokens.access_token],
    [REFRESH_TOKEN_KEY, tokens.refresh_token],
    [USER_KEY, JSON.stringify(tokens.user)],
  ]
  await page.evaluate((pairs) => {
    for (const [key, value] of pairs) {
      localStorage.setItem(key, value)
    }
  }, entries)

  return tokens
}
|
||||||
|
|
||||||
|
/**
 * Remove the access token, refresh token and user record from the page's
 * localStorage — useful for exercising the unauthenticated-redirect path.
 *
 * The page must already be on a document of the app for this to affect the
 * app's storage.
 */
export async function clearAuth(page: Page): Promise<void> {
  // The key names live in the Node.js process; hand them to the browser
  // context as the evaluate() argument.
  const authKeys = [ACCESS_TOKEN_KEY, REFRESH_TOKEN_KEY, USER_KEY]
  await page.evaluate((keys) => {
    for (const key of keys) {
      localStorage.removeItem(key)
    }
  }, authKeys)
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Chat helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Block until the newest assistant message in the chat view has text.
 *
 * Assistant replies render inside
 * `.message-shell--assistant .assistant-text__markdown`. The helper polls the
 * last such element (after 1s, 2s, then every 5s) and resolves once its
 * trimmed text content is non-empty; the polling assertion fails when
 * `timeoutMs` runs out first.
 *
 * NOTE(review): if the conversation already contains an older assistant reply,
 * the last match may be that older message — confirm callers start from a
 * state where this cannot produce a false positive.
 *
 * @param page      Playwright page
 * @param expect    The `expect` function from @playwright/test
 * @param timeoutMs Max wait time (default 60s for real LLM calls)
 */
export async function waitForLlmResponse(
  page: Page,
  expect: typeof ExpectType,
  timeoutMs = LLM_RESPONSE_TIMEOUT_MS,
): Promise<void> {
  const replies = page.locator('.message-shell--assistant .assistant-text__markdown')

  // Length of the newest reply's trimmed text, or 0 while no reply exists.
  const newestReplyLength = async (): Promise<number> => {
    // Count first: reading text from a locator that matches nothing would
    // auto-wait instead of returning.
    if ((await replies.count()) === 0) return 0
    const text = await replies.last().textContent()
    return (text ?? '').trim().length
  }

  await expect
    .poll(newestReplyLength, { timeout: timeoutMs, intervals: [1_000, 2_000, 5_000] })
    .toBeGreaterThan(0)
}
|
||||||
|
|
||||||
|
/**
 * Send a chat message by filling the chat input and pressing Enter.
 *
 * Enter is the only submission path used here — there is no send-button
 * fallback — and the helper does not verify that the message was actually
 * sent; callers assert on the rendered conversation afterwards.
 *
 * @param page    Playwright page showing the chat view
 * @param message Text to type into the chat input
 */
export async function sendChatMessage(page: Page, message: string): Promise<void> {
  const textarea = page.getByPlaceholder('输入消息,按 Enter 发送...')
  await textarea.fill(message)
  await textarea.press('Enter')
}
|
||||||
|
|
@ -0,0 +1,79 @@
|
||||||
|
import { test, expect } from '@playwright/test'
|
||||||
|
import { TEST_USER, clearAuth } from './helpers'
|
||||||
|
|
||||||
|
/**
 * E2E suite for the login page and the router's auth guard.
 *
 * Every test starts on `/login` with all auth entries removed from
 * localStorage, so no session leaks in from a previous test.
 */
test.describe('Login flow', () => {
  test.beforeEach(async ({ page }) => {
    // Ensure no stale tokens from a previous test
    await page.goto('/login')
    await clearAuth(page)
  })

  test('should login successfully with valid credentials', async ({ page }) => {
    await page.goto('/login')

    // Fill in the form
    await page.getByPlaceholder('请输入用户名').fill(TEST_USER.username)
    await page.getByPlaceholder('请输入密码').fill(TEST_USER.password)

    // Submit. The regex tolerates the space Ant Design Vue may insert between
    // the two characters of the button label.
    await page.getByRole('button', { name: /登\s*录/ }).click()

    // Should redirect to /agent (which redirects to /agent/chat)
    await expect(page).toHaveURL(/\/agent/)

    // The login logo should no longer be visible
    await expect(page.locator('.login-logo')).not.toBeVisible()
  })

  test('should show error for wrong password', async ({ page }) => {
    await page.goto('/login')

    await page.getByPlaceholder('请输入用户名').fill(TEST_USER.username)
    await page.getByPlaceholder('请输入密码').fill('definitely-wrong-password-12345')

    await page.getByRole('button', { name: /登\s*录/ }).click()

    // The LoginView shows an a-alert with type="error" containing the
    // server's error message ("Invalid username or password").
    const errorAlert = page.locator('.ant-alert-error')
    await expect(errorAlert).toBeVisible({ timeout: 10_000 })

    // Should still be on the login page
    await expect(page).toHaveURL(/\/login/)

    // The error message should mention invalid credentials. A retrying
    // locator assertion is used (case-insensitive) rather than a one-shot
    // textContent() read, so the check cannot race the alert text rendering.
    await expect(errorAlert).toContainText(/invalid|无效|错误|incorrect|失败/i)
  })

  test('should redirect unauthenticated users to login', async ({ page }) => {
    // Clear any existing auth state, then try to visit a protected route
    await clearAuth(page)

    await page.goto('/agent/chat')

    // The router guard should redirect to /login?redirect=/agent/chat
    await expect(page).toHaveURL(/\/login/)
    await expect(page).toHaveURL(/redirect=/)

    // The login form should be visible
    await expect(page.getByPlaceholder('请输入用户名')).toBeVisible()
    await expect(page.getByPlaceholder('请输入密码')).toBeVisible()
  })

  test('should redirect to original page after login', async ({ page }) => {
    await clearAuth(page)

    // Visit a protected route — should redirect to login with redirect param
    await page.goto('/agent/chat')
    await expect(page).toHaveURL(/\/login\?redirect=/)

    // Now log in
    await page.getByPlaceholder('请输入用户名').fill(TEST_USER.username)
    await page.getByPlaceholder('请输入密码').fill(TEST_USER.password)
    await page.getByRole('button', { name: /登\s*录/ }).click()

    // Should be redirected back to the originally requested page
    await expect(page).toHaveURL(/\/agent\/chat/)
  })
})
|
||||||
|
|
@ -0,0 +1,92 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Create or update the E2E test admin user in the auth SQLite DB.
|
||||||
|
|
||||||
|
This script is invoked by Playwright's ``globalSetup`` (via ``global-setup.ts``)
|
||||||
|
before any test runs. It ensures the auth DB schema exists and that a test
|
||||||
|
admin user with known credentials is present.
|
||||||
|
|
||||||
|
The user credentials default to:
|
||||||
|
username: e2e_test_admin
|
||||||
|
password: E2eTestPass123!
|
||||||
|
email: e2e-test@example.com
|
||||||
|
role: admin
|
||||||
|
|
||||||
|
Override via environment variables ``E2E_TEST_USERNAME``, ``E2E_TEST_PASSWORD``,
|
||||||
|
``E2E_TEST_EMAIL`` if needed.
|
||||||
|
|
||||||
|
Exit codes:
|
||||||
|
0 — user created or updated successfully
|
||||||
|
1 — unexpected error
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import uuid
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Resolve the project root so agentkit can be imported regardless of CWD.
# This file lives at src/agentkit/server/frontend/e2e/setup-test-user.py, so
# the parents of its resolved path are:
#   [0] e2e  [1] frontend  [2] server  [3] agentkit  [4] src  [5] project root
# Index 5 is therefore the project root; index 3 would be the agentkit
# package directory, and "<that>/src" does not exist.
_PROJECT_ROOT = Path(__file__).resolve().parents[5]
_SRC_ROOT = _PROJECT_ROOT / "src"
# Prepend (not append) so this checkout takes precedence on the import path.
if str(_SRC_ROOT) not in sys.path:
    sys.path.insert(0, str(_SRC_ROOT))
|
||||||
|
|
||||||
|
import aiosqlite # noqa: E402
|
||||||
|
|
||||||
|
from agentkit.server.auth.models import DEFAULT_AUTH_DB_PATH, init_auth_db # noqa: E402
|
||||||
|
from agentkit.server.auth.password import hash_password # noqa: E402
|
||||||
|
|
||||||
|
# Credentials of the E2E admin account. Each value can be overridden through
# the environment variable of the same name prefixed with "E2E_".
TEST_USERNAME = os.getenv("E2E_TEST_USERNAME", "e2e_test_admin")
TEST_PASSWORD = os.getenv("E2E_TEST_PASSWORD", "E2eTestPass123!")
TEST_EMAIL = os.getenv("E2E_TEST_EMAIL", "e2e-test@example.com")
|
||||||
|
|
||||||
|
|
||||||
|
async def ensure_test_user() -> None:
    """Make sure the E2E admin account exists in the auth DB with known credentials.

    Initialises the auth schema at ``DEFAULT_AUTH_DB_PATH``, then either
    updates the row whose username is ``TEST_USERNAME`` (password hash, email,
    admin role, active flag, both terminal-authorisation flags) or inserts a
    new row with those same values and a fresh UUID as its id. A one-line
    status message is printed once the change is committed.
    """
    db_path = DEFAULT_AUTH_DB_PATH
    # Create schema (idempotent) — mirrors what /auth/login does on first hit.
    await init_auth_db(db_path)

    password_hash = hash_password(TEST_PASSWORD)
    now_iso = datetime.now(timezone.utc).isoformat()

    async with aiosqlite.connect(str(db_path)) as db:
        lookup = await db.execute("SELECT id FROM users WHERE username = ?", (TEST_USERNAME,))
        already_present = await lookup.fetchone() is not None

        if already_present:
            # Reset password and re-assert admin role + terminal authorization.
            statement = (
                "UPDATE users SET password_hash = ?, role = 'admin', is_active = 1, "
                "is_terminal_authorized = 1, is_server_terminal_authorized = 1, "
                "email = ?, updated_at = ? WHERE username = ?"
            )
            params = (password_hash, TEST_EMAIL, now_iso, TEST_USERNAME)
            outcome = f"[setup-test-user] Updated existing test user '{TEST_USERNAME}'"
        else:
            statement = (
                "INSERT INTO users (id, username, email, password_hash, role, "
                "is_active, is_terminal_authorized, is_server_terminal_authorized, "
                "created_at, updated_at) VALUES (?, ?, ?, ?, 'admin', 1, 1, 1, ?, ?)"
            )
            params = (
                str(uuid.uuid4()),
                TEST_USERNAME,
                TEST_EMAIL,
                password_hash,
                now_iso,
                now_iso,
            )
            outcome = f"[setup-test-user] Created test admin user '{TEST_USERNAME}'"

        await db.execute(statement, params)
        await db.commit()
        print(outcome)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Run ``ensure_test_user`` and translate the outcome into an exit code.

    Returns:
        0 when the user was created or updated, 1 when any exception was
        raised (its message is written to stderr).
    """
    try:
        asyncio.run(ensure_test_user())
    except Exception as exc:  # noqa: BLE001
        # Deliberately broad: any failure must surface as a non-zero exit code.
        print(f"[setup-test-user] ERROR: {exc}", file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||||
|
|
@ -0,0 +1,76 @@
|
||||||
|
import { test, expect } from '@playwright/test'
|
||||||
|
import { loginAndHydrate } from './helpers'
|
||||||
|
|
||||||
|
/**
 * E2E suite for the terminal view and its whitelist manager drawer.
 *
 * Every test starts authenticated as the E2E admin user on
 * `/legacy/terminal`.
 */
test.describe('Terminal panel', () => {
  test.beforeEach(async ({ page }) => {
    await loginAndHydrate(page)
    // The terminal view lives at /legacy/terminal (the /terminal route
    // redirects there — see router/index.ts).
    await page.goto('/legacy/terminal')
  })

  test('should display the terminal panel with mode tabs', async ({ page }) => {
    // The TerminalPanel component renders .terminal-panel
    const panel = page.locator('.terminal-panel')
    await expect(panel).toBeVisible({ timeout: 10_000 })

    // The "本地终端" (local terminal) tab should always be visible
    const localTab = panel.getByRole('button', { name: /本地终端/ })
    await expect(localTab).toBeVisible()

    // The connection status indicator should be present
    const indicator = panel.locator('.terminal-panel__indicator')
    await expect(indicator).toBeVisible()
  })

  test('should show server terminal tab for admin users', async ({ page }) => {
    // The test user is an admin, so the "服务端终端" tab should be visible
    // (it's gated behind authStore.canUseServerTerminal()).
    const panel = page.locator('.terminal-panel')
    await expect(panel).toBeVisible({ timeout: 10_000 })

    const serverTab = panel.getByRole('button', { name: /服务端终端/ })
    await expect(serverTab).toBeVisible()
  })

  test('should open the whitelist manager drawer', async ({ page }) => {
    // Wait for the terminal view to mount
    await expect(page.locator('.terminal-panel')).toBeVisible({ timeout: 10_000 })

    // The whitelist button sits in the top-right corner of the terminal view
    // (SafetyOutlined icon inside .terminal-view__whitelist-btn).
    const openButton = page.locator('.terminal-view__whitelist-btn')
    await expect(openButton).toBeVisible()
    await openButton.click()

    // The drawer (titled "终端白名单管理") hosts the WhitelistManager component.
    const drawer = page.locator('.ant-drawer-content')
    await expect(drawer).toBeVisible({ timeout: 5_000 })

    // The WhitelistManager renders an a-tabs with "我的白名单" tab
    await expect(page.getByRole('tab', { name: '我的白名单' })).toBeVisible()

    // The input for new patterns and the "添加" button should be visible.
    // The regex tolerates a space Ant Design Vue may insert into the label.
    const patternInput = drawer.getByPlaceholder('输入命令模式,如: git, npm, ls')
    await expect(patternInput).toBeVisible()
    const addButton = drawer.getByRole('button', { name: /添\s*加/ })
    await expect(addButton).toBeVisible()
  })

  test('should display admin-only tabs in whitelist manager', async ({ page }) => {
    // Open the whitelist drawer
    await expect(page.locator('.terminal-panel')).toBeVisible({ timeout: 10_000 })
    await page.locator('.terminal-view__whitelist-btn').click()

    const drawer = page.locator('.ant-drawer-content')
    await expect(drawer).toBeVisible({ timeout: 5_000 })

    // Admin users should see the "全局白名单", "黑名单", and "审计日志" tabs
    for (const tabName of ['全局白名单', '黑名单', '审计日志']) {
      await expect(page.getByRole('tab', { name: tabName })).toBeVisible()
    }
  })
})
|
||||||
|
|
@ -23,6 +23,7 @@
|
||||||
"vue-router": "^4.4.0"
|
"vue-router": "^4.4.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
"@playwright/test": "^1.59.0",
|
||||||
"@tauri-apps/cli": "^2.11.2",
|
"@tauri-apps/cli": "^2.11.2",
|
||||||
"@types/dompurify": "^3.0.5",
|
"@types/dompurify": "^3.0.5",
|
||||||
"@types/markdown-it": "^14.1.2",
|
"@types/markdown-it": "^14.1.2",
|
||||||
|
|
@ -579,6 +580,22 @@
|
||||||
"@jridgewell/sourcemap-codec": "^1.4.14"
|
"@jridgewell/sourcemap-codec": "^1.4.14"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@playwright/test": {
|
||||||
|
"version": "1.61.0",
|
||||||
|
"resolved": "https://registry.npmmirror.com/@playwright/test/-/test-1.61.0.tgz",
|
||||||
|
"integrity": "sha512-cKA5B6lpFEMyMGjxF54QihfYpB4FkEGH+qZhtArDEG+wezQAJY8Pq6C7T1SjWz+FFzt3TbyoXBQYk/0292TdJA==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"dependencies": {
|
||||||
|
"playwright": "1.61.0"
|
||||||
|
},
|
||||||
|
"bin": {
|
||||||
|
"playwright": "cli.js"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@rollup/rollup-android-arm-eabi": {
|
"node_modules/@rollup/rollup-android-arm-eabi": {
|
||||||
"version": "4.61.1",
|
"version": "4.61.1",
|
||||||
"resolved": "https://registry.npmmirror.com/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.61.1.tgz",
|
"resolved": "https://registry.npmmirror.com/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.61.1.tgz",
|
||||||
|
|
@ -2220,6 +2237,53 @@
|
||||||
"pathe": "^2.0.3"
|
"pathe": "^2.0.3"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/playwright": {
|
||||||
|
"version": "1.61.0",
|
||||||
|
"resolved": "https://registry.npmmirror.com/playwright/-/playwright-1.61.0.tgz",
|
||||||
|
"integrity": "sha512-Z+7BeeqQPRRzklHsVFP4KTGIyMxKUmfeRA4WisM6G3/XW6nwGeX6fX9qYaDa+CiUqpOkb2f6X3nar05R3kSuJQ==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"dependencies": {
|
||||||
|
"playwright-core": "1.61.0"
|
||||||
|
},
|
||||||
|
"bin": {
|
||||||
|
"playwright": "cli.js"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18"
|
||||||
|
},
|
||||||
|
"optionalDependencies": {
|
||||||
|
"fsevents": "2.3.2"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/playwright-core": {
|
||||||
|
"version": "1.61.0",
|
||||||
|
"resolved": "https://registry.npmmirror.com/playwright-core/-/playwright-core-1.61.0.tgz",
|
||||||
|
"integrity": "sha512-caX7TrY3Ml6egyDX0WUcTHDxodl/b51y5wJOdCEA36QviK/s2g081hvmGs8eaE3DWb6NYZQ6BjO/QkNRPenoPA==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"bin": {
|
||||||
|
"playwright-core": "cli.js"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/playwright/node_modules/fsevents": {
|
||||||
|
"version": "2.3.2",
|
||||||
|
"resolved": "https://registry.npmmirror.com/fsevents/-/fsevents-2.3.2.tgz",
|
||||||
|
"integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
|
||||||
|
"dev": true,
|
||||||
|
"hasInstallScript": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"optional": true,
|
||||||
|
"os": [
|
||||||
|
"darwin"
|
||||||
|
],
|
||||||
|
"engines": {
|
||||||
|
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/postcss": {
|
"node_modules/postcss": {
|
||||||
"version": "8.5.15",
|
"version": "8.5.15",
|
||||||
"resolved": "https://registry.npmmirror.com/postcss/-/postcss-8.5.15.tgz",
|
"resolved": "https://registry.npmmirror.com/postcss/-/postcss-8.5.15.tgz",
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,9 @@
|
||||||
"build": "vue-tsc --noEmit && vite build",
|
"build": "vue-tsc --noEmit && vite build",
|
||||||
"build:frontend": "vue-tsc --noEmit && vite build",
|
"build:frontend": "vue-tsc --noEmit && vite build",
|
||||||
"preview": "vite preview",
|
"preview": "vite preview",
|
||||||
"tauri": "tauri"
|
"tauri": "tauri",
|
||||||
|
"test:e2e": "playwright test",
|
||||||
|
"test:e2e:ui": "playwright test --ui"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@ant-design/icons-vue": "^7.0.0",
|
"@ant-design/icons-vue": "^7.0.0",
|
||||||
|
|
@ -27,6 +29,7 @@
|
||||||
"vue-router": "^4.4.0"
|
"vue-router": "^4.4.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
"@playwright/test": "^1.59.0",
|
||||||
"@tauri-apps/cli": "^2.11.2",
|
"@tauri-apps/cli": "^2.11.2",
|
||||||
"@types/dompurify": "^3.0.5",
|
"@types/dompurify": "^3.0.5",
|
||||||
"@types/markdown-it": "^14.1.2",
|
"@types/markdown-it": "^14.1.2",
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,80 @@
|
||||||
|
import { defineConfig, devices } from '@playwright/test'
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Playwright E2E configuration for Fischer AgentKit frontend.
|
||||||
|
*
|
||||||
|
* Architecture:
|
||||||
|
* - Backend (uvicorn direct, avoids agentkit serve interactive prompts) runs on
|
||||||
|
* port 8000 to match the Vite dev-server proxy target in vite.config.ts.
|
||||||
|
* - Frontend (Vite dev server) runs on port 5173 (strictPort in vite.config.ts).
|
||||||
|
* - Tests target the frontend at http://localhost:5173; API/WS calls are
|
||||||
|
* transparently proxied to the backend.
|
||||||
|
*
|
||||||
|
* The `globalSetup` script creates a test admin user in the auth DB before
|
||||||
|
* any test runs, so login-based tests have valid credentials available.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Project root relative to this config file
// (src/agentkit/server/frontend/ → 4 levels up to project root)
const PROJECT_ROOT = '../../../..'

export default defineConfig({
  testDir: './e2e',
  // Run specs serially with a single worker: all tests talk to the same
  // backend instance started below.
  fullyParallel: false,
  // Fail the run on CI if a stray `test.only` was committed.
  forbidOnly: !!process.env.CI,
  retries: process.env.CI ? 1 : 0,
  workers: 1,
  reporter: [['list'], ['html', { open: 'never' }]],
  // Generous per-test and per-assertion timeouts.
  // NOTE(review): presumably sized for real (non-mocked) LLM round-trips — confirm.
  timeout: 90_000,
  expect: { timeout: 15_000 },
  globalSetup: './e2e/global-setup.ts',

  use: {
    baseURL: 'http://localhost:5173',
    trace: 'on-first-retry',
    screenshot: 'only-on-failure',
    video: 'retain-on-failure',
    actionTimeout: 15_000,
    navigationTimeout: 30_000,
  },

  projects: [
    {
      name: 'chromium',
      use: {
        ...devices['Desktop Chrome'],
        // Use system Chrome to avoid slow browser downloads.
        channel: 'chrome',
      },
    },
  ],

  webServer: [
    {
      // Backend. Use uvicorn directly — `agentkit serve` has Confirm.ask()
      // prompts that fail in non-tty subprocess environments.
      command:
        'python3 -c "import uvicorn; uvicorn.run(' +
        "'agentkit.server.app:create_app', " +
        "host='127.0.0.1', port=8000, factory=True)\"",
      // Playwright merges these on top of the inherited process.env, so PATH,
      // API keys, etc. are preserved. Passing them via `env` instead of an
      // inline `VAR=value cmd` prefix keeps the command portable to shells
      // that do not support that syntax (e.g. cmd.exe).
      env: {
        AGENTKIT_GUI_MODE: '1',
        NO_PROXY: '127.0.0.1,localhost',
        no_proxy: '127.0.0.1,localhost',
      },
      // Playwright polls this URL to decide when the backend is ready.
      url: 'http://127.0.0.1:8000/api/v1/health',
      cwd: PROJECT_ROOT,
      // Locally, attach to an already-running server; on CI always start fresh.
      reuseExistingServer: !process.env.CI,
      timeout: 120_000,
      stdout: 'pipe',
      stderr: 'pipe',
    },
    {
      // Frontend (Vite dev server).
      command: 'npm run dev',
      url: 'http://localhost:5173',
      cwd: '.',
      reuseExistingServer: !process.env.CI,
      timeout: 60_000,
      stdout: 'pipe',
      stderr: 'pipe',
    },
  ],
})
|
||||||
|
|
@ -7,12 +7,7 @@ import type {
|
||||||
IConversation,
|
IConversation,
|
||||||
IChatRequest,
|
IChatRequest,
|
||||||
WsClientMessage,
|
WsClientMessage,
|
||||||
IExpertTeamState,
|
WsServerMessage,
|
||||||
IBoardStartedData,
|
|
||||||
IExpertSpeechData,
|
|
||||||
IRoundSummaryData,
|
|
||||||
IUserInterventionData,
|
|
||||||
IBoardConcludedData,
|
|
||||||
} from '@/api/types'
|
} from '@/api/types'
|
||||||
|
|
||||||
function generateId(): string {
|
function generateId(): string {
|
||||||
|
|
@ -276,7 +271,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
|
|
||||||
socket.onmessage = (event: MessageEvent) => {
|
socket.onmessage = (event: MessageEvent) => {
|
||||||
try {
|
try {
|
||||||
const data = JSON.parse(event.data as string) as Record<string, unknown>
|
const data = JSON.parse(event.data as string) as WsServerMessage
|
||||||
console.log('[Chat WS] Received:', data.type, data)
|
console.log('[Chat WS] Received:', data.type, data)
|
||||||
handleWsMessage(data)
|
handleWsMessage(data)
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
|
@ -403,17 +398,14 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
return _teamStore
|
return _teamStore
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: refactor to WsServerMessage union to eliminate `any`.
|
function handleWsMessage(data: WsServerMessage): void {
|
||||||
// This function predates the current VI redesign and touches many legacy branches.
|
// Discriminated union narrowing: each `case` branch narrows `data` to a
|
||||||
function handleWsMessage(data: Record<string, any>): void {
|
// specific variant of WsServerMessage, so typed fields can be accessed
|
||||||
// Backend sends nested data: {type, data: {...}}
|
// directly from `data` (or `data.data` for variants with a nested payload).
|
||||||
// Flatten for easier access
|
|
||||||
const payload = data.data ?? data
|
|
||||||
|
|
||||||
switch (data.type) {
|
switch (data.type) {
|
||||||
case 'connected': {
|
case 'connected': {
|
||||||
// Backend confirms conversation — update local ID if backend assigned a different one
|
// Backend confirms conversation — update local ID if backend assigned a different one
|
||||||
const serverConvId = data.conversation_id || payload.conversation_id
|
const serverConvId = data.conversation_id
|
||||||
if (serverConvId && serverConvId !== currentConversationId.value) {
|
if (serverConvId && serverConvId !== currentConversationId.value) {
|
||||||
// Rename the local conversation to match the server ID
|
// Rename the local conversation to match the server ID
|
||||||
const localId = currentConversationId.value
|
const localId = currentConversationId.value
|
||||||
|
|
@ -453,11 +445,12 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
const lastAssistantMsg = [...conv.messages]
|
const lastAssistantMsg = [...conv.messages]
|
||||||
.reverse()
|
.reverse()
|
||||||
.find((m) => m.role === 'assistant')
|
.find((m) => m.role === 'assistant')
|
||||||
const stepInfo = payload
|
const stepInfo = data.data
|
||||||
|
const innerData = stepInfo.data as Record<string, unknown>
|
||||||
const desc = stepInfo.event_type === 'final_answer'
|
const desc = stepInfo.event_type === 'final_answer'
|
||||||
? '生成最终回答'
|
? '生成最终回答'
|
||||||
: stepInfo.event_type === 'tool_call'
|
: stepInfo.event_type === 'tool_call'
|
||||||
? `调用工具: ${stepInfo.data?.tool_name || stepInfo.data?.name || '#'}`
|
? `调用工具: ${(innerData.tool_name || innerData.name || '#') as string}`
|
||||||
: stepInfo.event_type === 'thinking'
|
: stepInfo.event_type === 'thinking'
|
||||||
? '思考中...'
|
? '思考中...'
|
||||||
: `步骤 ${stepInfo.step || ''}: ${stepInfo.event_type || ''}`
|
: `步骤 ${stepInfo.step || ''}: ${stepInfo.event_type || ''}`
|
||||||
|
|
@ -469,11 +462,11 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
|
|
||||||
if (stepInfo.event_type === 'tool_call') {
|
if (stepInfo.event_type === 'tool_call') {
|
||||||
const tcId = `tc-${stepInfo.step || toolCalls.length}`
|
const tcId = `tc-${stepInfo.step || toolCalls.length}`
|
||||||
const toolName = stepInfo.data?.tool_name || stepInfo.data?.name || 'unknown'
|
const toolName = (innerData.tool_name || innerData.name || 'unknown') as string
|
||||||
const params = stepInfo.data?.arguments
|
const params = innerData.arguments
|
||||||
? (typeof stepInfo.data.arguments === 'string'
|
? (typeof innerData.arguments === 'string'
|
||||||
? stepInfo.data.arguments
|
? innerData.arguments
|
||||||
: JSON.stringify(stepInfo.data.arguments, null, 2))
|
: JSON.stringify(innerData.arguments, null, 2))
|
||||||
: undefined
|
: undefined
|
||||||
toolCalls.push({
|
toolCalls.push({
|
||||||
id: tcId,
|
id: tcId,
|
||||||
|
|
@ -486,20 +479,20 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
// Find the last running tool call and update it
|
// Find the last running tool call and update it
|
||||||
const lastRunning = [...toolCalls].reverse().find(tc => tc.status === 'running')
|
const lastRunning = [...toolCalls].reverse().find(tc => tc.status === 'running')
|
||||||
if (lastRunning) {
|
if (lastRunning) {
|
||||||
const resultStr = stepInfo.data?.output
|
const resultStr = innerData.output
|
||||||
? (typeof stepInfo.data.output === 'string'
|
? (typeof innerData.output === 'string'
|
||||||
? stepInfo.data.output
|
? innerData.output
|
||||||
: JSON.stringify(stepInfo.data.output, null, 2))
|
: JSON.stringify(innerData.output, null, 2))
|
||||||
: ''
|
: ''
|
||||||
lastRunning.status = stepInfo.data?.error ? 'error' : 'completed'
|
lastRunning.status = innerData.error ? 'error' : 'completed'
|
||||||
lastRunning.result = resultStr.length > 2000 ? resultStr.substring(0, 2000) + '...' : resultStr
|
lastRunning.result = resultStr.length > 2000 ? resultStr.substring(0, 2000) + '...' : resultStr
|
||||||
lastRunning.error = stepInfo.data?.error
|
lastRunning.error = innerData.error as string | undefined
|
||||||
lastRunning.duration = stepInfo.data?.duration
|
lastRunning.duration = innerData.duration as number | undefined
|
||||||
updateMessage(conversationId, lastAssistantMsg.id, { tool_calls: [...toolCalls] })
|
updateMessage(conversationId, lastAssistantMsg.id, { tool_calls: [...toolCalls] })
|
||||||
}
|
}
|
||||||
} else if (stepInfo.event_type === 'thinking') {
|
} else if (stepInfo.event_type === 'thinking') {
|
||||||
// Accumulate thinking content for ThinkingBlock rendering
|
// Accumulate thinking content for ThinkingBlock rendering
|
||||||
const thinkingChunk = stepInfo.data?.content || stepInfo.data?.thought || ''
|
const thinkingChunk = (innerData.content || innerData.thought || '') as string
|
||||||
if (thinkingChunk && lastAssistantMsg) {
|
if (thinkingChunk && lastAssistantMsg) {
|
||||||
updateMessage(conversationId, lastAssistantMsg.id, {
|
updateMessage(conversationId, lastAssistantMsg.id, {
|
||||||
thinking: (lastAssistantMsg.thinking || '') + thinkingChunk,
|
thinking: (lastAssistantMsg.thinking || '') + thinkingChunk,
|
||||||
|
|
@ -510,7 +503,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
|
|
||||||
// Accumulate final_answer content for streaming display
|
// Accumulate final_answer content for streaming display
|
||||||
if (stepInfo.event_type === 'final_answer' && lastAssistantMsg) {
|
if (stepInfo.event_type === 'final_answer' && lastAssistantMsg) {
|
||||||
const chunk = stepInfo.data?.output || ''
|
const chunk = (innerData.output || '') as string
|
||||||
if (chunk) {
|
if (chunk) {
|
||||||
updateMessage(conversationId, lastAssistantMsg.id, {
|
updateMessage(conversationId, lastAssistantMsg.id, {
|
||||||
content: (lastAssistantMsg.content || '') + chunk,
|
content: (lastAssistantMsg.content || '') + chunk,
|
||||||
|
|
@ -529,7 +522,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
.reverse()
|
.reverse()
|
||||||
.find((m) => m.role === 'assistant')
|
.find((m) => m.role === 'assistant')
|
||||||
// Backend sends: {type: "result", data: {message: "..."}} or {data: {status, content}}
|
// Backend sends: {type: "result", data: {message: "..."}} or {data: {status, content}}
|
||||||
const content = payload.message || payload.content || ''
|
const content = data.data.message || data.data.content || ''
|
||||||
if (lastAssistantMsg) {
|
if (lastAssistantMsg) {
|
||||||
// Only overwrite if we didn't already stream the content
|
// Only overwrite if we didn't already stream the content
|
||||||
const finalContent = content || lastAssistantMsg.content || ''
|
const finalContent = content || lastAssistantMsg.content || ''
|
||||||
|
|
@ -562,7 +555,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
updateMessage(conversationId, lastAssistantMsg.id, {
|
updateMessage(conversationId, lastAssistantMsg.id, {
|
||||||
message_type: 'error',
|
message_type: 'error',
|
||||||
status: 'error',
|
status: 'error',
|
||||||
error_detail: payload.message || '未知错误',
|
error_detail: data.data.message || '未知错误',
|
||||||
content: lastAssistantMsg.content || '',
|
content: lastAssistantMsg.content || '',
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -573,7 +566,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
status: 'error',
|
status: 'error',
|
||||||
message_type: 'error',
|
message_type: 'error',
|
||||||
error_detail: payload.message || '未知错误',
|
error_detail: data.data.message || '未知错误',
|
||||||
}
|
}
|
||||||
appendMessage(conversationId, errorMsg)
|
appendMessage(conversationId, errorMsg)
|
||||||
}
|
}
|
||||||
|
|
@ -585,9 +578,9 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
case 'team_formed': {
|
case 'team_formed': {
|
||||||
const teamStore = _getTeamStore()
|
const teamStore = _getTeamStore()
|
||||||
if (teamStore) {
|
if (teamStore) {
|
||||||
teamStore.setTeamState(payload as IExpertTeamState)
|
teamStore.setTeamState(data.data)
|
||||||
}
|
}
|
||||||
streamingSteps.value.push(`专家团队已组建: ${(payload as IExpertTeamState).experts.map((e) => e.name).join(', ')}`)
|
streamingSteps.value.push(`专家团队已组建: ${data.data.experts.map((e) => e.name).join(', ')}`)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -599,26 +592,26 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
// Dedup: append to existing expert message if one exists for this expert
|
// Dedup: append to existing expert message if one exists for this expert
|
||||||
const existingExpertMsg = [...conv.messages]
|
const existingExpertMsg = [...conv.messages]
|
||||||
.reverse()
|
.reverse()
|
||||||
.find((m) => m.expert_id === payload.expert_id && m.status === 'pending')
|
.find((m) => m.expert_id === data.data.expert_id && m.status === 'pending')
|
||||||
if (existingExpertMsg) {
|
if (existingExpertMsg) {
|
||||||
updateMessage(conversationId, existingExpertMsg.id, {
|
updateMessage(conversationId, existingExpertMsg.id, {
|
||||||
content: (existingExpertMsg.content || '') + (payload.content || ''),
|
content: (existingExpertMsg.content || '') + (data.data.content || ''),
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
const expertMsg: IChatMessage = {
|
const expertMsg: IChatMessage = {
|
||||||
id: generateId(),
|
id: generateId(),
|
||||||
role: 'assistant',
|
role: 'assistant',
|
||||||
content: payload.content || '',
|
content: data.data.content || '',
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
status: 'pending',
|
status: 'pending',
|
||||||
expert_id: payload.expert_id,
|
expert_id: data.data.expert_id,
|
||||||
expert_name: payload.expert_name,
|
expert_name: data.data.expert_name,
|
||||||
expert_color: payload.expert_color,
|
expert_color: data.data.expert_color,
|
||||||
message_type: 'chat',
|
message_type: 'chat',
|
||||||
}
|
}
|
||||||
appendMessage(conversationId, expertMsg)
|
appendMessage(conversationId, expertMsg)
|
||||||
}
|
}
|
||||||
streamingSteps.value.push(`${payload.expert_name}: 步骤 ${payload.step}`)
|
streamingSteps.value.push(`${data.data.expert_name}: 步骤 ${data.data.step}`)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -630,12 +623,12 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
const expertMsg: IChatMessage = {
|
const expertMsg: IChatMessage = {
|
||||||
id: generateId(),
|
id: generateId(),
|
||||||
role: 'assistant',
|
role: 'assistant',
|
||||||
content: payload.content || '',
|
content: data.data.content || '',
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
status: 'completed',
|
status: 'completed',
|
||||||
expert_id: payload.expert_id,
|
expert_id: data.data.expert_id,
|
||||||
expert_name: payload.expert_name,
|
expert_name: data.data.expert_name,
|
||||||
expert_color: payload.expert_color,
|
expert_color: data.data.expert_color,
|
||||||
message_type: 'chat',
|
message_type: 'chat',
|
||||||
}
|
}
|
||||||
appendMessage(conversationId, expertMsg)
|
appendMessage(conversationId, expertMsg)
|
||||||
|
|
@ -645,7 +638,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
case 'plan_update': {
|
case 'plan_update': {
|
||||||
const teamStore = _getTeamStore()
|
const teamStore = _getTeamStore()
|
||||||
if (teamStore) {
|
if (teamStore) {
|
||||||
teamStore.updatePhases(payload.plan_phases)
|
teamStore.updatePhases(data.data.plan_phases)
|
||||||
}
|
}
|
||||||
const conversationId = currentConversationId.value
|
const conversationId = currentConversationId.value
|
||||||
if (!conversationId) break
|
if (!conversationId) break
|
||||||
|
|
@ -656,7 +649,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
.find((m) => m.message_type === 'plan_update')
|
.find((m) => m.message_type === 'plan_update')
|
||||||
if (existingPlanMsg) {
|
if (existingPlanMsg) {
|
||||||
updateMessage(conversationId, existingPlanMsg.id, {
|
updateMessage(conversationId, existingPlanMsg.id, {
|
||||||
plan_phases: payload.plan_phases,
|
plan_phases: data.data.plan_phases,
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
const planMsg: IChatMessage = {
|
const planMsg: IChatMessage = {
|
||||||
|
|
@ -666,7 +659,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
status: 'completed',
|
status: 'completed',
|
||||||
message_type: 'plan_update',
|
message_type: 'plan_update',
|
||||||
plan_phases: payload.plan_phases,
|
plan_phases: data.data.plan_phases,
|
||||||
}
|
}
|
||||||
appendMessage(conversationId, planMsg)
|
appendMessage(conversationId, planMsg)
|
||||||
}
|
}
|
||||||
|
|
@ -681,7 +674,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
const synthesisMsg: IChatMessage = {
|
const synthesisMsg: IChatMessage = {
|
||||||
id: generateId(),
|
id: generateId(),
|
||||||
role: 'assistant',
|
role: 'assistant',
|
||||||
content: payload.content || '',
|
content: data.data.content || '',
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
status: 'completed',
|
status: 'completed',
|
||||||
message_type: 'milestone',
|
message_type: 'milestone',
|
||||||
|
|
@ -702,8 +695,8 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
case 'phase_started': {
|
case 'phase_started': {
|
||||||
const teamStore = _getTeamStore()
|
const teamStore = _getTeamStore()
|
||||||
if (teamStore?.teamState) {
|
if (teamStore?.teamState) {
|
||||||
teamStore.updatePhaseStatus(payload.phase_id, 'in_progress')
|
teamStore.updatePhaseStatus(data.data.phase_id, 'in_progress')
|
||||||
streamingSteps.value.push(`阶段开始: ${payload.phase_name} (${payload.assigned_expert})`)
|
streamingSteps.value.push(`阶段开始: ${data.data.phase_name} (${data.data.assigned_expert})`)
|
||||||
}
|
}
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
@ -711,8 +704,8 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
case 'phase_completed': {
|
case 'phase_completed': {
|
||||||
const teamStore = _getTeamStore()
|
const teamStore = _getTeamStore()
|
||||||
if (teamStore?.teamState) {
|
if (teamStore?.teamState) {
|
||||||
teamStore.updatePhaseStatus(payload.phase_id, 'completed', payload.result_summary)
|
teamStore.updatePhaseStatus(data.data.phase_id, 'completed', data.data.result_summary)
|
||||||
streamingSteps.value.push(`阶段完成: ${payload.phase_name}`)
|
streamingSteps.value.push(`阶段完成: ${data.data.phase_name}`)
|
||||||
}
|
}
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
@ -720,8 +713,8 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
case 'phase_failed': {
|
case 'phase_failed': {
|
||||||
const teamStore = _getTeamStore()
|
const teamStore = _getTeamStore()
|
||||||
if (teamStore?.teamState) {
|
if (teamStore?.teamState) {
|
||||||
teamStore.updatePhaseStatus(payload.phase_id, 'failed', payload.error)
|
teamStore.updatePhaseStatus(data.data.phase_id, 'failed', data.data.error)
|
||||||
streamingSteps.value.push(`阶段失败: ${payload.phase_name} - ${payload.error}`)
|
streamingSteps.value.push(`阶段失败: ${data.data.phase_name} - ${data.data.error}`)
|
||||||
}
|
}
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
@ -729,23 +722,23 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
// ── Board Meeting 模式事件 ────────────────────────────────────────
|
// ── Board Meeting 模式事件 ────────────────────────────────────────
|
||||||
|
|
||||||
case 'board_started': {
|
case 'board_started': {
|
||||||
const data = payload as IBoardStartedData
|
const boardData = data.data
|
||||||
// Initialize board state
|
// Initialize board state
|
||||||
boardState.value = {
|
boardState.value = {
|
||||||
topic: data.topic,
|
topic: boardData.topic,
|
||||||
experts: data.experts.map((e) => ({
|
experts: boardData.experts.map((e) => ({
|
||||||
name: e.name,
|
name: e.name,
|
||||||
avatar: e.avatar,
|
avatar: e.avatar,
|
||||||
color: e.color,
|
color: e.color,
|
||||||
is_moderator: e.is_moderator,
|
is_moderator: e.is_moderator,
|
||||||
persona: e.persona,
|
persona: e.persona,
|
||||||
})),
|
})),
|
||||||
max_rounds: data.max_rounds,
|
max_rounds: boardData.max_rounds,
|
||||||
current_round: 0,
|
current_round: 0,
|
||||||
status: 'discussing',
|
status: 'discussing',
|
||||||
}
|
}
|
||||||
streamingSteps.value.push(
|
streamingSteps.value.push(
|
||||||
`私董会已开启: 主题「${data.topic}」, ${data.experts.length} 位专家, 最多 ${data.max_rounds} 轮`
|
`私董会已开启: 主题「${boardData.topic}」, ${boardData.experts.length} 位专家, 最多 ${boardData.max_rounds} 轮`
|
||||||
)
|
)
|
||||||
// Push a structured banner message so the renderer can show BoardBannerCard
|
// Push a structured banner message so the renderer can show BoardBannerCard
|
||||||
const conversationId = currentConversationId.value
|
const conversationId = currentConversationId.value
|
||||||
|
|
@ -753,11 +746,11 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
const startMsg: IChatMessage = {
|
const startMsg: IChatMessage = {
|
||||||
id: generateId(),
|
id: generateId(),
|
||||||
role: 'assistant',
|
role: 'assistant',
|
||||||
content: `🏛️ 私董会开始:${data.topic}`,
|
content: `🏛️ 私董会开始:${boardData.topic}`,
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
status: 'completed',
|
status: 'completed',
|
||||||
message_type: 'board_started',
|
message_type: 'board_started',
|
||||||
board_started: data,
|
board_started: boardData,
|
||||||
board_round: 0,
|
board_round: 0,
|
||||||
}
|
}
|
||||||
appendMessage(conversationId, startMsg)
|
appendMessage(conversationId, startMsg)
|
||||||
|
|
@ -766,67 +759,67 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
}
|
}
|
||||||
|
|
||||||
case 'expert_speech': {
|
case 'expert_speech': {
|
||||||
const data = payload as IExpertSpeechData
|
const speechData = data.data
|
||||||
// Update current round in board state
|
// Update current round in board state
|
||||||
if (boardState.value && data.round > boardState.value.current_round) {
|
if (boardState.value && speechData.round > boardState.value.current_round) {
|
||||||
boardState.value.current_round = data.round
|
boardState.value.current_round = speechData.round
|
||||||
}
|
}
|
||||||
const conversationId = currentConversationId.value
|
const conversationId = currentConversationId.value
|
||||||
if (!conversationId) break
|
if (!conversationId) break
|
||||||
const speechMsg: IChatMessage = {
|
const speechMsg: IChatMessage = {
|
||||||
id: generateId(),
|
id: generateId(),
|
||||||
role: 'assistant',
|
role: 'assistant',
|
||||||
content: data.content || '',
|
content: speechData.content || '',
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
status: 'completed',
|
status: 'completed',
|
||||||
expert_name: data.expert_name,
|
expert_name: speechData.expert_name,
|
||||||
expert_color: data.expert_color,
|
expert_color: speechData.expert_color,
|
||||||
expert_avatar: data.expert_avatar,
|
expert_avatar: speechData.expert_avatar,
|
||||||
message_type: 'board_speech',
|
message_type: 'board_speech',
|
||||||
board_round: data.round,
|
board_round: speechData.round,
|
||||||
board_role: data.role,
|
board_role: speechData.role,
|
||||||
}
|
}
|
||||||
appendMessage(conversationId, speechMsg)
|
appendMessage(conversationId, speechMsg)
|
||||||
streamingSteps.value.push(
|
streamingSteps.value.push(
|
||||||
`${data.expert_avatar} ${data.expert_name} (第${data.round}轮${data.role === 'moderator' ? '·主持' : ''})`
|
`${speechData.expert_avatar} ${speechData.expert_name} (第${speechData.round}轮${speechData.role === 'moderator' ? '·主持' : ''})`
|
||||||
)
|
)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
case 'round_summary': {
|
case 'round_summary': {
|
||||||
const data = payload as IRoundSummaryData
|
const summaryData = data.data
|
||||||
const conversationId = currentConversationId.value
|
const conversationId = currentConversationId.value
|
||||||
if (!conversationId) break
|
if (!conversationId) break
|
||||||
const summaryMsg: IChatMessage = {
|
const summaryMsg: IChatMessage = {
|
||||||
id: generateId(),
|
id: generateId(),
|
||||||
role: 'assistant',
|
role: 'assistant',
|
||||||
content: data.content || '',
|
content: summaryData.content || '',
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
status: 'completed',
|
status: 'completed',
|
||||||
expert_name: data.moderator_name,
|
expert_name: summaryData.moderator_name,
|
||||||
message_type: 'board_summary',
|
message_type: 'board_summary',
|
||||||
board_round: data.round,
|
board_round: summaryData.round,
|
||||||
board_role: 'summary',
|
board_role: 'summary',
|
||||||
}
|
}
|
||||||
appendMessage(conversationId, summaryMsg)
|
appendMessage(conversationId, summaryMsg)
|
||||||
streamingSteps.value.push(`第${data.round}轮小结${data.continue ? '(继续讨论)' : '(即将结束)'}`)
|
streamingSteps.value.push(`第${summaryData.round}轮小结${summaryData.continue ? '(继续讨论)' : '(即将结束)'}`)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
case 'user_intervention': {
|
case 'user_intervention': {
|
||||||
const data = payload as IUserInterventionData
|
const interventionData = data.data
|
||||||
streamingSteps.value.push(`用户干预: ${data.content.slice(0, 50)}...`)
|
streamingSteps.value.push(`用户干预: ${interventionData.content.slice(0, 50)}...`)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
case 'board_concluded': {
|
case 'board_concluded': {
|
||||||
const data = payload as IBoardConcludedData
|
const conclusionData = data.data
|
||||||
// Update board state to completed
|
// Update board state to completed
|
||||||
if (boardState.value) {
|
if (boardState.value) {
|
||||||
boardState.value.status = 'completed'
|
boardState.value.status = 'completed'
|
||||||
}
|
}
|
||||||
streamingSteps.value.push(
|
streamingSteps.value.push(
|
||||||
`私董会结束: ${data.total_rounds} 轮讨论${data.error ? ' (异常)' : ''}`
|
`私董会结束: ${conclusionData.total_rounds} 轮讨论${conclusionData.error ? ' (异常)' : ''}`
|
||||||
)
|
)
|
||||||
// Push a structured conclusion message so the renderer can show BoardConclusionCard
|
// Push a structured conclusion message so the renderer can show BoardConclusionCard
|
||||||
const conversationId = currentConversationId.value
|
const conversationId = currentConversationId.value
|
||||||
|
|
@ -834,12 +827,12 @@ export const useChatStore = defineStore('chat', () => {
|
||||||
const conclusionMsg: IChatMessage = {
|
const conclusionMsg: IChatMessage = {
|
||||||
id: generateId(),
|
id: generateId(),
|
||||||
role: 'assistant',
|
role: 'assistant',
|
||||||
content: data.summary || '私董会已结束',
|
content: conclusionData.summary || '私董会已结束',
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
status: 'completed',
|
status: 'completed',
|
||||||
message_type: 'board_conclusion',
|
message_type: 'board_conclusion',
|
||||||
board_conclusion: data,
|
board_conclusion: conclusionData,
|
||||||
board_round: data.total_rounds,
|
board_round: conclusionData.total_rounds,
|
||||||
}
|
}
|
||||||
appendMessage(conversationId, conclusionMsg)
|
appendMessage(conversationId, conclusionMsg)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -1,11 +1,11 @@
|
||||||
# AgentKit 能力基准测试报告
|
# AgentKit 能力基准测试报告
|
||||||
|
|
||||||
## 测试概要
|
## 测试概要
|
||||||
- 时间: 2026-06-17T15:47:33.591101+00:00
|
- 时间: 2026-06-20T03:18:35.937935+00:00
|
||||||
- 版本: 0.1.0
|
- 版本: 0.1.0
|
||||||
- 模式: mock
|
- 模式: llm
|
||||||
- 运行次数: 1
|
- 运行次数: 1
|
||||||
- 总体准确率: 100.0% ± 0.0%
|
- 总体准确率: 60.0% ± 0.0%
|
||||||
|
|
||||||
## 与行业 Benchmark 对比
|
## 与行业 Benchmark 对比
|
||||||
|
|
||||||
|
|
@ -17,252 +17,47 @@
|
||||||
|
|
||||||
## 维度结果
|
## 维度结果
|
||||||
|
|
||||||
### 1. 预处理准确度 (Preprocessing Accuracy) [Mock]
|
### 9. LLM 推理能力 (LLM Reasoning) [LLM]
|
||||||
|
|
||||||
| 指标 | 值 |
|
| 指标 | 值 |
|
||||||
|---|---|
|
|---|---|
|
||||||
| Accuracy | 100.0% ± 0.0% |
|
| Accuracy | 60.0% ± 0.0% |
|
||||||
| 95% CI | [79.6%, 100.0%] |
|
| 95% CI | [23.1%, 88.2%] |
|
||||||
| Precision | 100.0% |
|
|
||||||
| Recall | 100.0% |
|
|
||||||
| F1 | 100.0% |
|
|
||||||
| Latency p50 | 0.01ms |
|
|
||||||
| Latency p95 | 0.07ms |
|
|
||||||
| Latency p99 | 0.11ms |
|
|
||||||
| Consistency | 100.0% |
|
|
||||||
| Total / Pass / Fail | 15 / 15 / 0 |
|
|
||||||
|
|
||||||
#### 按类别分布
|
|
||||||
|
|
||||||
| 类别 | 用例数 | 通过 | 准确率 |
|
|
||||||
|---|---|---|---|
|
|
||||||
| greeting | 4 | 4 | 100.0% |
|
|
||||||
| tool_query | 5 | 5 | 100.0% |
|
|
||||||
| skill_prefix | 3 | 3 | 100.0% |
|
|
||||||
| complex | 3 | 3 | 100.0% |
|
|
||||||
|
|
||||||
#### 按难度分布
|
|
||||||
|
|
||||||
| 难度 | 用例数 | 通过 | 准确率 |
|
|
||||||
|---|---|---|---|
|
|
||||||
| easy | 5 | 5 | 100.0% |
|
|
||||||
| medium | 7 | 7 | 100.0% |
|
|
||||||
| hard | 3 | 3 | 100.0% |
|
|
||||||
|
|
||||||
### 2. 过拟合检测 (Overfitting Detection) [Mock]
|
|
||||||
|
|
||||||
| 指标 | 值 |
|
|
||||||
|---|---|
|
|
||||||
| Accuracy | 100.0% ± 0.0% |
|
|
||||||
| 95% CI | [56.5%, 100.0%] |
|
|
||||||
| Precision | 100.0% |
|
|
||||||
| Recall | 100.0% |
|
|
||||||
| F1 | 100.0% |
|
|
||||||
| Latency p50 | 0.01ms |
|
|
||||||
| Latency p95 | 0.03ms |
|
|
||||||
| Latency p99 | 0.03ms |
|
|
||||||
| Consistency | 100.0% |
|
|
||||||
| Total / Pass / Fail | 5 / 5 / 0 |
|
|
||||||
|
|
||||||
#### 按类别分布
|
|
||||||
|
|
||||||
| 类别 | 用例数 | 通过 | 准确率 |
|
|
||||||
|---|---|---|---|
|
|
||||||
| ip_check | 1 | 1 | 100.0% |
|
|
||||||
| search | 1 | 1 | 100.0% |
|
|
||||||
| greeting | 1 | 1 | 100.0% |
|
|
||||||
| tool_use | 1 | 1 | 100.0% |
|
|
||||||
| complex | 1 | 1 | 100.0% |
|
|
||||||
|
|
||||||
#### 按难度分布
|
|
||||||
|
|
||||||
| 难度 | 用例数 | 通过 | 准确率 |
|
|
||||||
|---|---|---|---|
|
|
||||||
| medium | 3 | 3 | 100.0% |
|
|
||||||
| easy | 1 | 1 | 100.0% |
|
|
||||||
| hard | 1 | 1 | 100.0% |
|
|
||||||
|
|
||||||
### 3. 效率测试 (Efficiency) [Mock]
|
|
||||||
|
|
||||||
| 指标 | 值 |
|
|
||||||
|---|---|
|
|
||||||
| Accuracy | 100.0% ± 0.0% |
|
|
||||||
| 95% CI | [56.5%, 100.0%] |
|
|
||||||
| Precision | 0.0% |
|
| Precision | 0.0% |
|
||||||
| Recall | 0.0% |
|
| Recall | 0.0% |
|
||||||
| F1 | 0.0% |
|
| F1 | 0.0% |
|
||||||
| Latency p50 | 0.33ms |
|
| Latency p50 | 35309.32ms |
|
||||||
| Latency p95 | 0.64ms |
|
| Latency p95 | 41704.39ms |
|
||||||
| Latency p99 | 0.67ms |
|
| Latency p99 | 42044.76ms |
|
||||||
| Consistency | 100.0% |
|
| Consistency | 100.0% |
|
||||||
| Total / Pass / Fail | 5 / 5 / 0 |
|
| Total / Pass / Fail | 5 / 3 / 2 |
|
||||||
|
|
||||||
#### 按类别分布
|
#### 按类别分布
|
||||||
|
|
||||||
| 类别 | 用例数 | 通过 | 准确率 |
|
| 类别 | 用例数 | 通过 | 准确率 |
|
||||||
|---|---|---|---|
|
|---|---|---|---|
|
||||||
| preprocess_latency | 3 | 3 | 100.0% |
|
| intent_understanding | 1 | 0 | 0.0% |
|
||||||
| tool_search_latency | 2 | 2 | 100.0% |
|
| tool_selection | 1 | 1 | 100.0% |
|
||||||
|
| multi_step | 1 | 1 | 100.0% |
|
||||||
|
| code_generation | 1 | 0 | 0.0% |
|
||||||
|
| error_recovery | 1 | 1 | 100.0% |
|
||||||
|
|
||||||
#### 按难度分布
|
#### 按难度分布
|
||||||
|
|
||||||
| 难度 | 用例数 | 通过 | 准确率 |
|
| 难度 | 用例数 | 通过 | 准确率 |
|
||||||
|---|---|---|---|
|
|---|---|---|---|
|
||||||
| easy | 2 | 2 | 100.0% |
|
| easy | 1 | 0 | 0.0% |
|
||||||
| medium | 3 | 3 | 100.0% |
|
| medium | 2 | 1 | 50.0% |
|
||||||
|
| hard | 2 | 2 | 100.0% |
|
||||||
|
|
||||||
### 4. 工具搜索 (Tool Search) [Mock]
|
#### 失败用例分析
|
||||||
|
|
||||||
| 指标 | 值 |
|
| 用例 ID | 类别 | 难度 | 期望 | 实际 | 根因 |
|
||||||
|---|---|
|
|---|---|---|---|---|---|
|
||||||
| Accuracy | 100.0% ± 0.0% |
|
| llm-001 | intent_understanding | easy | react | timeout | timeout |
|
||||||
| 95% CI | [72.2%, 100.0%] |
|
| llm-004 | code_generation | medium | react | timeout | timeout |
|
||||||
| Precision | 83.3% |
|
|
||||||
| Recall | 83.3% |
|
|
||||||
| F1 | 83.3% |
|
|
||||||
| Latency p50 | 0.01ms |
|
|
||||||
| Latency p95 | 0.02ms |
|
|
||||||
| Latency p99 | 0.02ms |
|
|
||||||
| Consistency | 100.0% |
|
|
||||||
| Total / Pass / Fail | 10 / 10 / 0 |
|
|
||||||
|
|
||||||
#### 按类别分布
|
|
||||||
|
|
||||||
| 类别 | 用例数 | 通过 | 准确率 |
|
|
||||||
|---|---|---|---|
|
|
||||||
| exact_match | 5 | 5 | 100.0% |
|
|
||||||
| fuzzy_match | 2 | 2 | 100.0% |
|
|
||||||
| no_match | 2 | 2 | 100.0% |
|
|
||||||
| top_k | 1 | 1 | 100.0% |
|
|
||||||
|
|
||||||
#### 按难度分布
|
|
||||||
|
|
||||||
| 难度 | 用例数 | 通过 | 准确率 |
|
|
||||||
|---|---|---|---|
|
|
||||||
| easy | 7 | 7 | 100.0% |
|
|
||||||
| medium | 3 | 3 | 100.0% |
|
|
||||||
|
|
||||||
### 5. 事件模型 (Event Model) [Mock]
|
|
||||||
|
|
||||||
| 指标 | 值 |
|
|
||||||
|---|---|
|
|
||||||
| Accuracy | 100.0% ± 0.0% |
|
|
||||||
| 95% CI | [61.0%, 100.0%] |
|
|
||||||
| Precision | 0.0% |
|
|
||||||
| Recall | 0.0% |
|
|
||||||
| F1 | 0.0% |
|
|
||||||
| Latency p50 | 0.05ms |
|
|
||||||
| Latency p95 | 15.87ms |
|
|
||||||
| Latency p99 | 20.08ms |
|
|
||||||
| Consistency | 100.0% |
|
|
||||||
| Total / Pass / Fail | 6 / 6 / 0 |
|
|
||||||
|
|
||||||
#### 按类别分布
|
|
||||||
|
|
||||||
| 类别 | 用例数 | 通过 | 准确率 |
|
|
||||||
|---|---|---|---|
|
|
||||||
| sq_lifecycle | 3 | 3 | 100.0% |
|
|
||||||
| eq_lifecycle | 3 | 3 | 100.0% |
|
|
||||||
|
|
||||||
#### 按难度分布
|
|
||||||
|
|
||||||
| 难度 | 用例数 | 通过 | 准确率 |
|
|
||||||
|---|---|---|---|
|
|
||||||
| easy | 6 | 6 | 100.0% |
|
|
||||||
|
|
||||||
### 6. 规格管理 (Spec Management) [Mock]
|
|
||||||
|
|
||||||
| 指标 | 值 |
|
|
||||||
|---|---|
|
|
||||||
| Accuracy | 100.0% ± 0.0% |
|
|
||||||
| 95% CI | [64.6%, 100.0%] |
|
|
||||||
| Precision | 0.0% |
|
|
||||||
| Recall | 0.0% |
|
|
||||||
| F1 | 0.0% |
|
|
||||||
| Latency p50 | 1.94ms |
|
|
||||||
| Latency p95 | 2.94ms |
|
|
||||||
| Latency p99 | 3.25ms |
|
|
||||||
| Consistency | 100.0% |
|
|
||||||
| Total / Pass / Fail | 7 / 7 / 0 |
|
|
||||||
|
|
||||||
#### 按类别分布
|
|
||||||
|
|
||||||
| 类别 | 用例数 | 通过 | 准确率 |
|
|
||||||
|---|---|---|---|
|
|
||||||
| crud | 5 | 5 | 100.0% |
|
|
||||||
| edge | 2 | 2 | 100.0% |
|
|
||||||
|
|
||||||
#### 按难度分布
|
|
||||||
|
|
||||||
| 难度 | 用例数 | 通过 | 准确率 |
|
|
||||||
|---|---|---|---|
|
|
||||||
| easy | 6 | 6 | 100.0% |
|
|
||||||
| medium | 1 | 1 | 100.0% |
|
|
||||||
|
|
||||||
### 7. 验证循环 (Verification Loop) [Mock]
|
|
||||||
|
|
||||||
| 指标 | 值 |
|
|
||||||
|---|---|
|
|
||||||
| Accuracy | 100.0% ± 0.0% |
|
|
||||||
| 95% CI | [56.5%, 100.0%] |
|
|
||||||
| Precision | 0.0% |
|
|
||||||
| Recall | 0.0% |
|
|
||||||
| F1 | 0.0% |
|
|
||||||
| Latency p50 | 22.22ms |
|
|
||||||
| Latency p95 | 47.79ms |
|
|
||||||
| Latency p99 | 50.93ms |
|
|
||||||
| Consistency | 100.0% |
|
|
||||||
| Total / Pass / Fail | 5 / 5 / 0 |
|
|
||||||
|
|
||||||
#### 按类别分布
|
|
||||||
|
|
||||||
| 类别 | 用例数 | 通过 | 准确率 |
|
|
||||||
|---|---|---|---|
|
|
||||||
| basic | 2 | 2 | 100.0% |
|
|
||||||
| retry | 1 | 1 | 100.0% |
|
|
||||||
| timeout | 1 | 1 | 100.0% |
|
|
||||||
| multi | 1 | 1 | 100.0% |
|
|
||||||
|
|
||||||
#### 按难度分布
|
|
||||||
|
|
||||||
| 难度 | 用例数 | 通过 | 准确率 |
|
|
||||||
|---|---|---|---|
|
|
||||||
| easy | 2 | 2 | 100.0% |
|
|
||||||
| medium | 3 | 3 | 100.0% |
|
|
||||||
|
|
||||||
### 8. 私董会路由 (Board Meeting Routing) [Mock]
|
|
||||||
|
|
||||||
| 指标 | 值 |
|
|
||||||
|---|---|
|
|
||||||
| Accuracy | 100.0% ± 0.0% |
|
|
||||||
| 95% CI | [82.4%, 100.0%] |
|
|
||||||
| Precision | 100.0% |
|
|
||||||
| Recall | 100.0% |
|
|
||||||
| F1 | 100.0% |
|
|
||||||
| Latency p50 | 0.01ms |
|
|
||||||
| Latency p95 | 0.39ms |
|
|
||||||
| Latency p99 | 1.19ms |
|
|
||||||
| Consistency | 100.0% |
|
|
||||||
| Total / Pass / Fail | 18 / 18 / 0 |
|
|
||||||
|
|
||||||
#### 按类别分布
|
|
||||||
|
|
||||||
| 类别 | 用例数 | 通过 | 准确率 |
|
|
||||||
|---|---|---|---|
|
|
||||||
| default_template | 3 | 3 | 100.0% |
|
|
||||||
| explicit_experts | 3 | 3 | 100.0% |
|
|
||||||
| topic_extraction | 3 | 3 | 100.0% |
|
|
||||||
| no_match | 3 | 3 | 100.0% |
|
|
||||||
| name_validation | 3 | 3 | 100.0% |
|
|
||||||
| stop_command | 3 | 3 | 100.0% |
|
|
||||||
|
|
||||||
#### 按难度分布
|
|
||||||
|
|
||||||
| 难度 | 用例数 | 通过 | 准确率 |
|
|
||||||
|---|---|---|---|
|
|
||||||
| easy | 11 | 11 | 100.0% |
|
|
||||||
| medium | 7 | 7 | 100.0% |
|
|
||||||
|
|
||||||
## 问题总结与改进建议
|
## 问题总结与改进建议
|
||||||
|
|
||||||
- 所有维度表现良好,无需特别改进。
|
- **llm_reasoning**: 准确率 60.0% 低于 90%,建议检查失败用例并优化
|
||||||
|
- **llm_reasoning**: P95 延迟 41704.39ms 较高,建议优化性能
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,636 @@
|
||||||
|
"""Real LLM E2E tests — tests against a live server with real LLM providers.
|
||||||
|
|
||||||
|
These tests start a real AgentKit server using the project's ``agentkit.yaml``
|
||||||
|
configuration and make actual LLM API calls to Bailian (DashScope).
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- ``DASHSCOPE_API_KEY`` environment variable (loaded from ``.env``)
|
||||||
|
- Network access to ``https://coding.dashscope.aliyuncs.com/v1``
|
||||||
|
|
||||||
|
Run with::
|
||||||
|
|
||||||
|
.venv/bin/python -m pytest tests/e2e/test_real_llm_e2e.py -v --timeout=180
|
||||||
|
|
||||||
|
All tests are marked with ``@pytest.mark.integration`` so they are excluded
|
||||||
|
from the default unit-test run (``pytest -m "not integration"``).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Generator
|
||||||
|
|
||||||
|
import aiosqlite
|
||||||
|
import httpx
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
# Local proxies (Clash/V2Ray) intercept loopback traffic; exempt localhost in
# both spellings of the variable so every HTTP client in this process sees it.
for _no_proxy_name in ("NO_PROXY", "no_proxy"):
    os.environ[_no_proxy_name] = "127.0.0.1,localhost"

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# Two directory levels above the directory that contains this file.
PROJECT_ROOT = Path(__file__).resolve().parents[2]

# Dedicated port so this suite never collides with the mock E2E server (18765).
REAL_LLM_HOST = "127.0.0.1"
REAL_LLM_PORT = 18766
REAL_LLM_BASE_URL = "http://{}:{}".format(REAL_LLM_HOST, REAL_LLM_PORT)
REAL_LLM_WS_URL = "ws://{}:{}".format(REAL_LLM_HOST, REAL_LLM_PORT)

# Fixed JWT secret handed to the server subprocess for the whole session.
TEST_JWT_SECRET = "test-jwt-secret-for-real-llm-e2e-fixed-do-not-use-in-prod"

# Credentials of the test account that is inserted directly into the auth DB.
TEST_USERNAME = "real_llm_e2e_user"
TEST_PASSWORD = "TestPassword123!@#"
TEST_EMAIL = "real_llm_e2e@example.com"

# Model alias sent in LLM gateway requests (defined in agentkit.yaml).
TEST_MODEL = "default"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# .env loading
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _load_dotenv_vars(dotenv_path: Path) -> dict[str, str]:
|
||||||
|
"""Load env vars from a .env file into a dict (does not touch os.environ)."""
|
||||||
|
env_vars: dict[str, str] = {}
|
||||||
|
if not dotenv_path.exists():
|
||||||
|
return env_vars
|
||||||
|
with open(dotenv_path, encoding="utf-8") as f:
|
||||||
|
for line in f:
|
||||||
|
line = line.strip()
|
||||||
|
if not line or line.startswith("#"):
|
||||||
|
continue
|
||||||
|
if "=" not in line:
|
||||||
|
continue
|
||||||
|
key, _, value = line.partition("=")
|
||||||
|
key = key.strip()
|
||||||
|
value = value.strip().strip("\"'")
|
||||||
|
if key:
|
||||||
|
env_vars[key] = value
|
||||||
|
return env_vars
|
||||||
|
|
||||||
|
|
||||||
|
def _has_dashscope_key() -> bool:
|
||||||
|
"""Return True if DASHSCOPE_API_KEY is available (env or .env file)."""
|
||||||
|
if os.environ.get("DASHSCOPE_API_KEY"):
|
||||||
|
return True
|
||||||
|
dotenv_vars = _load_dotenv_vars(PROJECT_ROOT / ".env")
|
||||||
|
return bool(dotenv_vars.get("DASHSCOPE_API_KEY"))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test user creation
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _create_test_user(auth_db_path: Path) -> None:
    """Seed the auth database with the admin account used by this suite.

    The schema is initialised through the project's ``init_auth_db`` and the
    password is hashed with the project's ``hash_password`` so that the
    ``/auth/login`` route can verify it.  Any existing row with the same
    username is deleted before the new one is inserted.

    Args:
        auth_db_path: Path of the SQLite auth database file.
    """
    from agentkit.server.auth.models import init_auth_db
    from agentkit.server.auth.password import hash_password

    # Make sure the tables exist before touching them.
    asyncio.run(init_auth_db(auth_db_path))

    stamp = datetime.now(timezone.utc).isoformat()
    insert_sql = (
        "INSERT INTO users "
        "(id, username, email, password_hash, role, is_active, "
        " is_terminal_authorized, is_server_terminal_authorized, "
        " created_at, updated_at) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
    )
    row = (
        str(uuid.uuid4()),
        TEST_USERNAME,
        TEST_EMAIL,
        hash_password(TEST_PASSWORD),
        "admin",  # admin role → full access for tests
        1,  # is_active
        1,  # is_terminal_authorized
        1,  # is_server_terminal_authorized
        stamp,  # created_at
        stamp,  # updated_at
    )

    async def _replace_user() -> None:
        async with aiosqlite.connect(str(auth_db_path)) as conn:
            # Drop whatever a previous run may have left behind.
            await conn.execute("DELETE FROM users WHERE username = ?", (TEST_USERNAME,))
            await conn.execute(insert_sql, row)
            await conn.commit()

    asyncio.run(_replace_user())
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Session-scoped server fixture
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _stop_server_process(proc: subprocess.Popen, grace_seconds: float = 10) -> None:
    """Terminate *proc*, escalating to ``kill`` if it does not exit in time."""
    if proc.poll() is not None:
        return
    proc.terminate()
    try:
        proc.wait(timeout=grace_seconds)
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.wait()


@pytest.fixture(scope="session")
def real_llm_server(
    tmp_path_factory: pytest.TempPathFactory,
) -> Generator[tuple[str, Path], None, None]:
    """Start a real AgentKit server with actual LLM providers.

    Yields ``(base_url, auth_db_path)``.  The server is launched with the
    project root's ``agentkit.yaml`` and a temporary auth database, and is
    always stopped on teardown — including when startup fails.

    Server stdout and stderr are both written to one log file.  Nothing reads
    a pipe while the server runs, so piping stdout could block the child once
    the pipe buffer fills.  The tail of that log is included in startup
    failure messages.

    Skips if ``DASHSCOPE_API_KEY`` is not available.
    """
    if not _has_dashscope_key():
        pytest.skip("DASHSCOPE_API_KEY not set — skipping real LLM E2E tests")

    tmp_path = tmp_path_factory.mktemp("real_llm_server")
    auth_db_path = tmp_path / "auth.db"

    # Build subprocess environment.
    env = os.environ.copy()

    # Disable HTTP proxies so localhost requests don't go through Clash/V2Ray.
    for proxy_var in ("HTTP_PROXY", "HTTPS_PROXY", "http_proxy", "https_proxy", "ALL_PROXY", "all_proxy"):
        env.pop(proxy_var, None)
    env["NO_PROXY"] = "127.0.0.1,localhost"
    env["no_proxy"] = "127.0.0.1,localhost"

    # Values from .env only fill in variables that are missing or empty.
    for key, value in _load_dotenv_vars(PROJECT_ROOT / ".env").items():
        if not env.get(key):
            env[key] = value

    # Auth configuration.
    env["AGENTKIT_JWT_SECRET"] = TEST_JWT_SECRET
    env["AGENTKIT_AUTH_DB"] = str(auth_db_path)

    # GUI mode creates a default chat agent (needed for chat / WebSocket tests).
    env["AGENTKIT_GUI_MODE"] = "1"

    # Explicit config path (also auto-discovered via CWD, but set explicitly).
    env["AGENTKIT_CONFIG_PATH"] = str(PROJECT_ROOT / "agentkit.yaml")

    server_log = tmp_path / "server_stderr.log"

    def _read_log() -> str:
        """Return everything the server has written to its log so far."""
        if not server_log.exists():
            return ""
        return server_log.read_text(encoding="utf-8", errors="replace")

    base_url = REAL_LLM_BASE_URL

    # Start the server via uvicorn directly (agentkit serve has interactive
    # prompts that fail in non-tty subprocess environments).
    with open(server_log, "w", encoding="utf-8") as log_fh:
        proc = subprocess.Popen(
            [
                sys.executable,
                "-c",
                "import uvicorn; uvicorn.run("
                "'agentkit.server.app:create_app', "
                f"host='{REAL_LLM_HOST}', port={REAL_LLM_PORT}, factory=True)",
            ],
            env=env,
            stdout=log_fh,
            stderr=subprocess.STDOUT,
            cwd=str(PROJECT_ROOT),
        )
        try:
            # Wait for the server to become healthy (max 60s).
            deadline = time.monotonic() + 60
            ready = False
            while time.monotonic() < deadline:
                exit_code = proc.poll()
                if exit_code is not None:
                    pytest.fail(
                        f"Real LLM server exited early (exit code {exit_code}).\n"
                        f"--- server log (tail) ---\n{_read_log()[-2000:]}"
                    )
                try:
                    resp = httpx.get(f"{base_url}/api/v1/health", timeout=2)
                    if resp.status_code == 200:
                        ready = True
                        break
                except httpx.TransportError:
                    # Refused connections, timeouts and resets are all
                    # expected while the server is still booting.
                    pass
                time.sleep(0.5)

            if not ready:
                _stop_server_process(proc, grace_seconds=5)
                pytest.fail(
                    "Real LLM server failed to start within 60s.\n"
                    f"--- server log (tail) ---\n{_read_log()[-2000:]}"
                )

            # Create the test user once the server answers health checks.
            _create_test_user(auth_db_path)

            yield base_url, auth_db_path
        finally:
            # Runs on normal teardown and on every failure path above.
            _stop_server_process(proc)

            # If the server logged any errors, print them for debugging.
            log_content = _read_log()
            if "Error" in log_content or "Traceback" in log_content:
                print(f"\n--- Server stderr log ---\n{log_content[-3000:]}\n--- End server log ---")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Convenience fixtures derived from real_llm_server
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def base_url(real_llm_server: tuple[str, Path]) -> str:
    """Base HTTP URL of the running server (first item of ``real_llm_server``)."""
    url, _db_path = real_llm_server
    return url
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def auth_db_path(real_llm_server: tuple[str, Path]) -> Path:
    """Path of the server's auth database (second item of ``real_llm_server``)."""
    _url, db_path = real_llm_server
    return db_path
|
||||||
|
|
||||||
|
|
||||||
|
def _login_with_retry(
    base_url: str, max_retries: int = 3, delay: float = 1.0
) -> httpx.Response:
    """Log in as the test user, retrying when the server answers 500.

    A 500 from the login route is treated as transient (SQLite write-lock
    contention) and retried after ``delay`` seconds.  Any other status —
    success or failure — is returned immediately.

    Args:
        base_url: Base URL of the server under test.
        max_retries: Maximum number of login attempts.  Values below 1 are
            treated as 1, so a response is always returned.
        delay: Seconds to sleep between attempts.

    Returns:
        The response of the last attempt made.
    """
    attempts = max(1, max_retries)
    with httpx.Client(base_url=base_url, timeout=30) as client:
        for attempt in range(1, attempts + 1):
            resp = client.post(
                "/api/v1/auth/login",
                json={"username": TEST_USERNAME, "password": TEST_PASSWORD},
            )
            # Stop on anything that is not a 500, or when out of attempts.
            if resp.status_code != 500 or attempt == attempts:
                break
            time.sleep(delay)
    return resp
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def auth_token(base_url: str) -> str:
    """Access token from one login, shared by the whole test session."""
    login_resp = _login_with_retry(base_url)
    assert login_resp.status_code == 200, (
        f"Login failed: {login_resp.status_code} {login_resp.text[:1000]}"
    )
    body = login_resp.json()
    assert "access_token" in body
    token = body["access_token"]
    return token
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def refresh_token(base_url: str) -> str:
    """Refresh token from one login, shared by the whole test session."""
    login_resp = _login_with_retry(base_url)
    assert login_resp.status_code == 200, (
        f"Login failed: {login_resp.status_code} {login_resp.text[:1000]}"
    )
    body = login_resp.json()
    return body["refresh_token"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def auth_headers(auth_token: str) -> dict[str, str]:
    """Request headers carrying the session's Bearer JWT and a JSON content type."""
    headers = {
        "Authorization": f"Bearer {auth_token}",
        "Content-Type": "application/json",
    }
    return headers
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# 1. Authentication Flow Tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.integration
@pytest.mark.timeout(30)
class TestAuthFlow:
    """JWT authentication round-trips against the live server."""

    def test_login_success(self, base_url: str):
        """Correct credentials yield a token pair plus the user record."""
        credentials = {"username": TEST_USERNAME, "password": TEST_PASSWORD}
        with httpx.Client(base_url=base_url, timeout=30) as http:
            response = http.post("/api/v1/auth/login", json=credentials)
        assert response.status_code == 200
        body = response.json()
        assert "access_token" in body
        assert "refresh_token" in body
        assert body["token_type"] == "bearer"
        assert body["user"]["username"] == TEST_USERNAME
        assert body["user"]["role"] == "admin"

    def test_login_wrong_password(self, base_url: str):
        """A wrong password is rejected with 401."""
        bad_credentials = {"username": TEST_USERNAME, "password": "definitely-wrong"}
        with httpx.Client(base_url=base_url, timeout=30) as http:
            response = http.post("/api/v1/auth/login", json=bad_credentials)
        assert response.status_code == 401

    def test_me_with_valid_token(self, base_url: str, auth_headers: dict[str, str]):
        """``/auth/me`` with a valid JWT returns the test user's profile."""
        with httpx.Client(base_url=base_url, timeout=30) as http:
            response = http.get("/api/v1/auth/me", headers=auth_headers)
        assert response.status_code == 200
        profile = response.json()
        assert profile["username"] == TEST_USERNAME
        assert profile["email"] == TEST_EMAIL
        assert profile["role"] == "admin"
        assert profile["is_active"] is True

    def test_me_without_token_returns_401(self, base_url: str):
        """``/auth/me`` without credentials is rejected with 401."""
        with httpx.Client(base_url=base_url, timeout=10) as http:
            response = http.get("/api/v1/auth/me")
        assert response.status_code == 401

    def test_refresh_token(self, base_url: str, refresh_token: str):
        """``/auth/refresh`` trades a refresh token for a new access token."""
        with httpx.Client(base_url=base_url, timeout=30) as http:
            response = http.post(
                "/api/v1/auth/refresh",
                json={"refresh_token": refresh_token},
            )
        assert response.status_code == 200
        body = response.json()
        assert "access_token" in body
        assert body["user"]["username"] == TEST_USERNAME
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# 2. LLM Gateway Tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.integration
@pytest.mark.timeout(120)
class TestLLMGateway:
    """LLM gateway proxy checks that hit the real provider."""

    @staticmethod
    def _has_cjk(text: str) -> bool:
        """True when *text* has a character in U+4E00..U+9FFF."""
        return any("\u4e00" <= ch <= "\u9fff" for ch in text)

    def test_chat_non_streaming(self, base_url: str, auth_headers: dict[str, str]):
        """``/llm/chat`` returns a non-empty completion containing Chinese text."""
        request_body = {
            "messages": [{"role": "user", "content": "你好,请用一句话介绍自己"}],
            "model": TEST_MODEL,
            "temperature": 0.7,
            "max_tokens": 200,
        }
        with httpx.Client(base_url=base_url, timeout=90) as http:
            response = http.post("/api/v1/llm/chat", headers=auth_headers, json=request_body)
        assert response.status_code == 200
        body = response.json()
        assert "content" in body
        content: str = body["content"]
        assert len(content) > 0
        # The prompt is Chinese, so the answer is expected to be as well.
        assert self._has_cjk(content)
        assert "model" in body
        assert "usage" in body

    def test_chat_streaming_sse(self, base_url: str, auth_headers: dict[str, str]):
        """``/llm/chat/stream`` emits SSE ``data:`` events carrying content."""
        request_body = {
            "messages": [{"role": "user", "content": "用一句话说明什么是人工智能"}],
            "model": TEST_MODEL,
            "temperature": 0.7,
            "max_tokens": 200,
        }
        data_prefix = "data: "
        events: list[dict[str, Any]] = []
        with httpx.Client(base_url=base_url, timeout=90) as http:
            with http.stream(
                "POST",
                "/api/v1/llm/chat/stream",
                headers=auth_headers,
                json=request_body,
            ) as response:
                assert response.status_code == 200
                for sse_line in response.iter_lines():
                    if sse_line.startswith(data_prefix):
                        data = sse_line[len(data_prefix):]
                        # The literal [DONE] payload marks the end of the stream.
                        if data == "[DONE]":
                            break
                        events.append(json.loads(data))

        assert len(events) > 0
        pieces = [event.get("content", "") for event in events]
        full_content = "".join(pieces)
        assert len(full_content) > 0
        assert self._has_cjk(full_content)

    def test_chat_invalid_model_returns_error(self, base_url: str, auth_headers: dict[str, str]):
        """An unknown model name is answered with 404 or 502."""
        request_body = {
            "messages": [{"role": "user", "content": "test"}],
            "model": "nonexistent-model-xyz-12345",
        }
        with httpx.Client(base_url=base_url, timeout=30) as http:
            response = http.post("/api/v1/llm/chat", headers=auth_headers, json=request_body)
        assert response.status_code in (404, 502)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# 3. Chat REST API Tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="class")
def chat_session_id(base_url: str, auth_headers: dict[str, str]) -> str:
    """ID of a chat session created for the ``default`` agent.

    Class-scoped, so every test in one class shares the same session.
    """
    with httpx.Client(base_url=base_url, timeout=30) as http:
        response = http.post(
            "/api/v1/chat/sessions",
            headers=auth_headers,
            json={"agent_name": "default"},
        )
    assert response.status_code in (200, 201), f"Failed to create chat session: {response.text}"
    created = response.json()
    return created["session_id"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.integration
@pytest.mark.timeout(120)
class TestChatAPI:
    """Chat REST API checks that hit the real LLM.

    The tests share one class-scoped session.  The history test expects a
    message to have been sent in that session already.
    """

    def test_create_session(self, chat_session_id: str):
        """The session fixture yields a non-empty identifier."""
        assert chat_session_id
        assert len(chat_session_id) > 0

    def test_send_message_and_get_real_response(
        self, base_url: str, auth_headers: dict[str, str], chat_session_id: str
    ):
        """Posting a message returns an assistant reply from the real LLM."""
        messages_path = f"/api/v1/chat/sessions/{chat_session_id}/messages"
        with httpx.Client(base_url=base_url, timeout=90) as http:
            response = http.post(
                messages_path,
                headers=auth_headers,
                json={"content": "你好,请用一句话介绍自己"},
            )
        assert response.status_code == 200
        reply = response.json()
        assert reply["role"] == "assistant"
        content: str = reply["content"]
        assert len(content) > 0
        # A reply mentioning "mock" would mean a mock provider answered.
        assert "mock" not in content.lower()
        # The prompt is Chinese, so the answer is expected to be as well.
        has_cjk = any("\u4e00" <= ch <= "\u9fff" for ch in content)
        assert has_cjk

    def test_message_history_after_conversation(
        self, base_url: str, auth_headers: dict[str, str], chat_session_id: str
    ):
        """The session history lists both the user and the assistant message."""
        messages_path = f"/api/v1/chat/sessions/{chat_session_id}/messages"
        with httpx.Client(base_url=base_url, timeout=30) as http:
            response = http.get(messages_path, headers=auth_headers)
        assert response.status_code == 200
        history = response.json()
        assert isinstance(history, list)
        assert len(history) >= 2  # at least one user + one assistant
        seen_roles = [entry["role"] for entry in history]
        assert "user" in seen_roles
        assert "assistant" in seen_roles
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# 4. WebSocket Chat Tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.integration
@pytest.mark.timeout(120)
class TestWebSocketChat:
    """Verify the WebSocket chat protocol with real LLM streaming.

    Both tests create a fresh chat session over REST, then open a WebSocket to
    ``{REAL_LLM_WS_URL}/api/v1/chat/ws/{session_id}`` with the JWT passed in the
    ``token`` query parameter.
    """

    @staticmethod
    def _create_chat_session(base_url: str, auth_token: str) -> str:
        """Create a chat session via ``POST /api/v1/chat/sessions``.

        Args:
            base_url: HTTP base URL of the server under test.
            auth_token: JWT sent as a ``Bearer`` token.

        Returns:
            The ``session_id`` field of the JSON response.
        """
        with httpx.Client(base_url=base_url, timeout=30) as client:
            resp = client.post(
                "/api/v1/chat/sessions",
                headers={
                    "Authorization": f"Bearer {auth_token}",
                    "Content-Type": "application/json",
                },
                json={"agent_name": "default"},
            )
            assert resp.status_code in (200, 201)
            return resp.json()["session_id"]

    @pytest.mark.asyncio
    async def test_websocket_full_chat_flow(self, base_url: str, auth_token: str):
        """Connect → send message → receive final_answer with real LLM content."""
        try:
            import websockets
        except ImportError:
            pytest.skip("websockets package not installed")

        # Create a chat session via REST.
        session_id = self._create_chat_session(base_url, auth_token)

        # Connect to the WebSocket (JWT passed via ?token= query param).
        ws_url = f"{REAL_LLM_WS_URL}/api/v1/chat/ws/{session_id}?token={auth_token}"
        received: list[dict[str, Any]] = []

        async with websockets.connect(ws_url) as ws:  # type: ignore[name-defined]
            # 1. Expect a connected event.
            raw = await asyncio.wait_for(ws.recv(), timeout=10)
            data = json.loads(raw)
            received.append(data)
            assert data["type"] == "connected"

            # 2. Send a user message.
            await ws.send(json.dumps({"type": "message", "content": "你好,请用一句话介绍自己"}))

            # 3. Collect events until final_answer / error / timeout.
            #    Each recv() is bounded by the time left in the overall 90 s
            #    budget. Giving every recv() a fresh 90 s would let the loop run
            #    up to ~180 s and be killed by the 120 s pytest timeout before
            #    the diagnostic assertion below could fire.
            deadline = time.monotonic() + 90
            while True:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    received.append({"type": "timeout"})
                    break
                try:
                    raw = await asyncio.wait_for(ws.recv(), timeout=remaining)
                except asyncio.TimeoutError:
                    received.append({"type": "timeout"})
                    break
                msg = json.loads(raw)
                received.append(msg)
                if msg.get("type") in ("final_answer", "error"):
                    break

        # 4. Assert we got a final_answer (not an error).
        types = [m.get("type") for m in received]
        assert "connected" in types
        final_msgs = [m for m in received if m.get("type") == "final_answer"]
        assert final_msgs, f"Expected final_answer, got event types: {types}"

        final_content: str = final_msgs[0].get("content", "")
        assert len(final_content) > 0
        # Must not be a mock response.
        assert "mock" not in final_content.lower()
        # Real LLM response should contain Chinese characters.
        assert any("\u4e00" <= ch <= "\u9fff" for ch in final_content)

    @pytest.mark.asyncio
    async def test_websocket_ping_pong(self, base_url: str, auth_token: str):
        """WebSocket ping/pong heartbeat works alongside the chat session."""
        try:
            import websockets
        except ImportError:
            pytest.skip("websockets package not installed")

        session_id = self._create_chat_session(base_url, auth_token)

        ws_url = f"{REAL_LLM_WS_URL}/api/v1/chat/ws/{session_id}?token={auth_token}"
        async with websockets.connect(ws_url) as ws:  # type: ignore[name-defined]
            # Wait for connected; check the frame type so that an unexpected
            # first frame is reported here rather than as a confusing pong
            # mismatch further down.
            raw = await asyncio.wait_for(ws.recv(), timeout=10)
            assert json.loads(raw)["type"] == "connected"

            # Send ping → expect pong.
            await ws.send(json.dumps({"type": "ping"}))
            raw = await asyncio.wait_for(ws.recv(), timeout=10)
            msg = json.loads(raw)
            assert msg["type"] == "pong"
|
||||||
Loading…
Reference in New Issue