Compare commits
3 Commits
060df5ba13
...
2d94fab4dd
| Author | SHA1 | Date |
|---|---|---|
|
|
2d94fab4dd | |
|
|
02cf7a94ac | |
|
|
a8927a18e6 |
|
|
@ -0,0 +1,19 @@
|
|||
# 数据库
|
||||
DATABASE_URL=postgresql+asyncpg://postgres:postgres123@db:5432/geo_platform
|
||||
|
||||
# Redis
|
||||
REDIS_URL=redis://redis:6379/0
|
||||
|
||||
# JWT
|
||||
JWT_SECRET=your-secret-key-change-in-production
|
||||
JWT_EXPIRE_HOURS=24
|
||||
|
||||
# 前端
|
||||
NEXT_PUBLIC_API_URL=http://localhost:8000
|
||||
|
||||
# Playwright
|
||||
PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
|
||||
|
||||
# 国内大模型API(可选)
|
||||
ZHIPU_API_KEY=
|
||||
TONGYI_API_KEY=
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
# Node
|
||||
node_modules/
|
||||
frontend/node_modules/
|
||||
frontend/.next/
|
||||
frontend/out/
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
backend/venv/
|
||||
backend/.venv/
|
||||
*.egg-info/
|
||||
|
||||
# Environment
|
||||
.env
|
||||
frontend/.env.local
|
||||
|
||||
# IDE
|
||||
.idea/
|
||||
.vscode/
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Database
|
||||
*.db
|
||||
*.sqlite3
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
|
||||
# Docker
|
||||
postgres_data/
|
||||
redis_data/
|
||||
|
||||
# Cache
|
||||
.npm-cache/
|
||||
.pytest_cache/
|
||||
tsconfig.tsbuildinfo
|
||||
|
|
@ -0,0 +1,530 @@
|
|||
# AI平台集成
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [backend/app/workers/platforms/kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [backend/app/workers/platforms/wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [backend/app/workers/platforms/tongyi.py](file://backend/app/workers/platforms/tongyi.py)
|
||||
- [backend/app/workers/platforms/doubao.py](file://backend/app/workers/platforms/doubao.py)
|
||||
- [backend/app/workers/platforms/qingyan.py](file://backend/app/workers/platforms/qingyan.py)
|
||||
- [backend/app/workers/platforms/tiangong.py](file://backend/app/workers/platforms/tiangong.py)
|
||||
- [backend/app/workers/platforms/xinghuo.py](file://backend/app/workers/platforms/xinghuo.py)
|
||||
- [backend/app/workers/platforms/search_engine.py](file://backend/app/workers/platforms/search_engine.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [backend/app/models/query_task.py](file://backend/app/models/query_task.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/services/citation.py](file://backend/app/services/citation.py)
|
||||
- [frontend/lib/platforms.ts](file://frontend/lib/platforms.ts)
|
||||
- [tests/test_citation_engine.py](file://tests/test_citation_engine.py)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
</cite>
|
||||
|
||||
## 更新摘要
|
||||
**变更内容**
|
||||
- 新增5个基于搜索引擎的AI平台适配器(通义千问、豆包、智谱清言、天工AI、讯飞星火)
|
||||
- 替代原有的Playwright浏览器自动化适配器架构
|
||||
- 新增智能搜索引擎模块,以DuckDuckGo为主搜索源,并在其受限或失败时自动回退到Wikipedia
|
||||
- 更新前端平台映射以支持新平台
|
||||
- 保持相同的引用检测引擎和调度器架构
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本项目是一个AI平台集成系统,支持通过统一适配器接口对接多个大模型平台(如通义千问、豆包、智谱清言、天工AI、讯飞星火)。系统包含以下能力:
|
||||
- 适配器架构:以抽象基类为统一接口,扩展新的AI平台只需实现query方法。
|
||||
- 搜索引擎集成:基于DuckDuckGo和Wikipedia的智能搜索引擎,提供稳定的回退机制。
|
||||
- 引用检测引擎:对平台返回内容进行品牌匹配、竞争品牌识别与置信度评分,生成引用记录。
|
||||
- 定时调度:基于APScheduler的异步调度器,周期性检查并执行到期查询任务。
|
||||
- API与服务层:提供查询与统计接口,支持立即执行、导出CSV等功能。
|
||||
|
||||
## 项目结构
|
||||
后端采用分层架构:
|
||||
- workers:工作流与平台适配器、引用检测引擎、调度器
|
||||
- models:数据库ORM模型(查询、引用记录、任务)
|
||||
- services:业务服务(查询统计、触发执行、导出)
|
||||
- api:FastAPI路由与对外接口
|
||||
- config:应用配置(环境变量读取)
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "前端"
|
||||
FE["前端应用<br/>platforms.ts"]
|
||||
end
|
||||
subgraph "后端"
|
||||
API["API 层<br/>citations.py"]
|
||||
SVC["服务层<br/>services/citation.py"]
|
||||
SCH["调度器<br/>workers/scheduler.py"]
|
||||
CE["引用检测引擎<br/>workers/citation_engine.py"]
|
||||
AD_T["适配器: 通义千问<br/>workers/platforms/tongyi.py"]
|
||||
AD_D["适配器: 豆包<br/>workers/platforms/doubao.py"]
|
||||
AD_Q["适配器: 智谱清言<br/>workers/platforms/qingyan.py"]
|
||||
AD_G["适配器: 天工AI<br/>workers/platforms/tiangong.py"]
|
||||
AD_X["适配器: 讯飞星火<br/>workers/platforms/xinghuo.py"]
|
||||
SE["搜索引擎<br/>workers/platforms/search_engine.py"]
|
||||
CFG["配置<br/>app/config.py"]
|
||||
DB_Q["模型: Query<br/>models/query.py"]
|
||||
DB_CR["模型: CitationRecord<br/>models/citation_record.py"]
|
||||
DB_QT["模型: QueryTask<br/>models/query_task.py"]
|
||||
end
|
||||
FE --> API
|
||||
API --> SVC
|
||||
SVC --> SCH
|
||||
SCH --> CE
|
||||
CE --> AD_T
|
||||
CE --> AD_D
|
||||
CE --> AD_Q
|
||||
CE --> AD_G
|
||||
CE --> AD_X
|
||||
AD_T --> SE
|
||||
AD_D --> SE
|
||||
AD_Q --> SE
|
||||
AD_G --> SE
|
||||
AD_X --> SE
|
||||
CE --> DB_CR
|
||||
CE --> DB_QT
|
||||
CE --> DB_Q
|
||||
SE --> CFG
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/services/citation.py:1-269](file://backend/app/services/citation.py#L1-L269)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/citation_engine.py:1-330](file://backend/app/workers/citation_engine.py#L1-L330)
|
||||
- [backend/app/workers/platforms/tongyi.py:1-38](file://backend/app/workers/platforms/tongyi.py#L1-L38)
|
||||
- [backend/app/workers/platforms/doubao.py:1-38](file://backend/app/workers/platforms/doubao.py#L1-L38)
|
||||
- [backend/app/workers/platforms/qingyan.py:1-38](file://backend/app/workers/platforms/qingyan.py#L1-L38)
|
||||
- [backend/app/workers/platforms/tiangong.py:1-38](file://backend/app/workers/platforms/tiangong.py#L1-L38)
|
||||
- [backend/app/workers/platforms/xinghuo.py:1-38](file://backend/app/workers/platforms/xinghuo.py#L1-L38)
|
||||
- [backend/app/workers/platforms/search_engine.py:1-174](file://backend/app/workers/platforms/search_engine.py#L1-L174)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/services/citation.py:1-269](file://backend/app/services/citation.py#L1-L269)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/citation_engine.py:1-330](file://backend/app/workers/citation_engine.py#L1-L330)
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/tongyi.py:1-38](file://backend/app/workers/platforms/tongyi.py#L1-L38)
|
||||
- [backend/app/workers/platforms/doubao.py:1-38](file://backend/app/workers/platforms/doubao.py#L1-L38)
|
||||
- [backend/app/workers/platforms/qingyan.py:1-38](file://backend/app/workers/platforms/qingyan.py#L1-L38)
|
||||
- [backend/app/workers/platforms/tiangong.py:1-38](file://backend/app/workers/platforms/tiangong.py#L1-L38)
|
||||
- [backend/app/workers/platforms/xinghuo.py:1-38](file://backend/app/workers/platforms/xinghuo.py#L1-L38)
|
||||
- [backend/app/workers/platforms/search_engine.py:1-174](file://backend/app/workers/platforms/search_engine.py#L1-L174)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- [frontend/lib/platforms.ts:1-24](file://frontend/lib/platforms.ts#L1-L24)
|
||||
|
||||
## 核心组件
|
||||
- 抽象适配器基类:定义统一的平台名称、URL以及查询接口,便于扩展新平台。
|
||||
- 搜索引擎适配器(通义千问/豆包/智谱清言/天工AI/讯飞星火):基于DuckDuckGo和Wikipedia的智能搜索引擎,提供稳定的回退机制。
|
||||
- 引用检测引擎:品牌匹配(精确/别名/模糊)、竞争品牌识别、置信度评分与记录生成。
|
||||
- 调度器:定时扫描到期查询,调用引擎执行并更新任务状态。
|
||||
- API与服务:提供查询列表、统计、立即执行、导出CSV等接口。
|
||||
- 数据模型:Query、CitationRecord、QueryTask支撑查询生命周期与结果存储。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/tongyi.py:1-38](file://backend/app/workers/platforms/tongyi.py#L1-L38)
|
||||
- [backend/app/workers/platforms/doubao.py:1-38](file://backend/app/workers/platforms/doubao.py#L1-L38)
|
||||
- [backend/app/workers/platforms/qingyan.py:1-38](file://backend/app/workers/platforms/qingyan.py#L1-L38)
|
||||
- [backend/app/workers/platforms/tiangong.py:1-38](file://backend/app/workers/platforms/tiangong.py#L1-L38)
|
||||
- [backend/app/workers/platforms/xinghuo.py:1-38](file://backend/app/workers/platforms/xinghuo.py#L1-L38)
|
||||
- [backend/app/workers/platforms/search_engine.py:1-174](file://backend/app/workers/platforms/search_engine.py#L1-L174)
|
||||
- [backend/app/workers/citation_engine.py:1-330](file://backend/app/workers/citation_engine.py#L1-L330)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/services/citation.py:1-269](file://backend/app/services/citation.py#L1-L269)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
|
||||
## 架构总览
|
||||
系统通过"适配器 + 引擎 + 调度器"的解耦方式实现多平台接入与统一处理流程。前端通过API触发查询与查看统计;调度器按频率驱动查询;引擎负责跨平台数据采集与分析;服务层提供数据访问与导出能力。所有平台现在都通过搜索引擎获取内容,提供更稳定的回退机制。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant FE as "前端"
|
||||
participant API as "API 层"
|
||||
participant SVC as "服务层"
|
||||
participant SCH as "调度器"
|
||||
participant CE as "引用检测引擎"
|
||||
participant AD as "平台适配器"
|
||||
participant SE as "搜索引擎"
|
||||
participant DB as "数据库"
|
||||
FE->>API : 触发立即查询
|
||||
API->>SVC : trigger_query_now()
|
||||
SVC->>DB : 写入 QueryTask(状态 pending)
|
||||
Note over SCH,DB : 定时任务扫描 next_query_at<=now 的查询
|
||||
SCH->>CE : execute_query(query)
|
||||
CE->>AD : query(keyword)
|
||||
AD->>SE : fetch_search_content(platform_name, keyword)
|
||||
SE-->>AD : 搜索结果文本
|
||||
AD-->>CE : 原始响应文本
|
||||
CE->>CE : 品牌匹配/竞争品牌识别
|
||||
CE->>DB : 写入 CitationRecord
|
||||
CE->>DB : 更新 Query.next_query_at
|
||||
API-->>FE : 返回任务状态/查询结果
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/citations.py:59-78](file://backend/app/api/citations.py#L59-L78)
|
||||
- [backend/app/services/citation.py:204-234](file://backend/app/services/citation.py#L204-L234)
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/workers/citation_engine.py:177-254](file://backend/app/workers/citation_engine.py#L177-L254)
|
||||
- [backend/app/workers/platforms/tongyi.py:16-33](file://backend/app/workers/platforms/tongyi.py#L16-L33)
|
||||
- [backend/app/workers/platforms/search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
- [backend/app/models/query.py:29-31](file://backend/app/models/query.py#L29-L31)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 适配器架构与扩展机制
|
||||
- 抽象基类定义
|
||||
- 平台标识:platform_name、platform_url
|
||||
- 统一接口:query(keyword)返回原始响应文本;可选close()释放资源
|
||||
- 扩展步骤
|
||||
- 继承BasePlatformAdapter
|
||||
- 实现query与可选close
|
||||
- 在CitationEngine的platforms映射中注册实例
|
||||
- 设计优势
|
||||
- 解耦平台差异,统一调用入口
|
||||
- 易于新增平台与替换实现
|
||||
- 基于搜索引擎的适配器无需复杂的浏览器自动化
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BasePlatformAdapter {
|
||||
+string platform_name
|
||||
+string platform_url
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class TongyiAdapter {
|
||||
+platform_name = "tongyi"
|
||||
+platform_url = "https : //tongyi.aliyun.com/qianwen"
|
||||
+query(keyword) str
|
||||
+_do_query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class DoubaoAdapter {
|
||||
+platform_name = "doubao"
|
||||
+platform_url = "https : //www.doubao.com/"
|
||||
+query(keyword) str
|
||||
+_do_query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class QingyanAdapter {
|
||||
+platform_name = "qingyan"
|
||||
+platform_url = "https : //chatglm.cn/"
|
||||
+query(keyword) str
|
||||
+_do_query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class TiangongAdapter {
|
||||
+platform_name = "tiangong"
|
||||
+platform_url = "https : //www.tiangong.cn/"
|
||||
+query(keyword) str
|
||||
+_do_query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class XinghuoAdapter {
|
||||
+platform_name = "xinghuo"
|
||||
+platform_url = "https : //xinghuo.xfyun.cn/"
|
||||
+query(keyword) str
|
||||
+_do_query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
BasePlatformAdapter <|-- TongyiAdapter
|
||||
BasePlatformAdapter <|-- DoubaoAdapter
|
||||
BasePlatformAdapter <|-- QingyanAdapter
|
||||
BasePlatformAdapter <|-- TiangongAdapter
|
||||
BasePlatformAdapter <|-- XinghuoAdapter
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/platforms/base.py:4-17](file://backend/app/workers/platforms/base.py#L4-L17)
|
||||
- [backend/app/workers/platforms/tongyi.py:10-38](file://backend/app/workers/platforms/tongyi.py#L10-L38)
|
||||
- [backend/app/workers/platforms/doubao.py:10-38](file://backend/app/workers/platforms/doubao.py#L10-L38)
|
||||
- [backend/app/workers/platforms/qingyan.py:10-38](file://backend/app/workers/platforms/qingyan.py#L10-L38)
|
||||
- [backend/app/workers/platforms/tiangong.py:10-38](file://backend/app/workers/platforms/tiangong.py#L10-L38)
|
||||
- [backend/app/workers/platforms/xinghuo.py:10-38](file://backend/app/workers/platforms/xinghuo.py#L10-L38)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/tongyi.py:1-38](file://backend/app/workers/platforms/tongyi.py#L1-L38)
|
||||
- [backend/app/workers/platforms/doubao.py:1-38](file://backend/app/workers/platforms/doubao.py#L1-L38)
|
||||
- [backend/app/workers/platforms/qingyan.py:1-38](file://backend/app/workers/platforms/qingyan.py#L1-L38)
|
||||
- [backend/app/workers/platforms/tiangong.py:1-38](file://backend/app/workers/platforms/tiangong.py#L1-L38)
|
||||
- [backend/app/workers/platforms/xinghuo.py:1-38](file://backend/app/workers/platforms/xinghuo.py#L1-L38)
|
||||
|
||||
### 搜索引擎适配器实现
|
||||
- 搜索引擎模式
|
||||
- 所有平台适配器现在都基于fetch_search_content函数
|
||||
- 通过DuckDuckGo搜索关键词,自动回退到Wikipedia
|
||||
- 提供指数退避重试机制(最多3次尝试)
|
||||
- 搜索策略
|
||||
- 组合关键词与目标品牌,确保搜索结果包含品牌信息
|
||||
- 优先使用DuckDuckGo HTML搜索,自动检测结果有效性
|
||||
- 当DuckDuckGo受限时自动回退到Wikipedia API
|
||||
- 错误处理
|
||||
- 每次尝试失败都会记录警告日志
|
||||
- 最终失败时抛出异常,便于上层处理
|
||||
- 适配器close方法为空实现,因为无需浏览器资源管理
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["开始查询"]) --> Combine["组合关键词: keyword + target_brand"]
|
||||
Combine --> TryDDG["尝试DuckDuckGo搜索"]
|
||||
TryDDG --> Valid{"结果有效?"}
|
||||
Valid --> |是| Parse["解析搜索结果"]
|
||||
Valid --> |否| Wiki["回退到Wikipedia搜索"]
|
||||
Parse --> Return["返回搜索文本"]
|
||||
Wiki --> Parse
|
||||
Return --> End(["结束"])
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/platforms/tongyi.py:16-33](file://backend/app/workers/platforms/tongyi.py#L16-L33)
|
||||
- [backend/app/workers/platforms/search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
- [backend/app/workers/platforms/search_engine.py:79-144](file://backend/app/workers/platforms/search_engine.py#L79-L144)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/tongyi.py:1-38](file://backend/app/workers/platforms/tongyi.py#L1-L38)
|
||||
- [backend/app/workers/platforms/doubao.py:1-38](file://backend/app/workers/platforms/doubao.py#L1-L38)
|
||||
- [backend/app/workers/platforms/qingyan.py:1-38](file://backend/app/workers/platforms/qingyan.py#L1-L38)
|
||||
- [backend/app/workers/platforms/tiangong.py:1-38](file://backend/app/workers/platforms/tiangong.py#L1-L38)
|
||||
- [backend/app/workers/platforms/xinghuo.py:1-38](file://backend/app/workers/platforms/xinghuo.py#L1-L38)
|
||||
- [backend/app/workers/platforms/search_engine.py:1-174](file://backend/app/workers/platforms/search_engine.py#L1-L174)
|
||||
|
||||
### 智能搜索引擎模块
|
||||
- DuckDuckGo搜索实现
|
||||
- 使用HTML版本搜索,无需API密钥
|
||||
- 支持多种结果块格式的解析
|
||||
- 自动检测非结果页面并回退
|
||||
- Wikipedia回退机制
|
||||
- 通过Wikipedia API获取词条摘要
|
||||
- 自动清理HTML标记和引用格式
|
||||
- 提供稳定可靠的备用搜索源
|
||||
- 搜索内容提取
|
||||
- 统一的HTML清理和文本提取
|
||||
- 支持标题和摘要的组合输出
|
||||
- 限制最大字符数防止内容过长
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/search_engine.py:1-174](file://backend/app/workers/platforms/search_engine.py#L1-L174)
|
||||
|
||||
### 引用检测引擎工作原理
|
||||
- 品牌匹配策略
|
||||
- 精确匹配:命中目标品牌,置信度1.0
|
||||
- 别名匹配:命中别名,置信度0.9
|
||||
- 模糊匹配:基于序列相似度阈值,返回最高相似度及置信度
|
||||
- 结果包含是否引用、置信度、匹配类型、段落位置、上下文片段
|
||||
- 竞争品牌识别
|
||||
- 预定义行业品牌集合,从文本中识别除目标品牌外的竞争品牌
|
||||
- 置信度评分机制
|
||||
- 精确命中1.0,别名0.9,模糊按相似度取值并四舍五入
|
||||
- 引擎执行流程
|
||||
- 构建BrandMatcher,遍历查询配置的平台
|
||||
- 调用适配器获取原始响应,执行匹配与竞争品牌检测
|
||||
- 生成CitationRecord并写入数据库,更新Query.next_query_at
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Q["输入: 关键词/目标品牌/别名"] --> BuildMatcher["构建BrandMatcher"]
|
||||
BuildMatcher --> ForEachPlat{"遍历平台"}
|
||||
ForEachPlat --> QueryRaw["调用适配器.query()"]
|
||||
QueryRaw --> Match["品牌匹配(match)"]
|
||||
Match --> Competitor["竞争品牌检测(detect)"]
|
||||
Competitor --> Record["生成CitationRecord"]
|
||||
Record --> Persist["写入数据库"]
|
||||
Persist --> NextTime["更新Query.next_query_at"]
|
||||
NextTime --> ForEachPlat
|
||||
ForEachPlat --> Done["完成"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/citation_engine.py:177-254](file://backend/app/workers/citation_engine.py#L177-L254)
|
||||
- [backend/app/workers/citation_engine.py:256-287](file://backend/app/workers/citation_engine.py#L256-L287)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/models/query.py:29-31](file://backend/app/models/query.py#L29-L31)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:1-330](file://backend/app/workers/citation_engine.py#L1-L330)
|
||||
- [tests/test_citation_engine.py:1-54](file://tests/test_citation_engine.py#L1-L54)
|
||||
|
||||
### 定时调度与任务管理
|
||||
- 调度器
|
||||
- 使用APScheduler的AsyncIOScheduler,每小时检查一次
|
||||
- 查找status='active'且next_query_at<=now的查询
|
||||
- 逐个调用CitationEngine.execute_query并更新QueryTask状态
|
||||
- 任务模型
|
||||
- QueryTask记录平台、状态、错误信息与时间戳
|
||||
- 支持pending/running/success/failed状态流转
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant S as "调度器"
|
||||
participant DB as "数据库"
|
||||
participant CE as "引用检测引擎"
|
||||
participant QT as "QueryTask"
|
||||
S->>DB : 查询 active 且到期的 Query
|
||||
DB-->>S : 返回待执行查询列表
|
||||
loop 遍历查询
|
||||
S->>QT : 创建/更新 QueryTask
|
||||
S->>CE : execute_query(query)
|
||||
CE-->>S : 返回 CitationRecord 列表
|
||||
S->>DB : 更新 QueryTask 状态/时间
|
||||
end
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
|
||||
### API与服务层
|
||||
- API路由
|
||||
- 列表查询、统计、立即执行查询
|
||||
- 立即执行返回任务状态与消息
|
||||
- 服务层
|
||||
- 权限校验:仅允许用户访问自己的查询
|
||||
- 统计聚合:总查询数、引用数、引用率、按平台分布、趋势
|
||||
- 导出CSV:将引用记录导出为CSV字符串
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/services/citation.py:1-269](file://backend/app/services/citation.py#L1-L269)
|
||||
|
||||
## 依赖分析
|
||||
- 外部依赖
|
||||
- FastAPI、SQLAlchemy、APScheduler、httpx、Pydantic Settings等
|
||||
- 内部模块耦合
|
||||
- CitationEngine依赖适配器与数据库模型
|
||||
- 所有适配器依赖search_engine模块
|
||||
- Scheduler依赖CitationEngine与Query模型
|
||||
- API与Service层依赖数据库与权限控制
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
REQ["requirements.txt"] --> FA["FastAPI"]
|
||||
REQ --> SA["SQLAlchemy"]
|
||||
REQ --> AP["APScheduler"]
|
||||
REQ --> HTTPX["httpx"]
|
||||
REQ --> PYD["Pydantic Settings"]
|
||||
CE["CitationEngine"] --> AD1["TongyiAdapter"]
|
||||
CE --> AD2["DoubaoAdapter"]
|
||||
CE --> AD3["QingyanAdapter"]
|
||||
CE --> AD4["TiangongAdapter"]
|
||||
CE --> AD5["XinghuoAdapter"]
|
||||
CE --> DB1["CitationRecord"]
|
||||
CE --> DB2["QueryTask"]
|
||||
CE --> DB3["Query"]
|
||||
AD1 --> SE["SearchEngine"]
|
||||
AD2 --> SE
|
||||
AD3 --> SE
|
||||
AD4 --> SE
|
||||
AD5 --> SE
|
||||
SCH["Scheduler"] --> CE
|
||||
API["API"] --> SVC["Service"]
|
||||
SVC --> DB1
|
||||
SVC --> DB3
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/requirements.txt:1-36](file://backend/requirements.txt#L1-L36)
|
||||
- [backend/app/workers/citation_engine.py:1-330](file://backend/app/workers/citation_engine.py#L1-L330)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/services/citation.py:1-269](file://backend/app/services/citation.py#L1-L269)
|
||||
|
||||
**章节来源**
|
||||
- [backend/requirements.txt:1-36](file://backend/requirements.txt#L1-L36)
|
||||
|
||||
## 性能考虑
|
||||
- 搜索引擎优化
|
||||
- DuckDuckGo搜索无需API密钥,成本低且稳定
|
||||
- Wikipedia回退机制确保搜索成功率
|
||||
- 指数退避减少对搜索引擎的压力
|
||||
- 资源管理
|
||||
- 适配器无需浏览器资源,内存占用更低
|
||||
- 搜索引擎调用使用异步HTTP客户端
|
||||
- 响应稳定性
|
||||
- 搜索结果比网页自动化更稳定
|
||||
- 主搜索源(DuckDuckGo)加回退源(Wikipedia)的双源机制提高成功率
|
||||
- 数据库性能
|
||||
- Query与CitationRecord的关键字段建立索引,优化查询性能
|
||||
- 异步调度
|
||||
- 使用AsyncIOScheduler与异步数据库连接,提升并发效率
|
||||
|
||||
## 故障排查指南
|
||||
- 搜索引擎访问失败
|
||||
- 现象:DuckDuckGo搜索失败或被限制
|
||||
- 处理:自动回退到Wikipedia,检查网络连接
|
||||
- Wikipedia API调用失败
|
||||
- 现象:Wikipedia搜索返回空结果
|
||||
- 处理:检查关键词有效性,确认Wikipedia服务可用
|
||||
- 搜索结果为空
|
||||
- 现象:适配器返回空字符串
|
||||
- 处理:尝试更具体的关键词,检查搜索引擎状态
|
||||
- 查询任务失败
|
||||
- 现象:QueryTask状态为failed并记录错误信息
|
||||
- 处理:查看错误日志,确认搜索引擎可用性与网络状况
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/search_engine.py:139-144](file://backend/app/workers/platforms/search_engine.py#L139-L144)
|
||||
- [backend/app/workers/platforms/tongyi.py:22-29](file://backend/app/workers/platforms/tongyi.py#L22-L29)
|
||||
- [backend/app/workers/citation_engine.py:231-247](file://backend/app/workers/citation_engine.py#L231-L247)
|
||||
|
||||
## 结论
|
||||
该系统通过适配器模式实现了对多平台的统一接入,现在采用基于搜索引擎的稳定架构,结合智能回退机制与引用检测引擎,提供了从查询、匹配到统计与导出的完整能力。搜索引擎模式相比浏览器自动化具有更高的稳定性、更低的成本和更好的可扩展性。调度器保障了周期性任务的可靠执行,API与服务层为前端与运维提供了清晰的接口。未来可在搜索引擎优化、稳定性与性能优化方面持续演进。
|
||||
|
||||
## 附录
|
||||
|
||||
### 新AI平台接入扩展指南与最佳实践
|
||||
- 扩展步骤
|
||||
- 新建适配器类继承BasePlatformAdapter,实现query与可选close
|
||||
- 在CitationEngine的platforms映射中注册新适配器实例
|
||||
- 在前端platforms.ts中添加平台映射与展示项
|
||||
- 最佳实践
|
||||
- 明确定义platform_name与platform_url
|
||||
- 统一异常处理与日志记录
|
||||
- 使用指数退避与搜索引擎回退提升鲁棒性
|
||||
- 合理设置超时与重试次数
|
||||
- 在close中确保资源释放(如需)
|
||||
- 为新平台编写单元测试覆盖关键场景
|
||||
- 搜索引擎适配器开发要点
|
||||
- 直接复用fetch_search_content函数
|
||||
- 不需要复杂的浏览器自动化逻辑
|
||||
- 注重错误处理和日志记录
|
||||
- 考虑关键词组合策略以提高搜索准确性
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/base.py:4-17](file://backend/app/workers/platforms/base.py#L4-L17)
|
||||
- [backend/app/workers/citation_engine.py:164-175](file://backend/app/workers/citation_engine.py#L164-L175)
|
||||
- [frontend/lib/platforms.ts:1-24](file://frontend/lib/platforms.ts#L1-L24)
|
||||
- [backend/app/workers/platforms/search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
|
|
@ -0,0 +1,495 @@
|
|||
# Kimi平台集成
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/workers/platforms/kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [backend/app/workers/platforms/search_engine.py](file://backend/app/workers/platforms/search_engine.py)
|
||||
- [backend/app/workers/platforms/__init__.py](file://backend/app/workers/platforms/__init__.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [backend/app/models/query_task.py](file://backend/app/models/query_task.py)
|
||||
- [backend/app/services/query.py](file://backend/app/services/query.py)
|
||||
- [backend/app/services/citation.py](file://backend/app/services/citation.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [frontend/lib/platforms.ts](file://frontend/lib/platforms.ts)
|
||||
</cite>
|
||||
|
||||
## 更新摘要
|
||||
**变更内容**
|
||||
- Kimi平台适配器已简化为搜索引擎模式,移除了Playwright浏览器自动化实现
|
||||
- 所有平台适配器采用统一的fetch_search_content机制
|
||||
- 引入了通用搜索引擎模块,支持DuckDuckGo和Wikipedia回退机制
|
||||
- 更新了架构图和组件分析以反映新的实现方式
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖关系分析](#依赖关系分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件面向需要集成Kimi平台的开发者与运维人员,系统性阐述Kimi适配器的简化实现方式与运行机制。Kimi适配器现已采用搜索引擎查询机制,通过DuckDuckGo和Wikipedia获取与关键词相关的真实内容,替代了原有的Playwright浏览器自动化方案。文档涵盖搜索引擎集成、错误重试与超时处理、资源管理与API调用示例。
|
||||
|
||||
## 项目结构
|
||||
后端采用FastAPI + SQLAlchemy + APScheduler的异步架构,前端通过Next.js提供可视化界面。Kimi适配器位于工作线程模块,被引用检测引擎统一编排,定时调度器周期性触发查询任务。所有平台适配器现在共享相同的搜索引擎基础设施。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "前端"
|
||||
FE_Platforms["前端平台映射<br/>frontend/lib/platforms.ts"]
|
||||
end
|
||||
subgraph "后端"
|
||||
API_Queries["查询API<br/>backend/app/api/queries.py"]
|
||||
API_Citations["引用API<br/>backend/app/api/citations.py"]
|
||||
Service_Query["查询服务<br/>backend/app/services/query.py"]
|
||||
Service_Citation["引用服务<br/>backend/app/services/citation.py"]
|
||||
Scheduler["定时调度器<br/>backend/app/workers/scheduler.py"]
|
||||
Engine["引用检测引擎<br/>backend/app/workers/citation_engine.py"]
|
||||
Adapter_Kimi["Kimi适配器<br/>backend/app/workers/platforms/kimi.py"]
|
||||
SearchEngine["搜索引擎模块<br/>backend/app/workers/platforms/search_engine.py"]
|
||||
BaseAdapter["基础适配器<br/>backend/app/workers/platforms/base.py"]
|
||||
Model_Query["查询模型<br/>backend/app/models/query.py"]
|
||||
Model_Record["引用记录模型<br/>backend/app/models/citation_record.py"]
|
||||
Model_Task["查询任务模型<br/>backend/app/models/query_task.py"]
|
||||
end
|
||||
FE_Platforms --> API_Queries
|
||||
FE_Platforms --> API_Citations
|
||||
API_Queries --> Service_Query
|
||||
API_Citations --> Service_Citation
|
||||
Service_Query --> Scheduler
|
||||
Service_Citation --> Scheduler
|
||||
Scheduler --> Engine
|
||||
Engine --> Adapter_Kimi
|
||||
Adapter_Kimi --> SearchEngine
|
||||
Engine --> BaseAdapter
|
||||
Engine --> Model_Query
|
||||
Engine --> Model_Record
|
||||
Engine --> Model_Task
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/workers/scheduler.py:25-40](file://backend/app/workers/scheduler.py#L25-L40)
|
||||
- [backend/app/workers/citation_engine.py:164-173](file://backend/app/workers/citation_engine.py#L164-L173)
|
||||
- [backend/app/workers/platforms/kimi.py:10-37](file://backend/app/workers/platforms/kimi.py#L10-L37)
|
||||
- [backend/app/workers/platforms/search_engine.py:1-174](file://backend/app/workers/platforms/search_engine.py#L1-L174)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/workers/scheduler.py:25-40](file://backend/app/workers/scheduler.py#L25-L40)
|
||||
- [frontend/lib/platforms.ts:1-23](file://frontend/lib/platforms.ts#L1-L23)
|
||||
|
||||
## 核心组件
|
||||
- **Kimi适配器(搜索引擎模式)**:基于HTTP客户端的轻量级适配器,通过fetch_search_content获取搜索结果。
|
||||
- **搜索引擎模块**:提供DuckDuckGo HTML搜索和Wikipedia API回退机制,支持内容提取和清理。
|
||||
- **引用检测引擎**:编排多平台查询,执行品牌匹配与竞争品牌检测,并持久化结果。
|
||||
- **定时调度器**:周期性扫描到期查询,触发执行并更新任务状态。
|
||||
- **数据模型**:查询、引用记录、查询任务三者构成完整的查询生命周期与结果存储。
|
||||
- **API与服务**:提供REST接口与业务服务,支撑前端展示与手动触发"立即执行"。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/kimi.py:10-37](file://backend/app/workers/platforms/kimi.py#L10-L37)
|
||||
- [backend/app/workers/platforms/search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
- [backend/app/workers/citation_engine.py:161-173](file://backend/app/workers/citation_engine.py#L161-L173)
|
||||
- [backend/app/workers/scheduler.py:25-40](file://backend/app/workers/scheduler.py#L25-L40)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
|
||||
## 架构总览
|
||||
下图展示了从用户发起查询到结果入库的关键路径,以及Kimi适配器在其中的角色。现在Kimi适配器通过搜索引擎获取内容,而非直接操作浏览器。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant FE as "前端"
|
||||
participant API as "后端API"
|
||||
participant SVC as "服务层"
|
||||
participant SCH as "调度器"
|
||||
participant ENG as "引用检测引擎"
|
||||
participant ADP as "Kimi适配器"
|
||||
participant SE as "搜索引擎模块"
|
||||
FE->>API : "POST /api/v1/queries 或 /api/v1/citations/run-now"
|
||||
API->>SVC : "创建查询/立即执行"
|
||||
SVC->>SCH : "登记查询任务"
|
||||
SCH->>ENG : "周期性执行查询"
|
||||
ENG->>ADP : "query(keyword)"
|
||||
ADP->>SE : "fetch_search_content(platform_name, keyword)"
|
||||
SE->>SE : "search_duckduckgo 或 Wikipedia"
|
||||
SE-->>ADP : "返回搜索结果文本"
|
||||
ADP-->>ENG : "返回原始响应文本"
|
||||
ENG-->>SVC : "品牌匹配/竞争品牌检测"
|
||||
SVC-->>API : "写入引用记录"
|
||||
API-->>FE : "返回查询结果/统计"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/queries.py:26-39](file://backend/app/api/queries.py#L26-L39)
|
||||
- [backend/app/api/citations.py:59-77](file://backend/app/api/citations.py#L59-L77)
|
||||
- [backend/app/services/query.py:45-81](file://backend/app/services/query.py#L45-L81)
|
||||
- [backend/app/services/citation.py:204-234](file://backend/app/services/citation.py#L204-L234)
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/workers/citation_engine.py:256-287](file://backend/app/workers/citation_engine.py#L256-L287)
|
||||
- [backend/app/workers/platforms/kimi.py:16-33](file://backend/app/workers/platforms/kimi.py#L16-L33)
|
||||
- [backend/app/workers/platforms/search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### Kimi适配器(搜索引擎模式)
|
||||
**更新** Kimi适配器已完全简化,移除了Playwright浏览器自动化实现,现在是一个轻量级的HTTP客户端适配器。
|
||||
|
||||
- **适配器实现**
|
||||
- 继承自BasePlatformAdapter基类,实现query和close方法
|
||||
- platform_name设置为"kimi",platform_url设置为"https://kimi.moonshot.cn"
|
||||
- query方法最多尝试3次(含首次调用),每次失败后按指数退避策略等待再重试
|
||||
- **搜索引擎集成**
|
||||
- _do_query方法调用fetch_search_content获取搜索结果
|
||||
- 通过search_duckduckgo获取HTML搜索结果,必要时回退到Wikipedia
|
||||
- 返回清理后的搜索结果文本
|
||||
- **资源管理**
|
||||
- close方法为空实现,因为无额外资源需要释放
|
||||
- 依赖httpx异步客户端自动管理连接池
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["进入 query(keyword)"]) --> RetryLoop{"重试次数 < 3"}
|
||||
RetryLoop --> |是| DoQuery["_do_query 执行单次查询"]
|
||||
DoQuery --> |成功| Success["返回搜索结果文本"]
|
||||
DoQuery --> |异常| Delay["指数退避等待 (2^attempt)"]
|
||||
Delay --> RetryLoop
|
||||
RetryLoop --> |否| FinalFail["记录最终失败并抛出异常"]
|
||||
Success --> End(["结束"])
|
||||
FinalFail --> End
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/platforms/kimi.py:16-29](file://backend/app/workers/platforms/kimi.py#L16-L29)
|
||||
- [backend/app/workers/platforms/kimi.py:31-33](file://backend/app/workers/platforms/kimi.py#L31-L33)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/kimi.py:10-37](file://backend/app/workers/platforms/kimi.py#L10-L37)
|
||||
|
||||
### 搜索引擎模块(Search Engine Module)
|
||||
**新增** 新增的通用搜索引擎模块,提供DuckDuckGo和Wikipedia的搜索功能。
|
||||
|
||||
- **DuckDuckGo搜索**
|
||||
- 使用HTML版本搜索,无需API密钥
|
||||
- 支持多种结果块匹配策略(标准result块和备选匹配)
|
||||
- 自动清理HTML标签和实体,提取可读文本
|
||||
- **Wikipedia回退机制**
|
||||
- 当DuckDuckGo受限或失败时自动回退到Wikipedia API
|
||||
- 使用Wikipedia API获取词条摘要,避免HTML解析复杂性
|
||||
- 支持引用标记清理和文本格式化
|
||||
- **内容提取与清理**
|
||||
- 统一的HTML标签清理函数
|
||||
- 文本压缩和空白字符标准化
|
||||
- 最大字符长度控制和结果截断
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["fetch_search_content"]) --> DDG["search_duckduckgo"]
|
||||
DDG --> Valid{"有效结果?"}
|
||||
Valid --> |是| Clean["清理HTML标签"]
|
||||
Clean --> Return["返回结果"]
|
||||
Valid --> |否| Wiki["search_wikipedia 回退"]
|
||||
Wiki --> WikiValid{"Wikipedia结果?"}
|
||||
WikiValid --> |是| WikiClean["清理Wikipedia内容"]
|
||||
WikiClean --> Return
|
||||
WikiValid --> |否| Error["抛出异常:所有搜索源均失败"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/platforms/search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
- [backend/app/workers/platforms/search_engine.py:79-144](file://backend/app/workers/platforms/search_engine.py#L79-L144)
|
||||
- [backend/app/workers/platforms/search_engine.py:16-76](file://backend/app/workers/platforms/search_engine.py#L16-L76)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
- [backend/app/workers/platforms/search_engine.py:79-144](file://backend/app/workers/platforms/search_engine.py#L79-L144)
|
||||
- [backend/app/workers/platforms/search_engine.py:16-76](file://backend/app/workers/platforms/search_engine.py#L16-L76)
|
||||
|
||||
### 引用检测引擎(Citation Engine)
|
||||
- **平台编排**
|
||||
- 维护平台适配器映射,支持Kimi、文心、通义等平台
|
||||
- 针对每个查询遍历指定平台,执行查询与检测
|
||||
- **更新** 所有平台现在共享相同的搜索引擎基础设施
|
||||
- **品牌匹配**
|
||||
- 支持精确匹配、别名匹配、模糊匹配三种策略,输出置信度与首次出现位置
|
||||
- **竞争品牌检测**
|
||||
- 基于预设行业品牌库,识别文本中除目标品牌外的其他品牌
|
||||
- **任务状态管理**
|
||||
- 为每次平台查询创建或获取对应任务记录,维护状态与错误信息
|
||||
- **结果持久化**
|
||||
- 将引用检测结果写入引用记录表,包含是否引用、位置、文本片段、竞争品牌及原始响应
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class CitationEngine {
|
||||
+execute_query(query, db) list
|
||||
+execute_single_platform(keyword, platform, target_brand, aliases) dict
|
||||
+close() void
|
||||
}
|
||||
class KimiAdapter {
|
||||
+query(keyword) str
|
||||
+_do_query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class SearchEngineModule {
|
||||
+fetch_search_content(platform_name, keyword) str
|
||||
+search_duckduckgo(query) str
|
||||
+search_wikipedia(keyword) str
|
||||
}
|
||||
class BrandMatcher {
|
||||
+match(text) dict
|
||||
}
|
||||
class CompetitorDetector {
|
||||
+detect(text, target_brand) list
|
||||
}
|
||||
CitationEngine --> KimiAdapter : "依赖"
|
||||
CitationEngine --> BrandMatcher : "使用"
|
||||
CitationEngine --> CompetitorDetector : "使用"
|
||||
KimiAdapter --> SearchEngineModule : "使用"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/citation_engine.py:161-173](file://backend/app/workers/citation_engine.py#L161-L173)
|
||||
- [backend/app/workers/platforms/kimi.py:10-37](file://backend/app/workers/platforms/kimi.py#L10-L37)
|
||||
- [backend/app/workers/platforms/search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:161-173](file://backend/app/workers/citation_engine.py#L161-L173)
|
||||
- [backend/app/workers/citation_engine.py:256-287](file://backend/app/workers/citation_engine.py#L256-L287)
|
||||
|
||||
### 定时调度器(Scheduler)
|
||||
- **触发机制**
|
||||
- 使用APScheduler的AsyncIOScheduler,每小时检查一次到期查询
|
||||
- 条件:查询状态为激活且next_query_at小于等于当前时间
|
||||
- **执行流程**
|
||||
- 逐条执行查询,调用引用检测引擎,更新任务状态与查询时间字段
|
||||
- **关闭流程**
|
||||
- 应用关闭时停止调度器并关闭各平台适配器
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant S as "调度器"
|
||||
participant DB as "数据库"
|
||||
participant ENG as "引用检测引擎"
|
||||
participant ADP as "Kimi适配器"
|
||||
S->>DB : "查询到期的查询记录"
|
||||
loop 遍历查询
|
||||
S->>ENG : "execute_query(query, db)"
|
||||
ENG->>ADP : "query(keyword)"
|
||||
ADP->>ADP : "fetch_search_content"
|
||||
ADP-->>ENG : "返回搜索结果"
|
||||
ENG-->>S : "写入引用记录并更新时间"
|
||||
end
|
||||
S->>S : "关闭时调用 engine.close()"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/workers/citation_engine.py:323-330](file://backend/app/workers/citation_engine.py#L323-L330)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:25-40](file://backend/app/workers/scheduler.py#L25-L40)
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/workers/scheduler.py:86-90](file://backend/app/workers/scheduler.py#L86-L90)
|
||||
|
||||
### 数据模型(Models)
|
||||
- **查询(Query)**
|
||||
- 关键字段:关键词、目标品牌、品牌别名、平台列表、频率、状态、下次查询时间等
|
||||
- **引用记录(CitationRecord)**
|
||||
- 关键字段:是否引用、引用位置、引用文本、竞争品牌列表、原始响应、查询时间等
|
||||
- **查询任务(QueryTask)**
|
||||
- 关键字段:状态、错误信息、计划/开始/完成时间等
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
QUERY {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamp last_queried_at
|
||||
timestamp next_query_at
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
CITATION_RECORD {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
boolean cited
|
||||
int citation_position
|
||||
text citation_text
|
||||
jsonb competitor_brands
|
||||
text raw_response
|
||||
timestamp queried_at
|
||||
}
|
||||
QUERY_TASK {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
string status
|
||||
text error_message
|
||||
timestamp scheduled_at
|
||||
timestamp started_at
|
||||
timestamp completed_at
|
||||
}
|
||||
QUERY ||--o{ CITATION_RECORD : "拥有"
|
||||
QUERY ||--o{ QUERY_TASK : "拥有"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
|
||||
### API与服务(API & Services)
|
||||
- **查询API**
|
||||
- 支持分页列出、创建、查询详情、更新、删除查询
|
||||
- **引用API**
|
||||
- 支持分页列出引用、统计查询、立即执行查询(触发任务)
|
||||
- **服务层**
|
||||
- 查询服务:校验用户配额、计算下次查询时间、增删改查
|
||||
- 引用服务:权限校验、统计聚合、CSV导出、立即执行任务登记
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/queries.py:15-86](file://backend/app/api/queries.py#L15-L86)
|
||||
- [backend/app/api/citations.py:25-77](file://backend/app/api/citations.py#L25-L77)
|
||||
- [backend/app/services/query.py:12-130](file://backend/app/services/query.py#L12-L130)
|
||||
- [backend/app/services/citation.py:24-269](file://backend/app/services/citation.py#L24-L269)
|
||||
|
||||
## 依赖关系分析
|
||||
- **组件耦合**
|
||||
- 引用检测引擎对Kimi适配器存在直接依赖;对品牌匹配器与竞争品牌检测器为组合关系
|
||||
- **更新** Kimi适配器现在依赖通用搜索引擎模块,而非Playwright
|
||||
- 定时调度器仅通过引擎接口触发执行,解耦具体平台实现
|
||||
- **外部依赖**
|
||||
- httpx异步HTTP客户端(用于搜索引擎请求)
|
||||
- DuckDuckGo HTML搜索与Wikipedia API(均无需认证)
|
||||
- PostgreSQL数据库(SQLAlchemy ORM)
|
||||
- APScheduler(异步调度)
|
||||
- **循环依赖**
|
||||
- 未发现循环导入或调用链路
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
SCH["调度器"] --> ENG["引用检测引擎"]
|
||||
ENG --> ADP["Kimi适配器"]
|
||||
ENG --> BM["品牌匹配器"]
|
||||
ENG --> CD["竞争品牌检测器"]
|
||||
ADP --> SE["搜索引擎模块"]
|
||||
SE --> HTTPX["httpx客户端"]
|
||||
APIQ["查询API"] --> SVQ["查询服务"]
|
||||
APIC["引用API"] --> SVC["引用服务"]
|
||||
SVQ --> DB["数据库"]
|
||||
SVC --> DB
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/scheduler.py:25-40](file://backend/app/workers/scheduler.py#L25-L40)
|
||||
- [backend/app/workers/citation_engine.py:161-173](file://backend/app/workers/citation_engine.py#L161-L173)
|
||||
- [backend/app/workers/platforms/kimi.py:10-37](file://backend/app/workers/platforms/kimi.py#L10-L37)
|
||||
- [backend/app/workers/platforms/search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
- [backend/app/api/queries.py:15-86](file://backend/app/api/queries.py#L15-L86)
|
||||
- [backend/app/api/citations.py:25-77](file://backend/app/api/citations.py#L25-L77)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:25-40](file://backend/app/workers/scheduler.py#L25-L40)
|
||||
- [backend/app/workers/citation_engine.py:161-173](file://backend/app/workers/citation_engine.py#L161-L173)
|
||||
- [backend/app/api/queries.py:15-86](file://backend/app/api/queries.py#L15-L86)
|
||||
- [backend/app/api/citations.py:25-77](file://backend/app/api/citations.py#L25-L77)
|
||||
|
||||
## 性能考虑
|
||||
- **HTTP客户端优化**
|
||||
- 使用httpx异步客户端,支持连接池复用和超时控制
|
||||
- DuckDuckGo搜索超时设置为30秒,避免长时间阻塞
|
||||
- **搜索策略优化**
|
||||
- 首选DuckDuckGo HTML搜索,自动回退到Wikipedia API
|
||||
- 支持最多5个结果的提取,平衡准确性和性能
|
||||
- **重试与退避**
|
||||
- 已采用指数退避(2^attempt),建议结合平台可用性监控动态调整重试上限
|
||||
- **数据库索引**
|
||||
- 查询模型的索引设计有助于高频筛选
|
||||
- 建议在引用记录表上针对查询时间、平台、是否引用等字段建立复合索引以提升统计与导出性能
|
||||
- **前端平台映射**
|
||||
- 前端平台键值映射清晰,便于UI展示与用户选择
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/kimi.py:16-29](file://backend/app/workers/platforms/kimi.py#L16-L29)
|
||||
- [backend/app/workers/platforms/search_engine.py:94-96](file://backend/app/workers/platforms/search_engine.py#L94-L96)
|
||||
- [backend/app/models/query.py:50-54](file://backend/app/models/query.py#L50-L54)
|
||||
- [backend/app/models/citation_record.py:37-41](file://backend/app/models/citation_record.py#L37-L41)
|
||||
- [frontend/lib/platforms.ts:1-23](file://frontend/lib/platforms.ts#L1-L23)
|
||||
|
||||
## 故障排查指南
|
||||
- **DuckDuckGo限制**
|
||||
- 现象:DuckDuckGo返回非结果页面或解析失败
|
||||
- 处理:自动回退到Wikipedia API;检查网络连接和代理设置
|
||||
- **Wikipedia API失败**
|
||||
- 现象:Wikipedia API调用失败或无结果
|
||||
- 处理:检查Wikipedia API可用性;确认关键词有效性
|
||||
- **搜索结果为空**
|
||||
- 现象:两个搜索源均无结果
|
||||
- 处理:尝试更具体的关键词;检查网络连接
|
||||
- **重试仍失败**
|
||||
- 现象:多次重试后仍失败
|
||||
- 处理:查看日志中的最后一次错误;检查搜索源(DuckDuckGo/Wikipedia)的访问限制,如限流或验证码拦截页
|
||||
- **API调用失败**
|
||||
- 现象:接口返回4xx/5xx错误
|
||||
- 处理:核对鉴权头与请求体;检查用户权限与查询状态;查看服务端日志定位异常
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/search_engine.py:139-144](file://backend/app/workers/platforms/search_engine.py#L139-L144)
|
||||
- [backend/app/workers/platforms/search_engine.py:140-144](file://backend/app/workers/platforms/search_engine.py#L140-L144)
|
||||
- [backend/app/workers/platforms/kimi.py:24-29](file://backend/app/workers/platforms/kimi.py#L24-L29)
|
||||
- [backend/app/api/queries.py:34-38](file://backend/app/api/queries.py#L34-L38)
|
||||
- [backend/app/api/citations.py:67-71](file://backend/app/api/citations.py#L67-L71)
|
||||
|
||||
## 结论
|
||||
Kimi适配器已成功简化为搜索引擎模式,移除了复杂的Playwright浏览器自动化实现,转而采用轻量级的HTTP客户端方案。通过DuckDuckGo和Wikipedia的组合搜索策略,实现了稳定的搜索结果获取。新的架构更加简洁、高效,降低了维护成本,同时保持了与原有系统的兼容性。建议在生产环境中关注HTTP客户端性能、搜索策略优化和数据库索引设计。
|
||||
|
||||
## 附录
|
||||
|
||||
### API调用示例(路径参考)
|
||||
- **创建查询**
|
||||
- POST /api/v1/queries
|
||||
- 请求体字段:keyword、target_brand、brand_aliases、platforms、frequency
|
||||
- 成功响应:QueryResponse
|
||||
- **立即执行查询**
|
||||
- POST /api/v1/queries/{query_id}/run-now
|
||||
- 成功响应:RunNowResponse(包含任务ID与状态)
|
||||
- **查询引用列表**
|
||||
- GET /api/v1/citations/?query_id={query_id}&platform={platform}
|
||||
- 成功响应:CitationListResponse
|
||||
- **引用统计**
|
||||
- GET /api/v1/citations/stats?query_id={query_id}
|
||||
- 成功响应:CitationStatsResponse
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/queries.py:26-39](file://backend/app/api/queries.py#L26-L39)
|
||||
- [backend/app/api/citations.py:59-77](file://backend/app/api/citations.py#L59-L77)
|
||||
- [backend/app/api/citations.py:25-56](file://backend/app/api/citations.py#L25-L56)
|
||||
|
|
@ -0,0 +1,408 @@
|
|||
# 平台适配器扩展指南
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [backend/app/workers/platforms/kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [backend/app/workers/platforms/wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [frontend/lib/platforms.ts](file://frontend/lib/platforms.ts)
|
||||
- [frontend/components/charts/platform-chart.tsx](file://frontend/components/charts/platform-chart.tsx)
|
||||
- [frontend/lib/api.ts](file://frontend/lib/api.ts)
|
||||
- [tests/test_citation_engine.py](file://tests/test_citation_engine.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本指南面向需要为系统新增AI平台适配器的开发者,提供从基类继承、实现必需方法、配置平台参数,到资源管理与性能优化的完整流程。文档同时覆盖两类平台适配策略:基于浏览器自动化(Playwright)的平台与基于API的平台,并给出测试方法、验证标准与常见问题排查建议。
|
||||
|
||||
## 项目结构
|
||||
后端采用分层架构,平台适配器位于 workers/platforms 子目录;引用检测引擎位于 workers 子目录;查询模型与调度器分别位于 models 与 workers 子目录;前端通过 lib 与 components 提供平台映射、图表展示与API封装。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "后端"
|
||||
CFG["配置<br/>app/config.py"]
|
||||
SCH["调度器<br/>workers/scheduler.py"]
|
||||
CE["引用引擎<br/>workers/citation_engine.py"]
|
||||
BP["适配器基类<br/>workers/platforms/base.py"]
|
||||
KIMI["Kimi适配器<br/>workers/platforms/kimi.py"]
|
||||
WENXIN["文心一言适配器<br/>workers/platforms/wenxin.py"]
|
||||
QRY["查询模型<br/>models/query.py"]
|
||||
end
|
||||
subgraph "前端"
|
||||
PFM["平台映射<br/>lib/platforms.ts"]
|
||||
API["API封装<br/>lib/api.ts"]
|
||||
CHART["平台图表<br/>components/charts/platform-chart.tsx"]
|
||||
end
|
||||
SCH --> CE
|
||||
CE --> KIMI
|
||||
CE --> WENXIN
|
||||
CE --> QRY
|
||||
BP -. 继承 .- KIMI
|
||||
BP -. 继承 .- WENXIN
|
||||
PFM --> CHART
|
||||
API --> SCH
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [frontend/lib/platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [frontend/components/charts/platform-chart.tsx:1-68](file://frontend/components/charts/platform-chart.tsx#L1-L68)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [frontend/lib/platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [frontend/components/charts/platform-chart.tsx:1-68](file://frontend/components/charts/platform-chart.tsx#L1-L68)
|
||||
|
||||
## 核心组件
|
||||
- 适配器基类 BasePlatformAdapter:定义平台名称、平台URL以及抽象方法 query 与可选方法 close。
|
||||
- 具体适配器 KimiAdapter 与 WenxinAdapter:均继承自基类,实现 query 方法并通过 Playwright 自动化浏览器进行交互。
|
||||
- 引擎 CitationEngine:负责编排查询、调用适配器、执行品牌匹配与竞争品牌检测、持久化结果。
|
||||
- 调度器 QueryScheduler:周期性触发查询任务执行。
|
||||
- 查询模型 Query:承载关键词、目标品牌、别名、平台列表、频率与时间戳等元数据。
|
||||
- 配置 Settings:集中管理数据库、Redis、JWT、Playwright浏览器路径及第三方API密钥等。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
## 架构总览
|
||||
下图展示了从调度器到引擎再到具体适配器的调用链路,以及前端如何消费平台映射与统计数据。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant S as "调度器<br/>workers/scheduler.py"
|
||||
participant E as "引擎<br/>workers/citation_engine.py"
|
||||
participant A as "适配器<br/>Kimi/Wenxin"
|
||||
participant DB as "数据库<br/>SQLAlchemy"
|
||||
S->>E : 触发执行查询
|
||||
E->>DB : 查询待执行的 Query
|
||||
loop 遍历平台
|
||||
E->>A : 调用 query(keyword)
|
||||
A-->>E : 返回原始响应文本
|
||||
E->>E : 品牌匹配/竞争品牌检测
|
||||
E->>DB : 写入引用记录与任务状态
|
||||
end
|
||||
E-->>S : 返回本次执行结果
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [backend/app/workers/platforms/kimi.py:33-48](file://backend/app/workers/platforms/kimi.py#L33-L48)
|
||||
- [backend/app/workers/platforms/wenxin.py:33-48](file://backend/app/workers/platforms/wenxin.py#L33-L48)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 基类与继承规范
|
||||
- 必须实现的方法
|
||||
- query(keyword: str) -> str:在指定平台查询关键词并返回原始响应文本。
|
||||
- 可选方法
|
||||
- close():释放浏览器或网络连接等资源。
|
||||
- 平台元信息
|
||||
- platform_name:平台标识字符串。
|
||||
- platform_url:平台主页URL。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BasePlatformAdapter {
|
||||
+string platform_name
|
||||
+string platform_url
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class KimiAdapter {
|
||||
+platform_name = "kimi"
|
||||
+platform_url = "https : //kimi.moonshot.cn"
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class WenxinAdapter {
|
||||
+platform_name = "wenxin"
|
||||
+platform_url = "https : //yiyan.baidu.com"
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
KimiAdapter --|> BasePlatformAdapter
|
||||
WenxinAdapter --|> BasePlatformAdapter
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/app/workers/platforms/base.py:4-17](file://backend/app/workers/platforms/base.py#L4-L17)
|
||||
- [backend/app/workers/platforms/kimi.py:11-20](file://backend/app/workers/platforms/kimi.py#L11-L20)
|
||||
- [backend/app/workers/platforms/wenxin.py:11-20](file://backend/app/workers/platforms/wenxin.py#L11-L20)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/base.py:4-17](file://backend/app/workers/platforms/base.py#L4-L17)
|
||||
|
||||
### 浏览器驱动型平台适配策略(以Kimi/Wenxin为例)
|
||||
- 启动与重用浏览器:首次调用时初始化 Playwright 并启动 Chromium 浏览器,避免重复创建。
|
||||
- 页面交互:导航至平台URL,等待并定位输入框,填充关键词,提交(回车或点击发送按钮)。
|
||||
- 稳定性检测:持续轮询消息容器,当文本连续多次保持一致时判定回复稳定,返回内容。
|
||||
- 错误处理:捕获超时与异常,记录日志并抛出可诊断的错误;提供指数退避重试机制。
|
||||
- 资源管理:在 finally 中确保 page/context 关闭;close()统一释放浏览器与Playwright实例。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["进入 query"]) --> Ensure["确保浏览器已启动"]
|
||||
Ensure --> TryDo["_do_query 尝试一次查询"]
|
||||
TryDo --> Stable["等待回复稳定"]
|
||||
Stable --> Done["返回原始响应文本"]
|
||||
TryDo --> |异常| Retry{"重试次数 < 3?"}
|
||||
Retry --> |是| Backoff["指数退避等待"] --> TryDo
|
||||
Retry --> |否| Raise["抛出最终异常"]
|
||||
Done --> Close["关闭 page/context"]
|
||||
Raise --> Close
|
||||
Close --> End(["结束"])
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/app/workers/platforms/kimi.py:21-48](file://backend/app/workers/platforms/kimi.py#L21-L48)
|
||||
- [backend/app/workers/platforms/kimi.py:50-124](file://backend/app/workers/platforms/kimi.py#L50-L124)
|
||||
- [backend/app/workers/platforms/kimi.py:126-196](file://backend/app/workers/platforms/kimi.py#L126-L196)
|
||||
- [backend/app/workers/platforms/kimi.py:198-205](file://backend/app/workers/platforms/kimi.py#L198-L205)
|
||||
- [backend/app/workers/platforms/wenxin.py:21-48](file://backend/app/workers/platforms/wenxin.py#L21-L48)
|
||||
- [backend/app/workers/platforms/wenxin.py:50-123](file://backend/app/workers/platforms/wenxin.py#L50-L123)
|
||||
- [backend/app/workers/platforms/wenxin.py:124-195](file://backend/app/workers/platforms/wenxin.py#L124-L195)
|
||||
- [backend/app/workers/platforms/wenxin.py:197-204](file://backend/app/workers/platforms/wenxin.py#L197-L204)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
|
||||
### API型平台适配策略(扩展指导)
|
||||
- 继承基类并实现 query:通过HTTP客户端发起请求,解析JSON响应,提取平台返回的文本内容。
|
||||
- 错误处理:区分网络错误、平台限流/鉴权失败、解析异常等,统一转换为可诊断的异常类型。
|
||||
- 资源管理:若使用连接池或长连接,实现 close() 以释放连接;否则可为空实现。
|
||||
- 配置参数:在 Settings 中添加平台API密钥与基础URL,前端与后端分别读取使用。
|
||||
|
||||
(本小节为概念性指导,不直接分析具体文件)
|
||||
|
||||
### 引擎与品牌检测
|
||||
- CitationEngine 负责:
|
||||
- 初始化各平台适配器实例并注册到平台字典。
|
||||
- 针对每个 Query 的平台列表依次执行查询。
|
||||
- 使用 BrandMatcher 执行精确/别名/模糊匹配,返回置信度与位置信息。
|
||||
- 使用 CompetitorDetector 检测文本中出现的竞争品牌集合。
|
||||
- 记录 CitationRecord 并更新 Query 的时间戳与下次查询时间。
|
||||
- 资源关闭:在 close() 中遍历适配器逐一关闭,避免资源泄漏。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class CitationEngine {
|
||||
+dict platforms
|
||||
+BrandMatcher matcher
|
||||
+CompetitorDetector competitor_detector
|
||||
+execute_query(query, db) list
|
||||
+execute_single_platform(keyword, platform, ...) dict
|
||||
+close() void
|
||||
}
|
||||
class BrandMatcher {
|
||||
+match(text) dict
|
||||
}
|
||||
class CompetitorDetector {
|
||||
+detect(text, target_brand) list
|
||||
}
|
||||
CitationEngine --> BrandMatcher : "使用"
|
||||
CitationEngine --> CompetitorDetector : "使用"
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/citation_engine.py:19-120](file://backend/app/workers/citation_engine.py#L19-L120)
|
||||
- [backend/app/workers/citation_engine.py:122-146](file://backend/app/workers/citation_engine.py#L122-L146)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
|
||||
### 调度与执行
|
||||
- QueryScheduler 使用 APScheduler 定时触发检查与执行逻辑,每小时扫描一次到期的查询任务。
|
||||
- 对每个查询任务,CitationEngine.execute_query 会:
|
||||
- 创建或获取 QueryTask 并更新状态为 running。
|
||||
- 遍历平台列表执行查询与检测。
|
||||
- 记录结果并更新 Query 的 last_queried_at 与 next_query_at。
|
||||
- 若失败则记录错误并写入一条 cited=False 的占位记录。
|
||||
- 关闭时统一调用 engine.close() 释放适配器资源。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant SCH as "调度器"
|
||||
participant CE as "引擎"
|
||||
participant DB as "数据库"
|
||||
SCH->>CE : check_and_execute_queries()
|
||||
CE->>DB : 查询 active 且到期的 Query
|
||||
loop 遍历查询
|
||||
CE->>DB : 获取/创建 QueryTask
|
||||
CE->>CE : 执行平台查询与检测
|
||||
CE->>DB : 写入 CitationRecord 与更新 Query
|
||||
end
|
||||
SCH->>CE : shutdown()
|
||||
CE->>CE : close()
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/app/workers/scheduler.py:30-90](file://backend/app/workers/scheduler.py#L30-L90)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
|
||||
### 前端集成与展示
|
||||
- 平台映射:前端 lib/platforms.ts 定义平台键到中文标签的映射,用于UI显示与图表标签。
|
||||
- 图表组件:components/charts/platform-chart.tsx 接收平台统计数据,渲染引用率柱状图。
|
||||
- API封装:frontend/lib/api.ts 提供认证、查询、引用与报表导出等接口封装,便于前端调用。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
PFM["平台映射<br/>lib/platforms.ts"] --> CHART["平台图表<br/>components/charts/platform-chart.tsx"]
|
||||
API["API封装<br/>lib/api.ts"] --> SCH["调度器<br/>workers/scheduler.py"]
|
||||
API --> CE["引擎<br/>workers/citation_engine.py"]
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [frontend/lib/platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
- [frontend/components/charts/platform-chart.tsx:34-39](file://frontend/components/charts/platform-chart.tsx#L34-L39)
|
||||
- [frontend/lib/api.ts:23-57](file://frontend/lib/api.ts#L23-L57)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/lib/platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
- [frontend/components/charts/platform-chart.tsx:1-68](file://frontend/components/charts/platform-chart.tsx#L1-L68)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
## 依赖分析
|
||||
- 组件耦合
|
||||
- CitationEngine 与具体适配器松耦合:通过平台字典注册与动态查找,便于扩展新平台。
|
||||
- 调度器与引擎解耦:调度器仅负责触发,引擎负责业务逻辑与平台交互。
|
||||
- 外部依赖
|
||||
- Playwright:用于浏览器自动化(Kimi/Wenxin)。
|
||||
- SQLAlchemy:用于数据库访问与事务控制。
|
||||
- APScheduler:用于定时任务调度。
|
||||
- Pydantic Settings:用于配置管理。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
CE["CitationEngine"] --> KIMI["KimiAdapter"]
|
||||
CE --> WENXIN["WenxinAdapter"]
|
||||
CE --> DB["SQLAlchemy"]
|
||||
SCH["QueryScheduler"] --> CE
|
||||
CE --> CFG["Settings"]
|
||||
KIMI --> PW["Playwright"]
|
||||
WENXIN --> PW
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/app/workers/citation_engine.py:152-155](file://backend/app/workers/citation_engine.py#L152-L155)
|
||||
- [backend/app/workers/scheduler.py:26-28](file://backend/app/workers/scheduler.py#L26-L28)
|
||||
- [backend/app/config.py:11-14](file://backend/app/config.py#L11-L14)
|
||||
- [backend/app/workers/platforms/kimi.py:4-6](file://backend/app/workers/platforms/kimi.py#L4-L6)
|
||||
- [backend/app/workers/platforms/wenxin.py:4-6](file://backend/app/workers/platforms/wenxin.py#L4-L6)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
|
||||
## 性能考虑
|
||||
- 浏览器复用:避免每次查询都启动/关闭浏览器,减少冷启动开销。
|
||||
- 稳定性检测阈值:合理设置“连续稳定”次数与轮询间隔,平衡准确性和延迟。
|
||||
- 指数退避:在重试时采用指数退避,降低平台压力并提升成功率。
|
||||
- 数据库批处理:批量写入引用记录与任务状态,减少IO往返。
|
||||
- 超时与并发:为页面导航、元素等待与响应稳定检测设置合理超时,避免长时间阻塞。
|
||||
- 缓存与预热:可在进程启动时预热部分资源(如Playwright浏览器),缩短首次查询耗时。
|
||||
|
||||
(本节为通用建议,不直接分析具体文件)
|
||||
|
||||
## 故障排查指南
|
||||
- 浏览器相关
|
||||
- 现象:启动浏览器失败或找不到输入框。
|
||||
- 排查:确认已安装 Playwright 浏览器;检查平台URL可达性;核对页面选择器是否随平台更新而变更。
|
||||
- 参考实现:Kimi/Wenxin 适配器在启动失败时抛出明确提示,需按提示执行安装命令。
|
||||
- 超时与不稳定
|
||||
- 现象:页面操作超时或回复未稳定。
|
||||
- 排查:适当提高等待超时与稳定检测阈值;检查网络环境与平台负载。
|
||||
- 参考实现:适配器内对超时与异常进行捕获并记录日志。
|
||||
- 品牌匹配
|
||||
- 现象:匹配结果不符合预期。
|
||||
- 排查:调整别名列表、提高模糊匹配阈值或优化候选词提取逻辑。
|
||||
- 参考实现:测试用例覆盖精确、别名、模糊与无匹配场景。
|
||||
- 调度与状态
|
||||
- 现象:查询未按时执行或状态异常。
|
||||
- 排查:检查调度器是否启动、数据库连接与时区设置、Query 的 next_query_at 是否正确更新。
|
||||
- 前端展示
|
||||
- 现象:平台图表标签显示异常。
|
||||
- 排查:确认前端平台映射与后端平台键一致。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/kimi.py:27-31](file://backend/app/workers/platforms/kimi.py#L27-L31)
|
||||
- [backend/app/workers/platforms/kimi.py:87-88](file://backend/app/workers/platforms/kimi.py#L87-L88)
|
||||
- [backend/app/workers/platforms/kimi.py:116-118](file://backend/app/workers/platforms/kimi.py#L116-L118)
|
||||
- [backend/app/workers/platforms/wenxin.py:27-31](file://backend/app/workers/platforms/wenxin.py#L27-L31)
|
||||
- [backend/app/workers/platforms/wenxin.py:86-87](file://backend/app/workers/platforms/wenxin.py#L86-L87)
|
||||
- [backend/app/workers/platforms/wenxin.py:114-116](file://backend/app/workers/platforms/wenxin.py#L114-L116)
|
||||
- [tests/test_citation_engine.py:6-53](file://tests/test_citation_engine.py#L6-L53)
|
||||
- [backend/app/workers/scheduler.py:30-40](file://backend/app/workers/scheduler.py#L30-L40)
|
||||
- [backend/app/workers/citation_engine.py:229-232](file://backend/app/workers/citation_engine.py#L229-L232)
|
||||
- [frontend/lib/platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
|
||||
## 结论
|
||||
通过继承 BasePlatformAdapter 并遵循本文提供的实现规范与最佳实践,可以快速、安全地为系统接入新的AI平台。对于浏览器驱动型平台,重点在于稳定的选择器策略与资源管理;对于API型平台,重点在于健壮的错误处理与配置管理。配合引擎的品牌检测能力与调度器的自动化执行,可构建高可用的跨平台引用检测体系。
|
||||
|
||||
## 附录
|
||||
|
||||
### 新平台接入步骤清单
|
||||
- 继承基类并实现必需方法
|
||||
- 在 workers/platforms 下新建适配器文件,继承 BasePlatformAdapter。
|
||||
- 实现 query 与可选的 close。
|
||||
- 设置 platform_name 与 platform_url。
|
||||
- 注册到引擎
|
||||
- 在 CitationEngine 的平台字典中注册新适配器实例。
|
||||
- 配置与密钥
|
||||
- 在 Settings 中添加必要的配置项(如API密钥、基础URL)。
|
||||
- 前端集成
|
||||
- 在前端 lib/platforms.ts 中添加平台键与中文标签。
|
||||
- 如需展示统计,确保后端返回的数据结构与前端图表组件兼容。
|
||||
- 测试与验证
|
||||
- 编写单元测试覆盖品牌匹配与竞争品牌检测逻辑。
|
||||
- 进行端到端测试,验证调度器触发、引擎执行与数据库写入。
|
||||
- 性能与稳定性
|
||||
- 评估并优化超时、重试与稳定检测策略。
|
||||
- 监控日志与错误指标,持续改进。
|
||||
|
||||
(本节为流程性说明,不直接分析具体文件)
|
||||
|
|
@ -0,0 +1,477 @@
|
|||
# 引用检测算法
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [query.py](file://backend/app/models/query.py)
|
||||
- [query_task.py](file://backend/app/models/query_task.py)
|
||||
- [citation.py](file://backend/app/services/citation.py)
|
||||
- [scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [citations.py](file://backend/app/api/citations.py)
|
||||
- [config.py](file://backend/app/config.py)
|
||||
- [database.py](file://backend/app/database.py)
|
||||
- [test_citation_engine.py](file://tests/test_citation_engine.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖关系分析](#依赖关系分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件系统性阐述引用检测算法的设计与实现,重点覆盖以下方面:
|
||||
- 品牌匹配策略:精确匹配、别名匹配、模糊匹配的判定逻辑与置信度评分
|
||||
- 竞争品牌识别机制:基于预定义品牌库的竞争者发现
|
||||
- 置信度评分系统:关键词匹配权重、上下文相关性评估与结果排序规则
|
||||
- 引用上下文提取技术:文本片段截取、语义分析与相关性判断
|
||||
- 算法优化策略:性能提升与准确性改进方法
|
||||
- 算法调优指南与实际应用场景示例
|
||||
|
||||
## 项目结构
|
||||
后端采用分层架构,围绕“查询-执行-记录-统计”闭环组织:
|
||||
- 查询模型与任务模型负责用户查询配置与执行计划
|
||||
- 引擎模块负责跨平台调用、品牌匹配与竞争品牌识别
|
||||
- 适配器模块封装不同AI平台的网页自动化交互
|
||||
- 服务模块提供API接口、统计数据与导出功能
|
||||
- 定时调度器按频率自动触发查询任务
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "查询与任务"
|
||||
Q["Query<br/>查询配置"]
|
||||
QT["QueryTask<br/>查询任务"]
|
||||
end
|
||||
subgraph "引擎与适配器"
|
||||
CE["CitationEngine<br/>引用检测引擎"]
|
||||
BM["BrandMatcher<br/>品牌匹配器"]
|
||||
CD["CompetitorDetector<br/>竞争品牌检测器"]
|
||||
KP["KimiAdapter<br/>Kimi适配器"]
|
||||
WP["WenxinAdapter<br/>文心一言适配器"]
|
||||
end
|
||||
subgraph "数据与服务"
|
||||
CR["CitationRecord<br/>引用记录"]
|
||||
SVC["Citation Services<br/>统计与导出"]
|
||||
SCH["QueryScheduler<br/>定时调度器"]
|
||||
end
|
||||
Q --> CE
|
||||
Q --> QT
|
||||
CE --> BM
|
||||
CE --> CD
|
||||
CE --> KP
|
||||
CE --> WP
|
||||
CE --> CR
|
||||
SVC --> CR
|
||||
SCH --> CE
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
- [scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [citation.py:24-269](file://backend/app/services/citation.py#L24-L269)
|
||||
|
||||
章节来源
|
||||
- [citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
- [scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [citation.py:24-269](file://backend/app/services/citation.py#L24-L269)
|
||||
|
||||
## 核心组件
|
||||
- 引用检测引擎:协调平台查询、品牌匹配与竞争品牌识别,并持久化结果
|
||||
- 品牌匹配器:提供精确、别名与模糊三种匹配策略,输出置信度与上下文
|
||||
- 竞争品牌检测器:基于预定义品牌库识别文本中的竞争者
|
||||
- 平台适配器:封装Kimi与文心一言的网页自动化查询流程
|
||||
- 数据模型:查询、查询任务、引用记录支撑业务数据流转
|
||||
- 统计服务:提供引用统计、趋势与CSV导出能力
|
||||
- 定时调度器:按频率自动触发查询任务
|
||||
|
||||
章节来源
|
||||
- [citation_engine.py:19-120](file://backend/app/workers/citation_engine.py#L19-L120)
|
||||
- [citation_engine.py:122-146](file://backend/app/workers/citation_engine.py#L122-L146)
|
||||
- [citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
- [kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
- [query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [citation.py:76-201](file://backend/app/services/citation.py#L76-L201)
|
||||
- [scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
|
||||
## 架构总览
|
||||
下图展示从查询配置到结果统计的完整流程,包括定时触发、平台查询、匹配与记录写入。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant User as "用户"
|
||||
participant API as "API层"
|
||||
participant Svc as "统计服务"
|
||||
participant Sch as "调度器"
|
||||
participant Eng as "引用检测引擎"
|
||||
participant Plat as "平台适配器"
|
||||
participant DB as "数据库"
|
||||
User->>API : 触发查询/查看统计
|
||||
API->>Svc : 获取引用记录/统计
|
||||
Svc->>DB : 查询引用记录
|
||||
DB-->>Svc : 返回记录
|
||||
Svc-->>API : 返回统计结果
|
||||
Sch->>DB : 查询到期的查询
|
||||
DB-->>Sch : 返回查询集合
|
||||
Sch->>Eng : 执行查询
|
||||
Eng->>Plat : 平台查询(keyword)
|
||||
Plat-->>Eng : 返回原始响应
|
||||
Eng->>Eng : 品牌匹配/竞争品牌识别
|
||||
Eng->>DB : 写入引用记录
|
||||
DB-->>Eng : 确认写入
|
||||
Eng-->>Sch : 返回执行结果
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [kimi.py:33-48](file://backend/app/workers/platforms/kimi.py#L33-L48)
|
||||
- [wenxin.py:33-48](file://backend/app/workers/platforms/wenxin.py#L33-L48)
|
||||
- [citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [citation.py:24-73](file://backend/app/services/citation.py#L24-L73)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 品牌匹配器(BrandMatcher)
|
||||
- 精确匹配:文本直接包含目标品牌名称即命中,置信度最高
|
||||
- 别名匹配:对别名列表逐一匹配,命中则置信度较高
|
||||
- 模糊匹配:基于候选词集合与编辑相似度阈值进行匹配,置信度由相似度决定
|
||||
- 上下文提取:按段落定位首次出现位置,截取固定长度片段作为引用上下文
|
||||
- 结果字段:是否引用、置信度、匹配类型、段落位置、引用上下文
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["进入匹配"]) --> CheckEmpty{"文本为空?"}
|
||||
CheckEmpty --> |是| ReturnNoMatch["返回未命中"]
|
||||
CheckEmpty --> |否| Exact["精确匹配"]
|
||||
Exact --> ExactHit{"命中?"}
|
||||
ExactHit --> |是| ReturnExact["返回精确匹配结果"]
|
||||
ExactHit --> |否| Alias["别名匹配"]
|
||||
Alias --> AliasHit{"命中?"}
|
||||
AliasHit --> |是| ReturnAlias["返回别名匹配结果"]
|
||||
AliasHit --> |否| Fuzzy["模糊匹配"]
|
||||
Fuzzy --> Extract["提取候选词"]
|
||||
Extract --> Similarity["计算相似度"]
|
||||
Similarity --> Best{"最佳相似度>阈值?"}
|
||||
Best --> |是| ReturnFuzzy["返回模糊匹配结果"]
|
||||
Best --> |否| ReturnNoMatch
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [citation_engine.py:26-120](file://backend/app/workers/citation_engine.py#L26-L120)
|
||||
|
||||
章节来源
|
||||
- [citation_engine.py:19-120](file://backend/app/workers/citation_engine.py#L19-L120)
|
||||
|
||||
### 竞争品牌检测器(CompetitorDetector)
|
||||
- 基于预定义行业分类的品牌集合进行全量扫描
|
||||
- 排除目标品牌,返回去重后的竞争品牌列表
|
||||
- 支持多行业类别扩展,便于后续维护与增长
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["开始检测"]) --> Empty{"文本为空?"}
|
||||
Empty --> |是| ReturnEmpty["返回空列表"]
|
||||
Empty --> |否| LoopCat["遍历行业类别"]
|
||||
LoopCat --> LoopBrand["遍历类别内品牌"]
|
||||
LoopBrand --> Exclude{"是否为目标品牌?"}
|
||||
Exclude --> |是| NextBrand["跳过"]
|
||||
Exclude --> |否| CheckInText{"品牌是否出现在文本?"}
|
||||
CheckInText --> |是| Add["加入候选集"]
|
||||
CheckInText --> |否| NextBrand
|
||||
NextBrand --> LoopBrand
|
||||
LoopBrand --> Done{"遍历结束?"}
|
||||
Done --> |否| LoopCat
|
||||
Done --> |是| Sort["排序并返回"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [citation_engine.py:122-146](file://backend/app/workers/citation_engine.py#L122-L146)
|
||||
|
||||
章节来源
|
||||
- [citation_engine.py:122-146](file://backend/app/workers/citation_engine.py#L122-L146)
|
||||
|
||||
### 引用检测引擎(CitationEngine)
|
||||
- 单平台执行:获取适配器、发起查询、执行匹配与竞争品牌识别
|
||||
- 多平台执行:遍历配置平台,维护任务状态,持久化结果
|
||||
- 时间控制:根据频率计算下次查询时间,避免重复触发
|
||||
- 错误处理:捕获异常并记录失败任务与错误信息
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Q as "Query"
|
||||
participant CE as "CitationEngine"
|
||||
participant AD as "平台适配器"
|
||||
participant BM as "BrandMatcher"
|
||||
participant CD as "CompetitorDetector"
|
||||
participant DB as "数据库"
|
||||
CE->>Q : 读取配置(关键词/品牌/别名/平台/频率)
|
||||
loop 遍历平台
|
||||
CE->>AD : query(keyword)
|
||||
AD-->>CE : raw_response
|
||||
CE->>BM : match(raw_response)
|
||||
BM-->>CE : 匹配结果
|
||||
CE->>CD : detect(raw_response, target_brand)
|
||||
CD-->>CE : 竞争品牌列表
|
||||
CE->>DB : 写入CitationRecord
|
||||
end
|
||||
CE->>Q : 更新last_queried_at/next_query_at
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [citation_engine.py:236-266](file://backend/app/workers/citation_engine.py#L236-L266)
|
||||
|
||||
章节来源
|
||||
- [citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
|
||||
### 平台适配器(BasePlatformAdapter/Kimi/Wenxin)
|
||||
- 抽象基类定义统一接口:平台名称、URL与查询方法
|
||||
- 具体适配器通过Playwright自动化访问平台页面,输入关键词并等待稳定回复
|
||||
- 提供指数退避重试、超时处理与资源清理
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BasePlatformAdapter {
|
||||
+string platform_name
|
||||
+string platform_url
|
||||
+query(keyword) str
|
||||
+close()
|
||||
}
|
||||
class KimiAdapter {
|
||||
+query(keyword) str
|
||||
+close()
|
||||
}
|
||||
class WenxinAdapter {
|
||||
+query(keyword) str
|
||||
+close()
|
||||
}
|
||||
BasePlatformAdapter <|-- KimiAdapter
|
||||
BasePlatformAdapter <|-- WenxinAdapter
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
- [kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
|
||||
章节来源
|
||||
- [base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
- [kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
|
||||
### 数据模型与统计服务
|
||||
- 查询模型:存储关键词、目标品牌、别名、平台、频率与时间控制字段
|
||||
- 查询任务模型:跟踪每次执行的状态、错误信息与时间戳
|
||||
- 引用记录模型:保存是否引用、引用位置、引用文本、竞争品牌与原始响应
|
||||
- 统计服务:提供总量、引用率、平均位置、按平台分布与近30天趋势
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
QUERY {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
datetime last_queried_at
|
||||
datetime next_query_at
|
||||
}
|
||||
QUERY_TASK {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
string status
|
||||
text error_message
|
||||
datetime scheduled_at
|
||||
datetime started_at
|
||||
datetime completed_at
|
||||
}
|
||||
CITATION_RECORD {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
boolean cited
|
||||
int citation_position
|
||||
text citation_text
|
||||
json competitor_brands
|
||||
text raw_response
|
||||
datetime queried_at
|
||||
}
|
||||
QUERY ||--o{ QUERY_TASK : "包含"
|
||||
QUERY ||--o{ CITATION_RECORD : "包含"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
章节来源
|
||||
- [query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [citation.py:76-201](file://backend/app/services/citation.py#L76-L201)
|
||||
|
||||
### 定时调度器与API
|
||||
- 定时调度器:每小时检查到期查询并执行
|
||||
- API层:提供引用列表、统计与立即执行接口
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant SCH as "调度器"
|
||||
participant DB as "数据库"
|
||||
participant CE as "引擎"
|
||||
participant API as "API"
|
||||
participant SVC as "服务"
|
||||
SCH->>DB : 查询到期查询
|
||||
DB-->>SCH : 返回查询集合
|
||||
SCH->>CE : 执行查询
|
||||
CE->>DB : 写入记录
|
||||
API->>SVC : 获取统计/列表
|
||||
SVC->>DB : 查询数据
|
||||
DB-->>SVC : 返回结果
|
||||
SVC-->>API : 返回统计/列表
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [citations.py:25-77](file://backend/app/api/citations.py#L25-L77)
|
||||
- [citation.py:24-73](file://backend/app/services/citation.py#L24-L73)
|
||||
|
||||
章节来源
|
||||
- [scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [citations.py:25-77](file://backend/app/api/citations.py#L25-L77)
|
||||
- [citation.py:24-269](file://backend/app/services/citation.py#L24-L269)
|
||||
|
||||
## 依赖关系分析
|
||||
- 引擎依赖适配器接口,通过平台名称映射具体实现
|
||||
- 引擎依赖数据库模型进行任务与结果持久化
|
||||
- 统计服务依赖查询与记录模型进行聚合
|
||||
- 调度器依赖引擎与数据库进行周期性执行
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
CE["CitationEngine"] --> BM["BrandMatcher"]
|
||||
CE --> CD["CompetitorDetector"]
|
||||
CE --> KP["KimiAdapter"]
|
||||
CE --> WP["WenxinAdapter"]
|
||||
CE --> CR["CitationRecord"]
|
||||
CE --> Q["Query"]
|
||||
CE --> QT["QueryTask"]
|
||||
SCH["QueryScheduler"] --> CE
|
||||
SVC["Citation Services"] --> CR
|
||||
API["API"] --> SVC
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [citation.py:24-269](file://backend/app/services/citation.py#L24-L269)
|
||||
|
||||
章节来源
|
||||
- [citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [citation.py:24-269](file://backend/app/services/citation.py#L24-L269)
|
||||
|
||||
## 性能考虑
|
||||
- 并发与重试:平台适配器采用指数退避重试,降低网络波动影响
|
||||
- 资源管理:适配器在finally中释放浏览器与上下文,避免资源泄漏
|
||||
- 数据库索引:对查询与记录的关键字段建立索引,加速统计与筛选
|
||||
- 异步执行:引擎与调度器均采用异步模式,提高吞吐量
|
||||
- 上下文截取:限定片段长度,减少存储与传输开销
|
||||
|
||||
章节来源
|
||||
- [kimi.py:33-48](file://backend/app/workers/platforms/kimi.py#L33-L48)
|
||||
- [wenxin.py:33-48](file://backend/app/workers/platforms/wenxin.py#L33-L48)
|
||||
- [citation_record.py:37-41](file://backend/app/models/citation_record.py#L37-L41)
|
||||
- [database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
|
||||
## 故障排查指南
|
||||
- 平台适配器初始化失败:检查Playwright浏览器安装与路径配置
|
||||
- 页面元素定位失败:适配器内置多种选择器与超时处理,可关注日志中的选择器尝试顺序
|
||||
- 引擎执行异常:查看任务状态与错误信息,确认查询配置与平台可用性
|
||||
- 统计结果异常:核对查询所有权验证与时间范围过滤条件
|
||||
|
||||
章节来源
|
||||
- [kimi.py:21-32](file://backend/app/workers/platforms/kimi.py#L21-L32)
|
||||
- [wenxin.py:21-32](file://backend/app/workers/platforms/wenxin.py#L21-L32)
|
||||
- [citation_engine.py:211-227](file://backend/app/workers/citation_engine.py#L211-L227)
|
||||
- [citation.py:14-21](file://backend/app/services/citation.py#L14-L21)
|
||||
|
||||
## 结论
|
||||
该引用检测算法以清晰的分层设计实现了从平台查询到品牌匹配与统计分析的完整链路。通过精确、别名与模糊匹配相结合的策略,辅以上下文提取与竞争品牌识别,能够有效支撑品牌监测与竞品分析场景。建议在生产环境中结合业务需求持续优化阈值与品牌库,并完善监控与告警体系。
|
||||
|
||||
## 附录
|
||||
|
||||
### 置信度评分系统
|
||||
- 精确匹配:置信度为最高值
|
||||
- 别名匹配:置信度为较高值
|
||||
- 模糊匹配:置信度由编辑相似度决定,超过阈值方可视为命中
|
||||
- 上下文相关性:通过段落位置与片段长度间接反映相关性
|
||||
|
||||
章节来源
|
||||
- [citation_engine.py:26-100](file://backend/app/workers/citation_engine.py#L26-L100)
|
||||
|
||||
### 引用上下文提取技术
|
||||
- 文本分段:按换行符拆分段落,定位首次出现位置
|
||||
- 片段截取:限定最大长度,保证上下文可读性与性能
|
||||
- 相关性判断:结合段落位置与关键词密度进行粗略评估
|
||||
|
||||
章节来源
|
||||
- [citation_engine.py:107-119](file://backend/app/workers/citation_engine.py#L107-L119)
|
||||
|
||||
### 算法优化策略
|
||||
- 性能提升:异步并发、指数退避重试、资源及时释放
|
||||
- 准确性改进:调整模糊匹配阈值、扩展品牌别名库、引入更细粒度的上下文特征
|
||||
- 可靠性增强:完善错误分类与日志记录、增加健康检查与降级策略
|
||||
|
||||
章节来源
|
||||
- [kimi.py:33-48](file://backend/app/workers/platforms/kimi.py#L33-L48)
|
||||
- [wenxin.py:33-48](file://backend/app/workers/platforms/wenxin.py#L33-L48)
|
||||
- [citation_engine.py:291-300](file://backend/app/workers/citation_engine.py#L291-L300)
|
||||
|
||||
### 算法调优指南
|
||||
- 调整阈值:根据业务反馈微调模糊匹配阈值与置信度边界
|
||||
- 品牌库维护:定期更新行业品牌清单与别名,提升识别覆盖率
|
||||
- 上下文长度:根据下游应用需求调整片段长度,平衡信息量与性能
|
||||
- 平台选择:针对不同关键词特性选择更适合的平台,必要时并行执行取并集
|
||||
|
||||
章节来源
|
||||
- [test_citation_engine.py:6-54](file://tests/test_citation_engine.py#L6-L54)
|
||||
- [citation_engine.py:126-130](file://backend/app/workers/citation_engine.py#L126-L130)
|
||||
|
||||
### 实际应用场景示例
|
||||
- 品牌监测:对目标品牌进行周期性监测,追踪提及次数与趋势
|
||||
- 竞品分析:识别文本中的竞争品牌,辅助市场情报收集
|
||||
- 舆情预警:结合置信度与上下文,筛选高风险或高热度提及
|
||||
|
||||
章节来源
|
||||
- [citation.py:76-201](file://backend/app/services/citation.py#L76-L201)
|
||||
- [citations.py:25-77](file://backend/app/api/citations.py#L25-L77)
|
||||
|
|
@ -0,0 +1,476 @@
|
|||
# 文心平台集成
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [search_engine.py](file://backend/app/workers/platforms/search_engine.py)
|
||||
- [citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [config.py](file://backend/app/config.py)
|
||||
- [query.py](file://backend/app/models/query.py)
|
||||
- [queries.py](file://backend/app/api/queries.py)
|
||||
- [Dockerfile](file://backend/Dockerfile)
|
||||
- [kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [tongyi.py](file://backend/app/workers/platforms/tongyi.py)
|
||||
- [test_citations.py](file://tests/test_citations.py)
|
||||
</cite>
|
||||
|
||||
## 更新摘要
|
||||
**变更内容**
|
||||
- 文心平台适配器已简化为搜索引擎模式,移除Playwright浏览器自动化实现
|
||||
- 采用统一的搜索引擎查询机制,通过`fetch_search_content`函数获取内容
|
||||
- 所有平台适配器(wenxin、kimi、tongyi等)均采用相同的简化模式
|
||||
- 移除复杂的页面交互策略和稳定性检测逻辑
|
||||
- 保留重试机制和错误处理策略
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖关系分析](#依赖关系分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件面向"文心平台集成"的技术与非技术读者,系统性说明文心一言平台适配器的实现方式,涵盖以下方面:
|
||||
- 文心适配器的职责与实现模式(基于搜索引擎查询)
|
||||
- 请求参数构建(关键词处理、搜索引擎查询策略)
|
||||
- HTTP请求与搜索引擎交互流程(DuckDuckGo、Wikipedia API)
|
||||
- 响应解析与内容提取(HTML解析、文本清理)
|
||||
- 配置管理(环境变量、超时与重试策略)
|
||||
- 错误处理与异常恢复
|
||||
- 安全注意事项与最佳实践
|
||||
|
||||
## 项目结构
|
||||
文心平台集成位于后端工作流模块中,采用"适配器 + 引擎"的分层设计:
|
||||
- 适配器层:负责具体平台的搜索引擎查询交互
|
||||
- 引擎层:编排多平台查询、品牌匹配与统计
|
||||
- 搜索引擎层:提供统一的搜索内容获取能力
|
||||
- 配置层:集中管理运行时参数(如API密钥占位、超时设置)
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "适配器层"
|
||||
Base["BasePlatformAdapter<br/>抽象接口"]
|
||||
Wenxin["WenxinAdapter<br/>文心一言适配器"]
|
||||
Kimi["KimiAdapter<br/>Kimi适配器"]
|
||||
Tongyi["TongyiAdapter<br/>通义千问适配器"]
|
||||
end
|
||||
subgraph "搜索引擎层"
|
||||
Search["SearchEngine<br/>搜索引擎模块"]
|
||||
DDG["DuckDuckGo<br/>HTML搜索"]
|
||||
Wiki["Wikipedia API<br/>百科查询"]
|
||||
end
|
||||
subgraph "引擎层"
|
||||
Engine["CitationEngine<br/>引用检测引擎"]
|
||||
End
|
||||
subgraph "配置层"
|
||||
Cfg["Settings<br/>环境配置"]
|
||||
Docker["Dockerfile<br/>容器化配置"]
|
||||
End
|
||||
subgraph "数据模型"
|
||||
QModel["Query<br/>查询模型"]
|
||||
End
|
||||
Base --> Wenxin
|
||||
Base --> Kimi
|
||||
Base --> Tongyi
|
||||
Wenxin --> Search
|
||||
Kimi --> Search
|
||||
Tongyi --> Search
|
||||
Search --> DDG
|
||||
Search --> Wiki
|
||||
Engine --> Wenxin
|
||||
Engine --> QModel
|
||||
Wenxin --> Cfg
|
||||
Docker --> Wenxin
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [wenxin.py:10-38](file://backend/app/workers/platforms/wenxin.py#L10-L38)
|
||||
- [kimi.py:10-38](file://backend/app/workers/platforms/kimi.py#L10-L38)
|
||||
- [tongyi.py:10-38](file://backend/app/workers/platforms/tongyi.py#L10-L38)
|
||||
- [search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
- [citation_engine.py:161-176](file://backend/app/workers/citation_engine.py#L161-L176)
|
||||
- [config.py:9-23](file://backend/app/config.py#L9-L23)
|
||||
- [Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
|
||||
**章节来源**
|
||||
- [base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [wenxin.py:10-38](file://backend/app/workers/platforms/wenxin.py#L10-L38)
|
||||
- [search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
- [citation_engine.py:161-176](file://backend/app/workers/citation_engine.py#L161-L176)
|
||||
- [config.py:9-23](file://backend/app/config.py#L9-L23)
|
||||
- [Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
|
||||
## 核心组件
|
||||
- 文心适配器(WenxinAdapter):实现文心一言平台的搜索引擎查询能力,负责关键词拼接与搜索内容获取
|
||||
- 搜索引擎模块(SearchEngine):提供统一的搜索内容获取能力,支持DuckDuckGo和Wikipedia API
|
||||
- 引擎(CitationEngine):编排查询任务,调用适配器获取原始响应,进行品牌匹配与竞争品牌检测,并持久化结果
|
||||
- 基类(BasePlatformAdapter):定义统一的平台适配器接口,约束平台名称、URL与查询方法
|
||||
- 配置(Settings):集中管理数据库、Redis、JWT、Playwright浏览器路径以及API密钥占位等配置项
|
||||
- 数据模型(Query):承载查询任务的元数据,包括关键词、目标品牌、平台集合、频率与状态等
|
||||
|
||||
**章节来源**
|
||||
- [wenxin.py:10-38](file://backend/app/workers/platforms/wenxin.py#L10-L38)
|
||||
- [search_engine.py:16-77](file://backend/app/workers/platforms/search_engine.py#L16-L77)
|
||||
- [citation_engine.py:161-176](file://backend/app/workers/citation_engine.py#L161-L176)
|
||||
- [base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
- [config.py:9-23](file://backend/app/config.py#L9-L23)
|
||||
- [query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
|
||||
## 架构总览
|
||||
文心平台集成采用"搜索引擎查询 + 引擎编排"的架构:
|
||||
- 引擎接收查询请求,按平台顺序调用适配器
|
||||
- 适配器通过搜索引擎模块获取与关键词相关的内容
|
||||
- 适配器进行重试与错误处理,返回原始文本
|
||||
- 引擎进行品牌匹配与统计,写入数据库
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant Engine as "CitationEngine"
|
||||
participant Adapter as "WenxinAdapter"
|
||||
participant Search as "SearchEngine"
|
||||
participant DDG as "DuckDuckGo"
|
||||
participant Wiki as "Wikipedia API"
|
||||
Client->>Engine : "发起查询请求"
|
||||
Engine->>Adapter : "query(keyword)"
|
||||
Adapter->>Search : "fetch_search_content(platform_name, keyword)"
|
||||
Search->>DDG : "搜索关键词"
|
||||
DDG-->>Search : "返回HTML结果"
|
||||
Search->>Wiki : "回退到百科查询"
|
||||
Wiki-->>Search : "返回百科内容"
|
||||
Search-->>Adapter : "返回搜索内容"
|
||||
Adapter-->>Engine : "返回原始响应文本"
|
||||
Engine-->>Client : "返回引用检测结果"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [citation_engine.py:256-287](file://backend/app/workers/citation_engine.py#L256-L287)
|
||||
- [wenxin.py:16-33](file://backend/app/workers/platforms/wenxin.py#L16-L33)
|
||||
- [search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
|
||||
**章节来源**
|
||||
- [citation_engine.py:161-176](file://backend/app/workers/citation_engine.py#L161-L176)
|
||||
- [wenxin.py:10-38](file://backend/app/workers/platforms/wenxin.py#L10-L38)
|
||||
- [search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 文心适配器(WenxinAdapter)
|
||||
- **角色与职责**
|
||||
- 实现BasePlatformAdapter接口,提供文心一言平台的搜索引擎查询能力
|
||||
- 使用统一的搜索引擎模块获取与关键词相关的内容
|
||||
- 提供重试与指数退避策略,提升查询稳定性
|
||||
- 直接返回搜索引擎返回的原始文本内容
|
||||
|
||||
- **关键实现要点**
|
||||
- 简化的查询流程:直接调用搜索引擎模块,无需复杂的页面交互
|
||||
- 重试机制:最多3次尝试,指数退避(2^attempt秒)
|
||||
- 错误处理:捕获异常并记录日志,最终抛出异常
|
||||
- 资源管理:搜索引擎模式无需额外资源清理
|
||||
|
||||
- **请求参数构建**
|
||||
- 关键词处理:直接传递给搜索引擎模块,无需特殊处理
|
||||
- 搜索策略:搜索引擎模块会自动处理关键词拼接和查询优化
|
||||
- 超时设置:搜索引擎查询超时30秒,确保不会长时间阻塞
|
||||
- 重试配置:最多3次尝试,指数退避策略
|
||||
|
||||
- **响应解析逻辑**
|
||||
- 内容获取:直接返回搜索引擎返回的文本内容
|
||||
- 文本清理:由搜索引擎模块负责HTML解析和文本清理
|
||||
- 错误处理:搜索引擎模块会处理各种异常情况并提供回退机制
|
||||
|
||||
- **安全与合规**
|
||||
- 该实现为搜索引擎查询,不涉及明文API密钥传递
|
||||
- 使用公开的搜索引擎API,无需认证
|
||||
- 遵循搜索引擎的使用条款和限制
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BasePlatformAdapter {
|
||||
+string platform_name
|
||||
+string platform_url
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class WenxinAdapter {
|
||||
+query(keyword) str
|
||||
+_do_query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class SearchEngine {
|
||||
+fetch_search_content(platform_name, keyword) str
|
||||
+search_duckduckgo(query) str
|
||||
+search_wikipedia(keyword) str
|
||||
}
|
||||
BasePlatformAdapter <|-- WenxinAdapter
|
||||
WenxinAdapter --> SearchEngine
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
- [wenxin.py:10-38](file://backend/app/workers/platforms/wenxin.py#L10-L38)
|
||||
- [search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
|
||||
**章节来源**
|
||||
- [wenxin.py:10-38](file://backend/app/workers/platforms/wenxin.py#L10-L38)
|
||||
|
||||
### 搜索引擎模块(SearchEngine)
|
||||
- **角色与职责**
|
||||
- 提供统一的搜索内容获取能力
|
||||
- 支持DuckDuckGo HTML搜索和Wikipedia API查询
|
||||
- 实现智能回退机制,确保查询成功率
|
||||
|
||||
- **关键实现要点**
|
||||
- DuckDuckGo搜索:使用HTML解析获取搜索结果摘要
|
||||
- Wikipedia回退:当DuckDuckGo受限时自动切换到Wikipedia API
|
||||
- HTML解析:提取标题和摘要信息,去除HTML标签
|
||||
- 文本清理:移除引用标记和多余空白字符
|
||||
|
||||
- **搜索策略**
|
||||
- 主要策略:DuckDuckGo HTML搜索,无需API密钥
|
||||
- 回退策略:Wikipedia API查询,公开API无需认证
|
||||
- 结果合并:将多个搜索结果合并为统一格式
|
||||
|
||||
- **错误处理**
|
||||
- 搜索失败:记录警告并尝试回退策略
|
||||
- 解析失败:检查返回内容的有效性
|
||||
- 所有策略失败:抛出运行时错误
|
||||
|
||||
**章节来源**
|
||||
- [search_engine.py:16-77](file://backend/app/workers/platforms/search_engine.py#L16-L77)
|
||||
- [search_engine.py:79-145](file://backend/app/workers/platforms/search_engine.py#L79-L145)
|
||||
- [search_engine.py:147-174](file://backend/app/workers/platforms/search_engine.py#L147-L174)
|
||||
|
||||
### 引擎(CitationEngine)
|
||||
- **角色与职责**
|
||||
- 编排查询任务,按平台顺序执行
|
||||
- 调用适配器获取原始响应,进行品牌匹配与竞争品牌检测
|
||||
- 将结果持久化为引用记录,并更新任务状态与查询时间
|
||||
|
||||
- **关键实现要点**
|
||||
- 平台注册:内置文心、Kimi、通义等适配器,并可扩展更多平台
|
||||
- 任务状态管理:运行中、成功、失败三种状态,失败时记录错误信息
|
||||
- 结果聚合:返回引用状态、置信度、匹配类型、位置、上下文、竞争品牌与原始响应
|
||||
- 关键词增强:为搜索引擎查询自动添加目标品牌关键词
|
||||
|
||||
- **错误处理**
|
||||
- 适配器异常会被捕获并记录,同时生成一条cited=False的占位记录
|
||||
- 任务状态与完成时间被正确更新,保证数据一致性
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["开始执行查询"]) --> InitMatcher["初始化品牌匹配器"]
|
||||
InitMatcher --> IteratePlatforms["遍历平台列表"]
|
||||
IteratePlatforms --> CreateTask["获取或创建任务记录"]
|
||||
CreateTask --> UpdateTaskRunning["更新任务状态为 running"]
|
||||
UpdateTaskRunning --> EnhanceKeyword["增强关键词添加目标品牌"]
|
||||
EnhanceKeyword --> CallAdapter["调用适配器执行查询"]
|
||||
CallAdapter --> ParseResult["品牌匹配与竞争品牌检测"]
|
||||
ParseResult --> CreateRecord["创建引用记录"]
|
||||
CreateRecord --> UpdateTaskSuccess["更新任务状态为 success"]
|
||||
UpdateTaskSuccess --> NextPlatform{"还有平台吗?"}
|
||||
NextPlatform --> |是| IteratePlatforms
|
||||
NextPlatform --> |否| UpdateQueryTime["更新查询时间字段"]
|
||||
UpdateQueryTime --> End(["结束"])
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [citation_engine.py:177-254](file://backend/app/workers/citation_engine.py#L177-L254)
|
||||
|
||||
**章节来源**
|
||||
- [citation_engine.py:161-176](file://backend/app/workers/citation_engine.py#L161-L176)
|
||||
- [citation_engine.py:177-254](file://backend/app/workers/citation_engine.py#L177-L254)
|
||||
|
||||
### 配置管理(Settings)
|
||||
- **配置项说明**
|
||||
- 数据库连接:DATABASE_URL
|
||||
- Redis连接:REDIS_URL
|
||||
- JWT密钥与过期:JWT_SECRET、JWT_EXPIRE_HOURS
|
||||
- Playwright浏览器路径:PLAYWRIGHT_BROWSERS_PATH(仍保留用于其他适配器)
|
||||
- API密钥占位:ZHIPU_API_KEY、TONGYI_API_KEY(当前未用于搜索引擎适配器)
|
||||
|
||||
- **环境变量加载**
|
||||
- 通过Pydantic Settings自动从.env文件加载,忽略未知字段
|
||||
|
||||
- **容器化与依赖**
|
||||
- Dockerfile中安装系统依赖,但Playwright浏览器仅用于其他适配器
|
||||
- 确保搜索引擎查询功能正常运行
|
||||
|
||||
**章节来源**
|
||||
- [config.py:9-23](file://backend/app/config.py#L9-L23)
|
||||
- [Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
|
||||
### 数据模型(Query)
|
||||
- **字段说明**
|
||||
- 关键词、目标品牌、别名、平台集合、频率、状态、时间戳等
|
||||
- 默认平台集合包含文心与Kimi,便于快速启用
|
||||
|
||||
- **业务意义**
|
||||
- 作为查询任务的载体,驱动引擎执行跨平台检索与分析
|
||||
|
||||
**章节来源**
|
||||
- [query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
|
||||
### API入口(queries.py)
|
||||
- **功能概述**
|
||||
- 提供查询任务的增删改查接口,供前端调用
|
||||
- 与服务层协作,完成权限校验与数据持久化
|
||||
|
||||
- **与引擎的关系**
|
||||
- 引擎在后台异步执行查询,API负责暴露任务管理能力
|
||||
|
||||
**章节来源**
|
||||
- [queries.py:1-109](file://backend/app/api/queries.py#L1-L109)
|
||||
|
||||
## 依赖关系分析
|
||||
- **组件耦合**
|
||||
- CitationEngine依赖各平台适配器,形成平台无关的编排层
|
||||
- 所有搜索引擎适配器依赖统一的SearchEngine模块
|
||||
- SearchEngine依赖外部服务(DuckDuckGo、Wikipedia)
|
||||
- 引擎与模型解耦,通过ORM进行数据持久化
|
||||
|
||||
- **外部依赖**
|
||||
- DuckDuckGo:免费HTML搜索服务
|
||||
- Wikipedia API:公开百科查询服务
|
||||
- Docker:容器化部署,包含必要的系统依赖
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
Engine["CitationEngine"] --> Wenxin["WenxinAdapter"]
|
||||
Engine --> Kimi["KimiAdapter"]
|
||||
Engine --> Tongyi["TongyiAdapter"]
|
||||
Wenxin --> Search["SearchEngine"]
|
||||
Kimi --> Search
|
||||
Tongyi --> Search
|
||||
Search --> DDG["DuckDuckGo"]
|
||||
Search --> Wiki["Wikipedia API"]
|
||||
Engine --> ORM["SQLAlchemy ORM"]
|
||||
Engine --> Model["Query模型"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [citation_engine.py:161-176](file://backend/app/workers/citation_engine.py#L161-L176)
|
||||
- [wenxin.py:10-38](file://backend/app/workers/platforms/wenxin.py#L10-L38)
|
||||
- [kimi.py:10-38](file://backend/app/workers/platforms/kimi.py#L10-L38)
|
||||
- [tongyi.py:10-38](file://backend/app/workers/platforms/tongyi.py#L10-L38)
|
||||
- [search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
|
||||
**章节来源**
|
||||
- [citation_engine.py:161-176](file://backend/app/workers/citation_engine.py#L161-L176)
|
||||
- [wenxin.py:10-38](file://backend/app/workers/platforms/wenxin.py#L10-L38)
|
||||
- [search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
|
||||
## 性能考虑
|
||||
- **查询效率**
|
||||
- 搜索引擎查询比浏览器自动化更快,响应时间更短
|
||||
- 减少内存和CPU消耗,提高并发处理能力
|
||||
- 无需维护浏览器实例,降低资源开销
|
||||
|
||||
- **稳定性优化**
|
||||
- 通过搜索引擎模块的回退机制,提高查询成功率
|
||||
- 统一的错误处理和重试策略
|
||||
- 外部服务的超时控制,防止长时间阻塞
|
||||
|
||||
- **重试策略**
|
||||
- 最多尝试3次,指数退避降低瞬时压力
|
||||
- 搜索引擎查询超时30秒,确保及时响应
|
||||
|
||||
- **缓存与优化**
|
||||
- 搜索结果可利用外部服务的缓存机制
|
||||
- 减少重复查询,提高整体性能
|
||||
|
||||
**章节来源**
|
||||
- [wenxin.py:16-29](file://backend/app/workers/platforms/wenxin.py#L16-L29)
|
||||
- [search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
|
||||
## 故障排查指南
|
||||
- **搜索引擎查询失败**
|
||||
- 现象:DuckDuckGo搜索受限或返回空结果
|
||||
- 处理:自动回退到Wikipedia API;检查网络连通性
|
||||
- 参考:[search_engine.py:139-144](file://backend/app/workers/platforms/search_engine.py#L139-L144)
|
||||
|
||||
- **Wikipedia API查询失败**
|
||||
- 现象:百科查询不可用或返回空内容
|
||||
- 处理:检查Wikipedia API可用性;确认关键词有效性
|
||||
- 参考:[search_engine.py:28-76](file://backend/app/workers/platforms/search_engine.py#L28-L76)
|
||||
|
||||
- **HTML解析失败**
|
||||
- 现象:DuckDuckGo返回的HTML结构发生变化
|
||||
- 处理:更新HTML解析正则表达式;增加容错分支
|
||||
- 参考:[search_engine.py:105-137](file://backend/app/workers/platforms/search_engine.py#L105-L137)
|
||||
|
||||
- **适配器查询异常**
|
||||
- 现象:搜索引擎查询抛出异常
|
||||
- 处理:检查重试机制;查看日志定位根因
|
||||
- 参考:[wenxin.py:16-29](file://backend/app/workers/platforms/wenxin.py#L16-L29)
|
||||
|
||||
- **引擎执行失败**
|
||||
- 现象:适配器异常导致任务失败
|
||||
- 处理:记录错误信息并生成占位记录;检查日志定位根因
|
||||
- 参考:[citation_engine.py:231-247](file://backend/app/workers/citation_engine.py#L231-L247)
|
||||
|
||||
- **单元测试验证**
|
||||
- 测试覆盖平台返回值与统计数据,确保集成链路正常
|
||||
- 参考:[test_citations.py:23-93](file://tests/test_citations.py#L23-L93)
|
||||
|
||||
**章节来源**
|
||||
- [search_engine.py:139-144](file://backend/app/workers/platforms/search_engine.py#L139-L144)
|
||||
- [search_engine.py:28-76](file://backend/app/workers/platforms/search_engine.py#L28-L76)
|
||||
- [search_engine.py:105-137](file://backend/app/workers/platforms/search_engine.py#L105-L137)
|
||||
- [wenxin.py:16-29](file://backend/app/workers/platforms/wenxin.py#L16-L29)
|
||||
- [citation_engine.py:231-247](file://backend/app/workers/citation_engine.py#L231-L247)
|
||||
- [test_citations.py:23-93](file://tests/test_citations.py#L23-L93)
|
||||
|
||||
## 结论
|
||||
文心平台集成通过"搜索引擎查询 + 引擎编排"的方式,实现了对文心一言的高效查询与内容获取。其特点包括:
|
||||
- 简化的适配器实现,移除复杂的浏览器自动化逻辑
|
||||
- 统一的搜索引擎查询机制,提高查询成功率和稳定性
|
||||
- 完善的重试与回退策略,确保在各种情况下都能获取内容
|
||||
- 清晰的错误处理与任务状态管理
|
||||
- 与容器化部署的无缝衔接
|
||||
|
||||
在实际使用中,建议结合业务需求对搜索引擎查询参数进行调优,并持续关注搜索引擎API的变化以保持功能的稳定性。
|
||||
|
||||
## 附录
|
||||
|
||||
### API调用示例与错误处理方案
|
||||
- **示例场景**
|
||||
- 前端通过查询API创建任务,后台引擎按平台顺序执行
|
||||
- 引擎调用文心适配器获取搜索内容,进行品牌匹配与统计
|
||||
- 若搜索引擎查询异常,引擎记录失败并生成占位记录
|
||||
|
||||
- **错误处理方案**
|
||||
- 搜索引擎查询异常:自动回退到Wikipedia API;记录错误并重试
|
||||
- HTML解析失败:更新解析规则并增加容错分支
|
||||
- 外部服务不可用:使用缓存内容或回退策略
|
||||
|
||||
**章节来源**
|
||||
- [queries.py:90-109](file://backend/app/api/queries.py#L90-L109)
|
||||
- [citation_engine.py:177-254](file://backend/app/workers/citation_engine.py#L177-L254)
|
||||
- [wenxin.py:16-33](file://backend/app/workers/platforms/wenxin.py#L16-L33)
|
||||
|
||||
### 安全注意事项与最佳实践
|
||||
- **安全注意事项**
|
||||
- 该实现使用公开的搜索引擎API,无需API密钥
|
||||
- 遵循搜索引擎的使用条款和限制
|
||||
- 日志中避免输出敏感信息(如用户输入、错误堆栈)
|
||||
|
||||
- **最佳实践**
|
||||
- 使用统一的搜索引擎模块,确保查询策略的一致性
|
||||
- 设置合理的超时和重试策略,平衡稳定性与性能
|
||||
- 监控搜索引擎API的可用性和性能指标
|
||||
- 定期更新HTML解析规则以适应搜索引擎页面结构变化
|
||||
- 使用单元测试覆盖关键流程,保障回归质量
|
||||
|
||||
**章节来源**
|
||||
- [search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
- [test_citations.py:23-93](file://tests/test_citations.py#L23-L93)
|
||||
|
|
@ -0,0 +1,510 @@
|
|||
# 适配器架构设计
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [queries.py](file://backend/app/api/queries.py)
|
||||
- [query.py](file://backend/app/models/query.py)
|
||||
- [main.py](file://backend/app/main.py)
|
||||
- [platforms.ts](file://frontend/lib/platforms.ts)
|
||||
- [platform-chart.tsx](file://frontend/components/charts/platform-chart.tsx)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构概览](#架构概览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖关系分析](#依赖关系分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排除指南](#故障排除指南)
|
||||
9. [结论](#结论)
|
||||
|
||||
## 简介
|
||||
|
||||
本项目采用适配器模式设计了一个可扩展的AI平台查询系统。该架构通过统一的接口抽象,实现了对不同AI平台(如Kimi、文心一言等)的无缝集成和切换。适配器模式在此场景中的应用价值体现在:
|
||||
|
||||
- **统一接口**:为不同的AI平台提供一致的查询接口
|
||||
- **可扩展性**:轻松添加新的AI平台适配器
|
||||
- **资源管理**:统一的资源清理和生命周期管理
|
||||
- **错误处理**:标准化的异常处理和重试机制
|
||||
|
||||
## 项目结构
|
||||
|
||||
该项目采用前后端分离的架构设计,后端使用Python FastAPI框架,前端使用Next.js React框架。适配器架构主要集中在后端的workers目录中。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "前端层"
|
||||
FE[前端应用<br/>Next.js React]
|
||||
PC[平台图表组件<br/>platform-chart.tsx]
|
||||
PL[平台映射<br/>platforms.ts]
|
||||
end
|
||||
subgraph "后端层"
|
||||
API[FastAPI API]
|
||||
CE[CitationEngine<br/>引用检测引擎]
|
||||
SCH[Scheduler<br/>调度器]
|
||||
subgraph "适配器层"
|
||||
BA[BasePlatformAdapter<br/>抽象基类]
|
||||
KA[KimiAdapter<br/>Kimi适配器]
|
||||
WA[WenxinAdapter<br/>文心一言适配器]
|
||||
end
|
||||
end
|
||||
subgraph "数据层"
|
||||
DB[(PostgreSQL数据库)]
|
||||
QM[Query模型]
|
||||
CRM[CitationRecord模型]
|
||||
end
|
||||
FE --> API
|
||||
API --> CE
|
||||
CE --> SCH
|
||||
CE --> BA
|
||||
BA --> KA
|
||||
BA --> WA
|
||||
CE --> DB
|
||||
QM --> DB
|
||||
CRM --> DB
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [citation_engine.py:148-157](file://backend/app/workers/citation_engine.py#L148-L157)
|
||||
- [base.py:4-17](file://backend/app/workers/platforms/base.py#L4-L17)
|
||||
|
||||
**章节来源**
|
||||
- [main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
|
||||
## 核心组件
|
||||
|
||||
### BasePlatformAdapter 抽象基类
|
||||
|
||||
BasePlatformAdapter是整个适配器架构的核心抽象类,定义了所有AI平台适配器必须实现的标准接口。
|
||||
|
||||
#### 设计理念
|
||||
|
||||
该类采用了Python的ABC(Abstract Base Classes)机制,确保子类必须实现指定的抽象方法。设计理念包括:
|
||||
|
||||
- **最小接口原则**:只定义必要的抽象方法,避免过度设计
|
||||
- **职责单一原则**:专注于平台适配功能,不包含业务逻辑
|
||||
- **扩展性优先**:为未来添加新平台预留空间
|
||||
|
||||
#### 核心属性
|
||||
|
||||
| 属性名称 | 类型 | 描述 | 示例值 |
|
||||
|---------|------|------|--------|
|
||||
| platform_name | str | 平台标识符,用于内部识别 | "kimi", "wenxin" |
|
||||
| platform_url | str | 平台官方网站地址 | "https://kimi.moonshot.cn" |
|
||||
|
||||
#### 抽象方法
|
||||
|
||||
**query(keyword: str) -> str**
|
||||
- **设计目的**:在AI平台上查询关键词并返回原始响应文本
|
||||
- **参数规范**:
|
||||
- keyword: str - 要查询的关键词
|
||||
- 返回值: str - AI平台的原始响应文本
|
||||
- **实现要求**:必须异步实现,支持重试机制
|
||||
|
||||
**close()**
|
||||
- **设计目的**:清理适配器使用的系统资源
|
||||
- **实现要求**:异步方法,确保资源正确释放
|
||||
|
||||
**章节来源**
|
||||
- [base.py:4-17](file://backend/app/workers/platforms/base.py#L4-L17)
|
||||
|
||||
### 具体适配器实现
|
||||
|
||||
#### KimiAdapter
|
||||
|
||||
KimiAdapter是针对Moonshot AI平台的适配器实现,具有以下特点:
|
||||
|
||||
- **浏览器自动化**:使用Playwright进行页面交互
|
||||
- **智能重试**:指数退避重试机制
|
||||
- **稳定性保障**:超时处理和异常恢复
|
||||
|
||||
#### WenxinAdapter
|
||||
|
||||
WenxinAdapter是针对百度文心一言平台的适配器实现:
|
||||
|
||||
- **相似架构**:与KimiAdapter类似的实现模式
|
||||
- **平台特定**:针对文心一言的界面结构优化
|
||||
- **一致性保证**:保持与BasePlatformAdapter的接口兼容
|
||||
|
||||
**章节来源**
|
||||
- [kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
|
||||
## 架构概览
|
||||
|
||||
整个系统采用分层架构设计,适配器模式位于中间层,向上提供统一接口,向下封装具体实现细节。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as 客户端
|
||||
participant API as FastAPI API
|
||||
participant Engine as CitationEngine
|
||||
participant Adapter as BasePlatformAdapter
|
||||
participant Platform as AI平台
|
||||
Client->>API : 发起查询请求
|
||||
API->>Engine : execute_query()
|
||||
Engine->>Engine : 创建BrandMatcher
|
||||
Engine->>Adapter : execute_single_platform()
|
||||
Adapter->>Adapter : query(keyword)
|
||||
Adapter->>Platform : 执行平台查询
|
||||
Platform-->>Adapter : 返回响应文本
|
||||
Adapter-->>Engine : 返回原始响应
|
||||
Engine->>Engine : 品牌匹配和竞争检测
|
||||
Engine-->>API : 返回检测结果
|
||||
API-->>Client : 返回查询结果
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [base.py:10-17](file://backend/app/workers/platforms/base.py#L10-L17)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### CitationEngine 引用检测引擎
|
||||
|
||||
CitationEngine是系统的中央协调器,负责管理多个平台适配器并执行完整的查询流程。
|
||||
|
||||
#### 核心功能
|
||||
|
||||
1. **平台管理**:维护平台适配器字典
|
||||
2. **查询执行**:协调单个平台的查询过程
|
||||
3. **结果处理**:整合品牌匹配和竞争检测结果
|
||||
4. **任务跟踪**:管理查询任务的状态和历史
|
||||
|
||||
#### 关键流程
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start([开始查询]) --> InitMatcher["初始化BrandMatcher"]
|
||||
InitMatcher --> GetPlatforms["获取平台列表"]
|
||||
GetPlatforms --> LoopPlatforms{"遍历平台"}
|
||||
LoopPlatforms --> CreateTask["创建或获取QueryTask"]
|
||||
CreateTask --> UpdateStatus["更新任务状态为running"]
|
||||
UpdateStatus --> ExecutePlatform["执行单个平台查询"]
|
||||
ExecutePlatform --> BrandMatch["品牌匹配检测"]
|
||||
BrandMatch --> CompetitorDetect["竞争品牌检测"]
|
||||
CompetitorDetect --> CreateRecord["创建CitationRecord"]
|
||||
CreateRecord --> UpdateTaskSuccess["更新任务状态为success"]
|
||||
UpdateTaskSuccess --> NextPlatform{"还有平台吗?"}
|
||||
NextPlatform --> |是| LoopPlatforms
|
||||
NextPlatform --> |否| UpdateQueryTime["更新查询时间字段"]
|
||||
UpdateQueryTime --> End([结束])
|
||||
ExecutePlatform --> |异常| CreateFailedRecord["创建失败记录"]
|
||||
CreateFailedRecord --> UpdateTaskFailed["更新任务状态为failed"]
|
||||
UpdateTaskFailed --> NextPlatform
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [citation_engine.py:236-266](file://backend/app/workers/citation_engine.py#L236-L266)
|
||||
|
||||
#### 数据模型
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
QUERY {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
datetime last_queried_at
|
||||
datetime next_query_at
|
||||
datetime created_at
|
||||
datetime updated_at
|
||||
}
|
||||
CITATION_RECORD {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
boolean cited
|
||||
integer citation_position
|
||||
text citation_text
|
||||
jsonb competitor_brands
|
||||
text raw_response
|
||||
datetime created_at
|
||||
datetime updated_at
|
||||
}
|
||||
QUERY_TASK {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
string status
|
||||
datetime started_at
|
||||
datetime completed_at
|
||||
text error_message
|
||||
datetime created_at
|
||||
datetime updated_at
|
||||
}
|
||||
USER ||--o{ QUERY : creates
|
||||
QUERY ||--o{ CITATION_RECORD : generates
|
||||
QUERY ||--o{ QUERY_TASK : tracks
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [citation_engine.py:19-120](file://backend/app/workers/citation_engine.py#L19-L120)
|
||||
|
||||
**章节来源**
|
||||
- [citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
|
||||
### Scheduler 调度器
|
||||
|
||||
QueryScheduler负责定时执行查询任务,确保系统能够自动运行。
|
||||
|
||||
#### 核心特性
|
||||
|
||||
- **定时执行**:每小时检查一次到期的查询任务
|
||||
- **异步处理**:使用AsyncIOScheduler支持异步操作
|
||||
- **错误隔离**:单个查询失败不影响其他任务
|
||||
- **优雅关闭**:提供资源清理机制
|
||||
|
||||
#### 生命周期管理
|
||||
|
||||
```mermaid
|
||||
stateDiagram-v2
|
||||
[*] --> 初始化
|
||||
初始化 --> 启动 : start()
|
||||
启动 --> 运行中 : 添加定时任务
|
||||
运行中 --> 执行检查 : 每小时触发
|
||||
执行检查 --> 处理查询 : 发现到期任务
|
||||
处理查询 --> 运行中 : 继续监控
|
||||
处理查询 --> 错误 : 异常处理
|
||||
错误 --> 运行中 : 记录日志继续
|
||||
运行中 --> 关闭 : shutdown()
|
||||
关闭 --> [*]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
|
||||
**章节来源**
|
||||
- [scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
|
||||
### 前端集成
|
||||
|
||||
前端系统提供了完整的用户界面,支持平台选择和结果展示。
|
||||
|
||||
#### 平台映射
|
||||
|
||||
前端使用PLATFORM_MAP和PLATFORMS常量来管理平台信息:
|
||||
|
||||
| 平台键 | 中文名称 | 用途 |
|
||||
|--------|----------|------|
|
||||
| wenxin | 文心一言 | 百度AI平台 |
|
||||
| kimi | Kimi | Moonshot AI平台 |
|
||||
| tongyi | 通义千问 | 阿里云AI平台 |
|
||||
| baidu_ai | 百度AI搜索 | 百度搜索服务 |
|
||||
| yuanbao | 腾讯元宝 | 腾讯AI平台 |
|
||||
| qingyan | 智谱清言 | 智谱AI平台 |
|
||||
|
||||
#### 图表可视化
|
||||
|
||||
平台图表组件使用Recharts库展示各平台的引用率统计:
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
subgraph "数据处理"
|
||||
RAW[原始统计数据]
|
||||
TRANSFORM[数据转换]
|
||||
end
|
||||
subgraph "可视化组件"
|
||||
CHART[柱状图]
|
||||
XAXIS[X轴: 平台名称]
|
||||
YAXIS[Y轴: 引用率百分比]
|
||||
TOOLTIP[工具提示]
|
||||
end
|
||||
RAW --> TRANSFORM
|
||||
TRANSFORM --> CHART
|
||||
CHART --> XAXIS
|
||||
CHART --> YAXIS
|
||||
CHART --> TOOLTIP
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [platform-chart.tsx:34-68](file://frontend/components/charts/platform-chart.tsx#L34-L68)
|
||||
- [platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
|
||||
**章节来源**
|
||||
- [platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
- [platform-chart.tsx:1-68](file://frontend/components/charts/platform-chart.tsx#L1-L68)
|
||||
|
||||
## 依赖关系分析
|
||||
|
||||
### 组件依赖图
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "外部依赖"
|
||||
PW[Playwright]
|
||||
APS[APScheduler]
|
||||
SQLA[SQLAlchemy]
|
||||
FAST[FastAPI]
|
||||
end
|
||||
subgraph "核心模块"
|
||||
BASE[BasePlatformAdapter]
|
||||
KIMI[KimiAdapter]
|
||||
WENXIN[WenxinAdapter]
|
||||
CE[CitationEngine]
|
||||
SCH[QueryScheduler]
|
||||
API[FastAPI API]
|
||||
end
|
||||
subgraph "数据模型"
|
||||
QUERY[Query模型]
|
||||
CITE[CitationRecord模型]
|
||||
TASK[QueryTask模型]
|
||||
end
|
||||
PW --> KIMI
|
||||
PW --> WENXIN
|
||||
APS --> SCH
|
||||
SQLA --> CE
|
||||
FAST --> API
|
||||
BASE --> KIMI
|
||||
BASE --> WENXIN
|
||||
CE --> BASE
|
||||
API --> CE
|
||||
CE --> QUERY
|
||||
CE --> CITE
|
||||
CE --> TASK
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [kimi.py:4-6](file://backend/app/workers/platforms/kimi.py#L4-L6)
|
||||
- [wenxin.py:4-6](file://backend/app/workers/platforms/wenxin.py#L4-L6)
|
||||
- [scheduler.py:13-20](file://backend/app/workers/scheduler.py#L13-L20)
|
||||
- [citation_engine.py:7-14](file://backend/app/workers/citation_engine.py#L7-L14)
|
||||
|
||||
### 耦合度分析
|
||||
|
||||
该系统具备高内聚、低耦合的特点:
|
||||
|
||||
- **高内聚**:每个适配器专注于单一平台的实现
|
||||
- **低耦合**:通过抽象基类实现松散耦合
|
||||
- **清晰边界**:各层职责明确,接口清晰
|
||||
- **可测试性**:依赖注入和抽象接口便于单元测试
|
||||
|
||||
**章节来源**
|
||||
- [base.py:4-17](file://backend/app/workers/platforms/base.py#L4-L17)
|
||||
- [citation_engine.py:148-157](file://backend/app/workers/citation_engine.py#L148-L157)
|
||||
|
||||
## 性能考虑
|
||||
|
||||
### 异步编程模型
|
||||
|
||||
系统全面采用异步编程模式,提升了并发处理能力:
|
||||
|
||||
- **异步适配器**:所有适配器方法都使用async/await
|
||||
- **异步数据库**:使用SQLAlchemy异步会话
|
||||
- **异步调度**:APScheduler的异步版本
|
||||
- **异步HTTP**:FastAPI的异步路由
|
||||
|
||||
### 资源管理策略
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
InitBrowser["初始化浏览器"] --> UseBrowser["使用浏览器"]
|
||||
UseBrowser --> CloseBrowser["关闭浏览器"]
|
||||
CloseBrowser --> CleanupResources["清理系统资源"]
|
||||
CleanupResources --> End([资源释放完成])
|
||||
InitBrowser --> Error["异常发生"]
|
||||
Error --> CleanupOnError["异常时清理资源"]
|
||||
CleanupOnError --> End
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [kimi.py:17-31](file://backend/app/workers/platforms/kimi.py#L17-L31)
|
||||
- [kimi.py:198-206](file://backend/app/workers/platforms/kimi.py#L198-L206)
|
||||
|
||||
### 缓存和重试机制
|
||||
|
||||
- **指数退避**:最多3次重试,间隔呈指数增长
|
||||
- **超时控制**:页面加载和操作都有超时限制
|
||||
- **资源池化**:浏览器实例复用,减少启动开销
|
||||
|
||||
## 故障排除指南
|
||||
|
||||
### 常见问题及解决方案
|
||||
|
||||
#### Playwright浏览器问题
|
||||
|
||||
**症状**:启动浏览器失败,提示需要安装Playwright浏览器
|
||||
|
||||
**解决方案**:
|
||||
1. 安装Playwright浏览器:`python -m playwright install chromium`
|
||||
2. 检查网络连接和代理设置
|
||||
3. 确认Docker环境中的浏览器可用性
|
||||
|
||||
#### 页面元素定位失败
|
||||
|
||||
**症状**:无法找到输入框或发送按钮
|
||||
|
||||
**解决方案**:
|
||||
1. 检查平台URL是否正确
|
||||
2. 更新选择器表达式以适应页面变化
|
||||
3. 实现更健壮的元素等待机制
|
||||
|
||||
#### 超时问题
|
||||
|
||||
**症状**:页面加载或响应超时
|
||||
|
||||
**解决方案**:
|
||||
1. 增加超时时间配置
|
||||
2. 检查网络连接稳定性
|
||||
3. 实现重试机制
|
||||
|
||||
**章节来源**
|
||||
- [kimi.py:27-31](file://backend/app/workers/platforms/kimi.py#L27-L31)
|
||||
- [kimi.py:116-118](file://backend/app/workers/platforms/kimi.py#L116-L118)
|
||||
- [wenxin.py:27-31](file://backend/app/workers/platforms/wenxin.py#L27-L31)
|
||||
|
||||
### 调试技巧
|
||||
|
||||
1. **启用详细日志**:查看适配器的详细执行过程
|
||||
2. **检查数据库状态**:验证查询任务的状态更新
|
||||
3. **监控资源使用**:观察浏览器进程和内存使用情况
|
||||
4. **测试独立适配器**:单独测试某个平台的适配器
|
||||
|
||||
## 结论
|
||||
|
||||
该适配器架构设计成功地实现了以下目标:
|
||||
|
||||
### 架构优势
|
||||
|
||||
1. **高度可扩展**:新增平台只需实现BasePlatformAdapter接口
|
||||
2. **统一管理**:通过CitationEngine集中管理所有平台
|
||||
3. **资源优化**:统一的资源管理和清理机制
|
||||
4. **错误处理**:完善的异常处理和重试机制
|
||||
|
||||
### 最佳实践建议
|
||||
|
||||
1. **遵循接口契约**:严格实现BasePlatformAdapter的所有抽象方法
|
||||
2. **资源管理**:确保close()方法能够正确清理所有资源
|
||||
3. **错误处理**:实现适当的异常处理和重试逻辑
|
||||
4. **配置管理**:将平台特定的配置参数化
|
||||
5. **测试覆盖**:为新适配器编写充分的单元测试
|
||||
|
||||
### 扩展指南
|
||||
|
||||
要为新平台创建适配器,需要:
|
||||
|
||||
1. 继承BasePlatformAdapter类
|
||||
2. 设置platform_name和platform_url属性
|
||||
3. 实现query()方法的平台特定逻辑
|
||||
4. 实现close()方法清理资源
|
||||
5. 在CitationEngine中注册新适配器
|
||||
6. 编写相应的测试用例
|
||||
|
||||
该架构为AI平台集成提供了一个稳健、可扩展的基础,能够支持未来更多的平台集成需求。
|
||||
|
|
@ -0,0 +1,416 @@
|
|||
# API接口文档
|
||||
|
||||
<cite>
|
||||
**本文档中引用的文件**
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/api/reports.py](file://backend/app/api/reports.py)
|
||||
- [backend/app/api/deps.py](file://backend/app/api/deps.py)
|
||||
- [backend/app/schemas/auth.py](file://backend/app/schemas/auth.py)
|
||||
- [backend/app/schemas/query.py](file://backend/app/schemas/query.py)
|
||||
- [backend/app/schemas/citation.py](file://backend/app/schemas/citation.py)
|
||||
- [backend/app/services/auth.py](file://backend/app/services/auth.py)
|
||||
- [backend/app/services/query.py](file://backend/app/services/query.py)
|
||||
- [backend/app/services/citation.py](file://backend/app/services/citation.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [backend/app/models/query_task.py](file://backend/app/models/query_task.py)
|
||||
</cite>
|
||||
|
||||
## 更新摘要
|
||||
**所做更改**
|
||||
- 完善了认证接口的详细说明,包括注册、登录和用户信息查询
|
||||
- 更新了查询管理接口的完整功能说明,涵盖CRUD操作和权限控制
|
||||
- 补充了引用数据接口的统计分析和任务执行功能
|
||||
- 增强了报告导出接口的CSV格式说明
|
||||
- 完善了错误处理和状态码说明
|
||||
- 更新了架构图和数据流图以反映实际实现
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构概览](#架构概览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排除指南](#故障排除指南)
|
||||
9. [结论](#结论)
|
||||
|
||||
## 简介
|
||||
本文件为GEO平台的完整API接口文档,涵盖认证、查询管理、引用数据、报告导出等核心功能模块。文档详细记录了所有RESTful API端点的HTTP方法、URL模式、请求参数与响应格式,并说明了JWT令牌管理、用户注册登录、权限验证机制、任务创建与执行、数据查询与统计分析、以及CSV格式报告导出流程。
|
||||
|
||||
## 项目结构
|
||||
后端采用FastAPI框架,按功能模块组织API路由:认证(/api/v1/auth)、查询词(/api/v1/queries)、引用数据(/api/v1/citations)、报告(/api/v1/reports)。应用启动时初始化数据库模型并启动查询调度器,同时启用CORS允许前端localhost:3000访问。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
A["应用入口<br/>backend/app/main.py"] --> B["认证路由<br/>backend/app/api/auth.py"]
|
||||
A --> C["查询路由<br/>backend/app/api/queries.py"]
|
||||
A --> D["引用数据路由<br/>backend/app/api/citations.py"]
|
||||
A --> E["报告路由<br/>backend/app/api/reports.py"]
|
||||
A --> F["依赖注入与认证中间件<br/>backend/app/api/deps.py"]
|
||||
A --> G["配置中心<br/>backend/app/config.py"]
|
||||
A --> H["数据库模型<br/>backend/app/models/*.py"]
|
||||
A --> I["业务服务层<br/>backend/app/services/*.py"]
|
||||
A --> J["数据传输对象<br/>backend/app/schemas/*.py"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/main.py:24-47](file://backend/app/main.py#L24-L47)
|
||||
- [backend/app/api/auth.py:10](file://backend/app/api/auth.py#L10)
|
||||
- [backend/app/api/queries.py:12](file://backend/app/api/queries.py#L12)
|
||||
- [backend/app/api/citations.py:21](file://backend/app/api/citations.py#L21)
|
||||
- [backend/app/api/reports.py:13](file://backend/app/api/reports.py#L13)
|
||||
- [backend/app/api/deps.py:13](file://backend/app/api/deps.py#L13)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:1-57](file://backend/app/main.py#L1-L57)
|
||||
|
||||
## 核心组件
|
||||
- 应用入口与生命周期管理:定义应用标题、版本、CORS策略,注册各模块路由,启动/关闭查询调度器。
|
||||
- 认证组件:提供注册、登录、当前用户信息查询;基于OAuth2密码流与JWT进行身份验证。
|
||||
- 查询管理:支持查询词的增删改查、分页列表、频率与下次查询时间计算。
|
||||
- 引用数据:支持引用记录查询、统计分析(总查询数、引用率、平台分布、趋势)、立即执行查询任务。
|
||||
- 报告导出:支持CSV格式导出指定查询的引用记录。
|
||||
- 数据模型与服务:用户、查询、引用记录、查询任务等模型及对应的服务逻辑。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:13-57](file://backend/app/main.py#L13-L57)
|
||||
- [backend/app/api/auth.py:13-42](file://backend/app/api/auth.py#L13-L42)
|
||||
- [backend/app/api/queries.py:15-85](file://backend/app/api/queries.py#L15-L85)
|
||||
- [backend/app/api/citations.py:25-77](file://backend/app/api/citations.py#L25-L77)
|
||||
- [backend/app/api/reports.py:16-46](file://backend/app/api/reports.py#L16-L46)
|
||||
|
||||
## 架构概览
|
||||
下图展示了客户端与后端各模块之间的交互关系,包括认证流程、查询管理、引用数据处理与报告导出。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "客户端"
|
||||
FE["前端应用<br/>localhost:3000"]
|
||||
end
|
||||
subgraph "后端服务"
|
||||
AUTH["认证模块<br/>/api/v1/auth"]
|
||||
QUERIES["查询模块<br/>/api/v1/queries"]
|
||||
CITATIONS["引用数据模块<br/>/api/v1/citations"]
|
||||
REPORTS["报告模块<br/>/api/v1/reports"]
|
||||
DEPS["依赖注入与认证中间件"]
|
||||
MODELS["数据模型层"]
|
||||
SERVICES["业务服务层"]
|
||||
CONFIG["配置中心"]
|
||||
end
|
||||
FE --> AUTH
|
||||
FE --> QUERIES
|
||||
FE --> CITATIONS
|
||||
FE --> REPORTS
|
||||
AUTH --> DEPS
|
||||
QUERIES --> DEPS
|
||||
CITATIONS --> DEPS
|
||||
REPORTS --> DEPS
|
||||
AUTH --> SERVICES
|
||||
QUERIES --> SERVICES
|
||||
CITATIONS --> SERVICES
|
||||
REPORTS --> SERVICES
|
||||
SERVICES --> MODELS
|
||||
MODELS --> CONFIG
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/main.py:38-51](file://backend/app/main.py#L38-L51)
|
||||
- [backend/app/api/deps.py:16-42](file://backend/app/api/deps.py#L16-L42)
|
||||
- [backend/app/services/auth.py:37-68](file://backend/app/services/auth.py#L37-L68)
|
||||
- [backend/app/services/query.py:12-123](file://backend/app/services/query.py#L12-L123)
|
||||
- [backend/app/services/citation.py:24-359](file://backend/app/services/citation.py#L24-L359)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 认证接口
|
||||
- 接口前缀:/api/v1/auth
|
||||
- 路由与功能:
|
||||
- POST /register:用户注册,返回用户信息
|
||||
- POST /login:用户登录,返回JWT访问令牌与用户信息
|
||||
- GET /me:获取当前登录用户信息
|
||||
- 认证机制:
|
||||
- 使用OAuth2密码流,令牌类型为bearer
|
||||
- 通过依赖注入获取当前用户,校验JWT有效性
|
||||
- 邮箱/密码验证通过后签发JWT,其中sub字段为用户ID
|
||||
- 请求参数与响应格式:
|
||||
- 注册:邮箱、密码、姓名
|
||||
- 登录:邮箱、密码
|
||||
- 当前用户:用户ID、邮箱、姓名、计划、最大查询数、激活状态、创建时间
|
||||
- 访问令牌:访问令牌字符串、令牌类型、用户信息
|
||||
- 错误处理:
|
||||
- 注册:邮箱已存在时返回400
|
||||
- 登录:凭据无效时返回401
|
||||
- 获取当前用户:凭据无效或用户不存在时返回401
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant AuthAPI as "认证API"
|
||||
participant AuthService as "认证服务"
|
||||
participant DB as "数据库"
|
||||
Client->>AuthAPI : POST /api/v1/auth/register
|
||||
AuthAPI->>AuthService : 注册用户
|
||||
AuthService->>DB : 检查邮箱唯一性
|
||||
DB-->>AuthService : 结果
|
||||
AuthService->>DB : 创建用户并存储密码哈希
|
||||
DB-->>AuthService : 新用户
|
||||
AuthService-->>AuthAPI : 用户信息
|
||||
AuthAPI-->>Client : 201 用户信息
|
||||
Client->>AuthAPI : POST /api/v1/auth/login
|
||||
AuthAPI->>AuthService : 验证用户名/密码
|
||||
AuthService->>DB : 查询用户
|
||||
DB-->>AuthService : 用户
|
||||
AuthService->>AuthService : 校验密码
|
||||
AuthService->>AuthService : 生成JWT
|
||||
AuthService-->>AuthAPI : 访问令牌与用户信息
|
||||
AuthAPI-->>Client : 200 访问令牌
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/auth.py:13-37](file://backend/app/api/auth.py#L13-L37)
|
||||
- [backend/app/services/auth.py:37-68](file://backend/app/services/auth.py#L37-L68)
|
||||
- [backend/app/api/deps.py:16-42](file://backend/app/api/deps.py#L16-L42)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/auth.py:13-42](file://backend/app/api/auth.py#L13-L42)
|
||||
- [backend/app/api/deps.py:16-42](file://backend/app/api/deps.py#L16-L42)
|
||||
- [backend/app/schemas/auth.py:7-34](file://backend/app/schemas/auth.py#L7-L34)
|
||||
- [backend/app/services/auth.py:37-68](file://backend/app/services/auth.py#L37-L68)
|
||||
|
||||
### 查询管理接口
|
||||
- 接口前缀:/api/v1/queries
|
||||
- 路由与功能:
|
||||
- GET /:分页列出当前用户的查询词
|
||||
- POST /:创建新的查询词
|
||||
- GET /{query_id}:获取单个查询词详情
|
||||
- PUT /{query_id}:更新查询词
|
||||
- DELETE /{query_id}:删除查询词
|
||||
- 权限与限制:
|
||||
- 基于当前用户上下文进行资源所有权校验
|
||||
- 创建查询时检查用户最大查询数限制
|
||||
- 请求参数与响应格式:
|
||||
- 查询创建:关键词、目标品牌、品牌别名、平台列表、频率
|
||||
- 查询更新:可选字段包括关键词、目标品牌、品牌别名、平台列表、频率、状态
|
||||
- 查询详情:包含创建/更新时间、状态、下次查询时间等
|
||||
- 列表响应:items与total总数
|
||||
- 错误处理:
|
||||
- 未找到查询时返回404
|
||||
- 超出查询配额返回403
|
||||
- 参数校验失败返回422(Pydantic验证)
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["创建查询"]) --> CheckCount["检查用户查询数量"]
|
||||
CheckCount --> CountOK{"是否超过上限?"}
|
||||
CountOK --> |是| Raise403["抛出403 错误"]
|
||||
CountOK --> |否| CalcNext["根据频率计算下次查询时间"]
|
||||
CalcNext --> CreateQuery["创建查询记录"]
|
||||
CreateQuery --> Commit["提交事务"]
|
||||
Commit --> Return["返回新查询"]
|
||||
Raise403 --> End(["结束"])
|
||||
Return --> End
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/services/query.py:45-81](file://backend/app/services/query.py#L45-L81)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/queries.py:15-85](file://backend/app/api/queries.py#L15-L85)
|
||||
- [backend/app/schemas/query.py:11-94](file://backend/app/schemas/query.py#L11-L94)
|
||||
- [backend/app/services/query.py:12-123](file://backend/app/services/query.py#L12-L123)
|
||||
|
||||
### 引用数据接口
|
||||
- 接口前缀:/api/v1/citations
|
||||
- 路由与功能:
|
||||
- GET /:分页查询引用记录,支持按查询ID、平台、时间范围过滤
|
||||
- GET /stats:统计分析,返回总查询数、引用率、平均位置、按平台分布、30天趋势
|
||||
- POST /{query_id}/run-now:立即触发查询任务,返回任务ID与状态
|
||||
- 数据模型:
|
||||
- 引用记录包含平台、是否引用、引用位置、引用文本、竞争品牌列表、查询时间等
|
||||
- 统计逻辑:
|
||||
- 总查询数与引用数来自关联查询统计
|
||||
- 引用率=引用数/总查询数
|
||||
- 平均位置仅对有位置的引用记录计算
|
||||
- 按平台统计查询数、引用数、引用率与平均位置
|
||||
- 趋势按周聚合过去30天的引用数
|
||||
- 错误处理:
|
||||
- 查询所有权校验失败返回404
|
||||
- 查询非活跃或未配置平台时返回404
|
||||
- 参数校验失败返回422
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant CitAPI as "引用数据API"
|
||||
participant CitService as "引用数据服务"
|
||||
participant DB as "数据库"
|
||||
Client->>CitAPI : POST /api/v1/citations/{query_id}/run-now
|
||||
CitAPI->>CitService : 触发查询任务
|
||||
CitService->>DB : 校验查询所有权与状态
|
||||
DB-->>CitService : 查询结果
|
||||
CitService->>DB : 为每个平台创建任务
|
||||
DB-->>CitService : 任务集合
|
||||
CitService-->>CitAPI : 返回首个任务
|
||||
CitAPI-->>Client : 202 任务信息
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/citations.py:59-77](file://backend/app/api/citations.py#L59-L77)
|
||||
- [backend/app/services/citation.py:204-261](file://backend/app/services/citation.py#L204-L261)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/citations.py:25-77](file://backend/app/api/citations.py#L25-L77)
|
||||
- [backend/app/schemas/citation.py:7-50](file://backend/app/schemas/citation.py#L7-L50)
|
||||
- [backend/app/services/citation.py:24-359](file://backend/app/services/citation.py#L24-L359)
|
||||
|
||||
### 报告导出接口
|
||||
- 接口前缀:/api/v1/reports
|
||||
- 路由与功能:
|
||||
- GET /export/csv:导出指定查询的引用记录为CSV文件
|
||||
- 输出格式:
|
||||
- 文本CSV,包含列头:日期、平台、是否引用、引用位置、引用文本、竞争品牌
|
||||
- 文件名为geo-report-YYYYMMDD.csv
|
||||
- 错误处理:
|
||||
- 不支持的格式返回400
|
||||
- 查询所有权校验失败返回404
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant ReportAPI as "报告API"
|
||||
participant ReportService as "报告服务"
|
||||
participant DB as "数据库"
|
||||
Client->>ReportAPI : GET /api/v1/reports/export/csv?query_id={id}&format=csv
|
||||
ReportAPI->>ReportService : 导出CSV
|
||||
ReportService->>DB : 校验查询所有权并查询记录
|
||||
DB-->>ReportService : 引用记录列表
|
||||
ReportService->>ReportService : 写入CSV内容
|
||||
ReportService-->>ReportAPI : CSV字符串
|
||||
ReportAPI-->>Client : 200 CSV文件下载
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/reports.py:16-46](file://backend/app/api/reports.py#L16-L46)
|
||||
- [backend/app/services/citation.py:327-359](file://backend/app/services/citation.py#L327-L359)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/reports.py:16-46](file://backend/app/api/reports.py#L16-L46)
|
||||
- [backend/app/services/citation.py:327-359](file://backend/app/services/citation.py#L327-L359)
|
||||
|
||||
## 依赖分析
|
||||
- 中间件与认证:
|
||||
- OAuth2密码流,令牌URL为/api/v1/auth/login
|
||||
- 依赖注入从Header中解析Authorization: Bearer token
|
||||
- 通过JWT解码校验用户身份,查询数据库获取用户实体
|
||||
- 数据模型关系:
|
||||
- User与Query一对多,级联删除
|
||||
- Query与CitationRecord、QueryTask一对多,级联删除
|
||||
- CitationRecord外键关联Query
|
||||
- 服务层职责:
|
||||
- 认证服务:密码哈希、JWT签发与校验、用户注册与登录
|
||||
- 查询服务:分页查询、创建/更新/删除、频率与下次查询时间计算
|
||||
- 引用服务:引用记录查询、统计分析、立即执行任务、CSV导出
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class User {
|
||||
+UUID id
|
||||
+string email
|
||||
+string password_hash
|
||||
+string name
|
||||
+string plan
|
||||
+int max_queries
|
||||
+boolean is_active
|
||||
+datetime created_at
|
||||
+datetime updated_at
|
||||
}
|
||||
class Query {
|
||||
+UUID id
|
||||
+UUID user_id
|
||||
+string keyword
|
||||
+string target_brand
|
||||
+list brand_aliases
|
||||
+list platforms
|
||||
+string frequency
|
||||
+string status
|
||||
+datetime last_queried_at
|
||||
+datetime next_query_at
|
||||
+datetime created_at
|
||||
+datetime updated_at
|
||||
}
|
||||
class CitationRecord {
|
||||
+UUID id
|
||||
+UUID query_id
|
||||
+string platform
|
||||
+boolean cited
|
||||
+int citation_position
|
||||
+string citation_text
|
||||
+list competitor_brands
|
||||
+string raw_response
|
||||
+datetime queried_at
|
||||
}
|
||||
class QueryTask {
|
||||
+UUID id
|
||||
+UUID query_id
|
||||
+string platform
|
||||
+string status
|
||||
+string error_message
|
||||
+datetime scheduled_at
|
||||
+datetime started_at
|
||||
+datetime completed_at
|
||||
}
|
||||
User "1" --> "many" Query : "拥有"
|
||||
Query "1" --> "many" CitationRecord : "产生"
|
||||
Query "1" --> "many" QueryTask : "触发任务"
|
||||
CitationRecord "many" --> "1" Query : "属于"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/deps.py:16-42](file://backend/app/api/deps.py#L16-L42)
|
||||
- [backend/app/models/user.py:35-40](file://backend/app/models/user.py#L35-L40)
|
||||
- [backend/app/models/query.py:43-48](file://backend/app/models/query.py#L43-L48)
|
||||
- [backend/app/models/citation_record.py:35](file://backend/app/models/citation_record.py#L35)
|
||||
- [backend/app/models/query_task.py:34](file://backend/app/models/query_task.py#L34)
|
||||
|
||||
## 性能考虑
|
||||
- 分页查询:列表接口默认每页20条,支持skip/limit参数,避免一次性加载大量数据。
|
||||
- 索引优化:查询与引用记录表建立复合索引,提升按用户ID、状态、时间等条件的查询性能。
|
||||
- 异步数据库:使用异步SQLAlchemy,提高并发处理能力。
|
||||
- 缓存策略:可在服务层引入Redis缓存热点统计数据,降低重复计算成本。
|
||||
- 批量操作:任务创建采用批量插入,减少事务开销。
|
||||
|
||||
## 故障排除指南
|
||||
- 认证相关
|
||||
- 401 未授权:检查Authorization头是否正确携带Bearer token;确认JWT签名有效且未过期。
|
||||
- 401 凭据无效:检查邮箱/密码是否正确;确认用户存在且已激活。
|
||||
- 查询管理
|
||||
- 403 超出配额:检查用户计划与max_queries限制;删除不再需要的查询词,或升级计划以增加配额。
|
||||
- 404 未找到:确认query_id归属当前用户;检查路径参数是否正确。
|
||||
- 引用数据
|
||||
- 404 查询不可用:确认查询状态为active且配置了至少一个平台。
|
||||
- 统计为空:当按查询ID过滤但无匹配记录时,返回零值统计。
|
||||
- 报告导出
|
||||
- 400 不支持的格式:确保format参数为csv。
|
||||
- 404 查询不存在:确认query_id归属当前用户。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/auth.py:26-30](file://backend/app/api/auth.py#L26-L30)
|
||||
- [backend/app/services/query.py:59-60](file://backend/app/services/query.py#L59-L60)
|
||||
- [backend/app/api/citations.py:67-71](file://backend/app/api/citations.py#L67-L71)
|
||||
- [backend/app/api/reports.py:23-27](file://backend/app/api/reports.py#L23-L27)
|
||||
|
||||
## 结论
|
||||
GEO平台API采用清晰的模块化设计,围绕用户、查询、引用与报告四大领域构建RESTful接口。通过JWT认证与严格的资源所有权校验,保障了数据安全;通过统计分析与CSV导出,满足了业务洞察与合规需求。建议在生产环境中进一步完善错误日志、监控指标与缓存策略,持续优化查询性能与用户体验。
|
||||
|
|
@ -0,0 +1,590 @@
|
|||
# 引用数据接口
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/api/reports.py](file://backend/app/api/reports.py)
|
||||
- [backend/app/schemas/citation.py](file://backend/app/schemas/citation.py)
|
||||
- [backend/app/schemas/query.py](file://backend/app/schemas/query.py)
|
||||
- [backend/app/services/citation.py](file://backend/app/services/citation.py)
|
||||
- [backend/app/services/query.py](file://backend/app/services/query.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [backend/app/workers/platforms/kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [backend/app/workers/platforms/wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [frontend/lib/api.ts](file://frontend/lib/api.ts)
|
||||
- [frontend/components/charts/trend-chart.tsx](file://frontend/components/charts/trend-chart.tsx)
|
||||
- [frontend/components/charts/platform-chart.tsx](file://frontend/components/charts/platform-chart.tsx)
|
||||
- [frontend/lib/platforms.ts](file://frontend/lib/platforms.ts)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件面向“引用数据查询与分析”场景,提供完整的API文档与实现解析。内容覆盖:
|
||||
- 引用数据查询接口:支持按查询任务、平台、时间范围过滤,分页与排序
|
||||
- 统计分析功能:总查询数、引用数、引用率、平均位置、按平台统计、近30天趋势
|
||||
- 上下文提取机制:品牌识别、竞争品牌识别、置信度评分
|
||||
- 趋势分析与平台对比:通过统计接口与前端图表组件展示
|
||||
- 报告导出:CSV导出能力
|
||||
- 数据过滤、排序与分页参数说明
|
||||
- 前端接口调用与可视化使用指南
|
||||
|
||||
## 项目结构
|
||||
后端采用FastAPI + SQLAlchemy异步ORM,前端Next.js + Recharts,核心模块如下:
|
||||
- API层:路由定义与参数校验
|
||||
- 服务层:业务逻辑与数据库查询
|
||||
- 模型层:SQLAlchemy ORM映射
|
||||
- 工作器:AI平台适配器与引用检测引擎
|
||||
- 前端:API封装、图表组件与平台映射
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "前端"
|
||||
FE_API["前端API封装<br/>lib/api.ts"]
|
||||
FE_Trend["趋势图组件<br/>charts/trend-chart.tsx"]
|
||||
FE_Platform["平台柱状图组件<br/>charts/platform-chart.tsx"]
|
||||
FE_PlatformMap["平台映射<br/>lib/platforms.ts"]
|
||||
end
|
||||
subgraph "后端"
|
||||
API_Cit["引用API<br/>app/api/citations.py"]
|
||||
API_Query["查询API<br/>app/api/queries.py"]
|
||||
API_Report["报告API<br/>app/api/reports.py"]
|
||||
Svc_Cit["引用服务<br/>app/services/citation.py"]
|
||||
Svc_Query["查询服务<br/>app/services/query.py"]
|
||||
Model_Cit["引用记录模型<br/>app/models/citation_record.py"]
|
||||
Model_Query["查询模型<br/>app/models/query.py"]
|
||||
Engine["引用检测引擎<br/>app/workers/citation_engine.py"]
|
||||
Plat_Base["平台适配器基类<br/>app/workers/platforms/base.py"]
|
||||
Plat_Kimi["Kimi适配器<br/>app/workers/platforms/kimi.py"]
|
||||
Plat_Wenxin["文心一言适配器<br/>app/workers/platforms/wenxin.py"]
|
||||
end
|
||||
FE_API --> API_Cit
|
||||
FE_API --> API_Query
|
||||
FE_API --> API_Report
|
||||
FE_Trend --> FE_API
|
||||
FE_Platform --> FE_API
|
||||
FE_PlatformMap --> FE_Platform
|
||||
API_Cit --> Svc_Cit
|
||||
API_Query --> Svc_Query
|
||||
API_Report --> Svc_Cit
|
||||
Svc_Cit --> Model_Cit
|
||||
Svc_Query --> Model_Query
|
||||
Engine --> Plat_Base
|
||||
Engine --> Plat_Kimi
|
||||
Engine --> Plat_Wenxin
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/api/reports.py:1-47](file://backend/app/api/reports.py#L1-L47)
|
||||
- [backend/app/services/citation.py:1-269](file://backend/app/services/citation.py#L1-L269)
|
||||
- [backend/app/services/query.py:1-130](file://backend/app/services/query.py#L1-L130)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [frontend/components/charts/trend-chart.tsx:1-60](file://frontend/components/charts/trend-chart.tsx#L1-L60)
|
||||
- [frontend/components/charts/platform-chart.tsx:1-68](file://frontend/components/charts/platform-chart.tsx#L1-L68)
|
||||
- [frontend/lib/platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/api/reports.py:1-47](file://backend/app/api/reports.py#L1-L47)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
## 核心组件
|
||||
- 引用查询接口:支持按query_id、platform、start_date、end_date过滤,分页skip/limit,按时间倒序
|
||||
- 统计分析接口:总查询数、总引用数、引用率、平均位置、按平台统计、近30天趋势
|
||||
- 即时查询触发:向队列提交查询任务
|
||||
- 报告导出:CSV导出引用记录
|
||||
- 品牌识别与上下文提取:精确/别名/模糊匹配,置信度评分,上下文片段与段落位置,竞争品牌识别
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/citations.py:25-78](file://backend/app/api/citations.py#L25-L78)
|
||||
- [backend/app/schemas/citation.py:1-50](file://backend/app/schemas/citation.py#L1-L50)
|
||||
- [backend/app/services/citation.py:24-269](file://backend/app/services/citation.py#L24-L269)
|
||||
- [backend/app/workers/citation_engine.py:19-309](file://backend/app/workers/citation_engine.py#L19-L309)
|
||||
|
||||
## 架构总览
|
||||
后端通过API路由接收请求,经服务层进行权限校验与数据聚合,模型层负责数据库访问;引用检测由工作器引擎驱动,调用各平台适配器获取AI响应,再进行品牌匹配与竞争品牌识别。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant API as "引用API"
|
||||
participant Service as "引用服务"
|
||||
participant DB as "数据库"
|
||||
participant Engine as "引用检测引擎"
|
||||
participant Adapter as "平台适配器"
|
||||
Client->>API : GET /api/v1/citations
|
||||
API->>Service : get_citations(...)
|
||||
Service->>DB : 查询引用记录(带过滤+分页)
|
||||
DB-->>Service : 记录列表与总数
|
||||
Service-->>API : {items,total}
|
||||
API-->>Client : JSON响应
|
||||
Client->>API : POST /api/v1/citations/{query_id}/run-now
|
||||
API->>Service : trigger_query_now(...)
|
||||
Service->>Engine : 执行查询与检测
|
||||
Engine->>Adapter : 平台查询(keyword)
|
||||
Adapter-->>Engine : 原始响应文本
|
||||
Engine-->>Service : 引用检测结果
|
||||
Service-->>API : 任务信息
|
||||
API-->>Client : 任务ID/状态
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/citations.py:25-78](file://backend/app/api/citations.py#L25-L78)
|
||||
- [backend/app/services/citation.py:204-234](file://backend/app/services/citation.py#L204-L234)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [backend/app/workers/platforms/base.py:10-17](file://backend/app/workers/platforms/base.py#L10-L17)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 引用查询接口
|
||||
- 路径与方法
|
||||
- GET /api/v1/citations
|
||||
- 查询参数
|
||||
- query_id: UUID,可选,按所属查询任务过滤
|
||||
- platform: 字符串,可选,按平台过滤
|
||||
- start_date: 日期时间,可选,起始时间
|
||||
- end_date: 日期时间,可选,结束时间
|
||||
- skip: 整数,>=0,默认0
|
||||
- limit: 整数,>=1且<=100,默认20
|
||||
- 排序规则:按 queried_at 降序
|
||||
- 响应体
|
||||
- items: 引用记录数组
|
||||
- total: 符合条件的总记录数
|
||||
- 权限与安全
|
||||
- 仅返回当前用户拥有的查询所对应的引用记录
|
||||
- 当提供query_id时,会额外校验该查询是否属于当前用户
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["进入 list_citations"]) --> Build["构建基础条件: 用户ID过滤"]
|
||||
Build --> CheckQueryID{"是否提供 query_id?"}
|
||||
CheckQueryID --> |是| Verify["校验查询归属(用户ID)"]
|
||||
Verify --> |不存在| ReturnEmpty["返回空结果与total=0"]
|
||||
Verify --> |存在| AddFilter["追加 query_id 过滤"]
|
||||
CheckQueryID --> |否| AddPlatform["追加 platform 过滤(可选)"]
|
||||
AddFilter --> AddPlatform
|
||||
AddPlatform --> AddTime["追加 start_date/end_date 过滤(可选)"]
|
||||
AddTime --> QueryDB["查询+分页+排序"]
|
||||
QueryDB --> Count["统计总数"]
|
||||
Count --> Return["返回 items 与 total"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/citations.py:25-46](file://backend/app/api/citations.py#L25-L46)
|
||||
- [backend/app/services/citation.py:24-73](file://backend/app/services/citation.py#L24-L73)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/citations.py:25-46](file://backend/app/api/citations.py#L25-L46)
|
||||
- [backend/app/services/citation.py:24-73](file://backend/app/services/citation.py#L24-L73)
|
||||
|
||||
### 引用统计分析接口
|
||||
- 路径与方法
|
||||
- GET /api/v1/citations/stats
|
||||
- 查询参数
|
||||
- query_id: UUID,可选,限定到特定查询任务
|
||||
- 响应体字段
|
||||
- total_queries: 总查询数
|
||||
- total_citations: 总引用数
|
||||
- citation_rate: 引用率 = total_citations / total_queries
|
||||
- avg_position: 平均位置(仅对有位置的引用计算)
|
||||
- by_platform: 各平台统计,含 queries、citations、rate、avg_position
|
||||
- trend: 近30天按周统计的引用数列表
|
||||
- 权限与安全
|
||||
- 当提供query_id时,会校验查询归属
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["进入 get_citation_stats"]) --> Verify{"是否提供 query_id?"}
|
||||
Verify --> |是| CheckOwner["校验查询归属"]
|
||||
CheckOwner --> |不存在| Empty["返回全零统计"]
|
||||
CheckOwner --> |存在| SetBase["设置基础条件(用户ID+query_id)"]
|
||||
Verify --> |否| SetBase
|
||||
SetBase --> Totals["统计 total_queries/total_citations"]
|
||||
Totals --> AvgPos["计算 avg_position(仅cited且有位置)"]
|
||||
AvgPos --> ByPlatform["按平台 group 统计"]
|
||||
ByPlatform --> Trend["近30天按周统计"]
|
||||
Trend --> Return["返回统计结果"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/citations.py:49-56](file://backend/app/api/citations.py#L49-L56)
|
||||
- [backend/app/services/citation.py:76-201](file://backend/app/services/citation.py#L76-L201)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/citations.py:49-56](file://backend/app/api/citations.py#L49-L56)
|
||||
- [backend/app/schemas/citation.py:25-44](file://backend/app/schemas/citation.py#L25-L44)
|
||||
- [backend/app/services/citation.py:76-201](file://backend/app/services/citation.py#L76-L201)
|
||||
|
||||
### 即时查询触发接口
|
||||
- 路径与方法
|
||||
- POST /api/v1/citations/{query_id}/run-now
|
||||
- 行为
|
||||
- 校验查询归属与状态(必须为active)
|
||||
- 为配置的每个平台创建一个QueryTask(状态pending)
|
||||
- 返回任务ID与状态
|
||||
- 错误处理
|
||||
- 查询不存在或不属于当前用户:404
|
||||
- 查询状态非active:404
|
||||
- 无平台配置:404
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant API as "引用API"
|
||||
participant Service as "引用服务"
|
||||
participant DB as "数据库"
|
||||
participant Engine as "引用检测引擎"
|
||||
Client->>API : POST /citations/{query_id}/run-now
|
||||
API->>Service : trigger_query_now(user_id, query_id)
|
||||
Service->>DB : 校验查询归属与状态
|
||||
DB-->>Service : 查询对象
|
||||
Service->>Engine : 为每个平台创建QueryTask
|
||||
Engine-->>Service : 返回首个任务
|
||||
Service-->>API : 任务ID/状态
|
||||
API-->>Client : 202 Accepted + 任务信息
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/citations.py:59-78](file://backend/app/api/citations.py#L59-L78)
|
||||
- [backend/app/services/citation.py:204-234](file://backend/app/services/citation.py#L204-L234)
|
||||
- [backend/app/workers/citation_engine.py:268-289](file://backend/app/workers/citation_engine.py#L268-L289)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/citations.py:59-78](file://backend/app/api/citations.py#L59-L78)
|
||||
- [backend/app/services/citation.py:204-234](file://backend/app/services/citation.py#L204-L234)
|
||||
|
||||
### 报告导出接口
|
||||
- 路径与方法
|
||||
- GET /api/v1/reports/export/csv
|
||||
- 查询参数
|
||||
- query_id: UUID,必填
|
||||
- format: 字符串,当前仅支持csv
|
||||
- 响应
|
||||
- Content-Type: text/csv
|
||||
- Content-Disposition: 附件下载
|
||||
- 内容: CSV表格(日期、平台、是否引用、引用位置、引用文本、竞争品牌)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/reports.py:16-47](file://backend/app/api/reports.py#L16-L47)
|
||||
- [backend/app/services/citation.py:237-269](file://backend/app/services/citation.py#L237-L269)
|
||||
|
||||
### 查询管理接口(用于关联引用数据)
|
||||
- 路径与方法
|
||||
- GET /api/v1/queries
|
||||
- POST /api/v1/queries
|
||||
- GET /api/v1/queries/{query_id}
|
||||
- PUT /api/v1/queries/{query_id}
|
||||
- DELETE /api/v1/queries/{query_id}
|
||||
- 分页参数
|
||||
- skip: >=0,默认0
|
||||
- limit: >=1且<=100,默认20
|
||||
- 响应体
|
||||
- 查询列表/详情,包含keyword、target_brand、brand_aliases、platforms、frequency、status等
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/queries.py:15-86](file://backend/app/api/queries.py#L15-L86)
|
||||
- [backend/app/schemas/query.py:11-94](file://backend/app/schemas/query.py#L11-L94)
|
||||
- [backend/app/services/query.py:12-130](file://backend/app/services/query.py#L12-L130)
|
||||
|
||||
### 数据模型与关系
|
||||
- 引用记录模型
|
||||
- 关键字段:query_id、platform、cited、citation_position、citation_text、competitor_brands、raw_response、queried_at
|
||||
- 索引:query_id、queried_at、platform
|
||||
- 查询模型
|
||||
- 关键字段:user_id、keyword、target_brand、brand_aliases、platforms、frequency、status、last_queried_at、next_query_at、created_at、updated_at
|
||||
- 索引:user_id、status、next_query_at
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
datetime last_queried_at
|
||||
datetime next_query_at
|
||||
datetime created_at
|
||||
datetime updated_at
|
||||
}
|
||||
CITATION_RECORDS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
boolean cited
|
||||
int citation_position
|
||||
text citation_text
|
||||
jsonb competitor_brands
|
||||
text raw_response
|
||||
datetime queried_at
|
||||
}
|
||||
QUERIES ||--o{ CITATION_RECORDS : "拥有"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
### 引用检测与上下文提取机制
|
||||
- 品牌匹配器
|
||||
- 支持精确匹配、别名匹配、模糊匹配
|
||||
- 输出:cited、confidence、match_type、position、citation_text
|
||||
- 置信度:精确=1.0,别名=0.9,模糊按相似度取值
|
||||
- 竞争品牌检测
|
||||
- 基于预定义行业品牌集合,排除目标品牌后返回其他品牌列表
|
||||
- 上下文提取
|
||||
- 按段落定位品牌首次出现位置(1-based)
|
||||
- 提取不超过200字符的上下文片段
|
||||
- 引擎流程
|
||||
- 为每个平台创建/获取QueryTask,更新状态为running
|
||||
- 调用平台适配器获取原始响应
|
||||
- 使用BrandMatcher进行检测,CompetitorDetector提取竞争品牌
|
||||
- 写入CitationRecord,更新Query的时间字段
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BrandMatcher {
|
||||
+match(text) dict
|
||||
-_extract_candidates(text) list
|
||||
-_extract_position_and_context(text, keyword) tuple
|
||||
}
|
||||
class CompetitorDetector {
|
||||
+detect(text, target_brand) list
|
||||
}
|
||||
class CitationEngine {
|
||||
+execute_query(query, db) list
|
||||
+execute_single_platform(keyword, platform, target_brand, brand_aliases) dict
|
||||
-_get_or_create_task(db, query_id, platform) QueryTask
|
||||
-_calculate_next_query_at(frequency) datetime
|
||||
}
|
||||
class BasePlatformAdapter {
|
||||
<<abstract>>
|
||||
+query(keyword) str
|
||||
+close()
|
||||
}
|
||||
class KimiAdapter {
|
||||
+query(keyword) str
|
||||
+close()
|
||||
}
|
||||
class WenxinAdapter {
|
||||
+query(keyword) str
|
||||
+close()
|
||||
}
|
||||
CitationEngine --> BrandMatcher : "使用"
|
||||
CitationEngine --> CompetitorDetector : "使用"
|
||||
CitationEngine --> BasePlatformAdapter : "依赖"
|
||||
KimiAdapter --|> BasePlatformAdapter
|
||||
WenxinAdapter --|> BasePlatformAdapter
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/citation_engine.py:19-309](file://backend/app/workers/citation_engine.py#L19-L309)
|
||||
- [backend/app/workers/platforms/base.py:4-17](file://backend/app/workers/platforms/base.py#L4-L17)
|
||||
- [backend/app/workers/platforms/kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:19-309](file://backend/app/workers/citation_engine.py#L19-L309)
|
||||
|
||||
### 前端接口调用与可视化
|
||||
- API封装
|
||||
- 提供查询列表、统计、报告导出等方法
|
||||
- 自动附加Authorization头
|
||||
- 趋势图组件
|
||||
- 接收trend数组,按周展示引用次数
|
||||
- 平台柱状图组件
|
||||
- 接收by_platform统计,按平台引用率展示
|
||||
- 平台映射
|
||||
- 将平台枚举转换为中文标签
|
||||
|
||||
**章节来源**
|
||||
- [frontend/lib/api.ts:23-57](file://frontend/lib/api.ts#L23-L57)
|
||||
- [frontend/components/charts/trend-chart.tsx:13-59](file://frontend/components/charts/trend-chart.tsx#L13-L59)
|
||||
- [frontend/components/charts/platform-chart.tsx:15-67](file://frontend/components/charts/platform-chart.tsx#L15-L67)
|
||||
- [frontend/lib/platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
|
||||
## 依赖分析
|
||||
- API层依赖服务层与认证依赖注入
|
||||
- 服务层依赖模型层与数据库会话
|
||||
- 引擎依赖平台适配器,适配器继承抽象基类
|
||||
- 前端依赖后端API与图表库
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
API_Cit["citations.py"] --> Svc_Cit["services/citation.py"]
|
||||
API_Query["queries.py"] --> Svc_Query["services/query.py"]
|
||||
API_Report["reports.py"] --> Svc_Cit
|
||||
Svc_Cit --> Model_Cit["models/citation_record.py"]
|
||||
Svc_Query --> Model_Query["models/query.py"]
|
||||
Svc_Cit --> Engine["workers/citation_engine.py"]
|
||||
Engine --> Plat_Base["platforms/base.py"]
|
||||
Engine --> Plat_Kimi["platforms/kimi.py"]
|
||||
Engine --> Plat_Wenxin["platforms/wenxin.py"]
|
||||
FE_API["frontend/lib/api.ts"] --> API_Cit
|
||||
FE_API --> API_Query
|
||||
FE_API --> API_Report
|
||||
FE_Trend["trend-chart.tsx"] --> FE_API
|
||||
FE_Platform["platform-chart.tsx"] --> FE_API
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/api/reports.py:1-47](file://backend/app/api/reports.py#L1-L47)
|
||||
- [backend/app/services/citation.py:1-269](file://backend/app/services/citation.py#L1-L269)
|
||||
- [backend/app/services/query.py:1-130](file://backend/app/services/query.py#L1-L130)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [frontend/components/charts/trend-chart.tsx:1-60](file://frontend/components/charts/trend-chart.tsx#L1-L60)
|
||||
- [frontend/components/charts/platform-chart.tsx:1-68](file://frontend/components/charts/platform-chart.tsx#L1-L68)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/api/reports.py:1-47](file://backend/app/api/reports.py#L1-L47)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
## 性能考虑
|
||||
- 分页与索引
|
||||
- 引用记录表对query_id、queried_at、platform建立索引,提升过滤与排序效率
|
||||
- 查询接口默认limit=20,最大100,避免一次性返回过多数据
|
||||
- 统计查询
|
||||
- 使用SQL聚合函数与分组,减少Python侧计算量
|
||||
- 趋势按周聚合,降低前端渲染压力
|
||||
- 并发与重试
|
||||
- 平台适配器对查询过程进行最多3次重试与指数退避,提高稳定性
|
||||
- 引擎批处理
|
||||
- 触发即时查询时为每个平台创建任务,便于后续并发执行
|
||||
|
||||
[本节为通用建议,无需具体文件分析]
|
||||
|
||||
## 故障排查指南
|
||||
- 404 Not Found
|
||||
- 查询不存在或不属于当前用户(即时查询与导出会校验归属并返回404;统计接口在归属校验失败时返回全零统计,列表接口返回空结果)
|
||||
- 即时查询时查询状态非active或未配置平台
|
||||
- 400 Bad Request
|
||||
- 报告导出format非csv
|
||||
- 403 Forbidden
|
||||
- 创建查询超过用户最大配额限制
|
||||
- 平台适配器错误
|
||||
- Playwright浏览器未安装或启动失败
|
||||
- 页面交互超时,检查网络与平台可用性
|
||||
- 结果为空
|
||||
- 过滤条件过于严格(如query_id、platform、时间范围)
|
||||
- 查询尚未产生任何引用记录
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/reports.py:23-27](file://backend/app/api/reports.py#L23-L27)
|
||||
- [backend/app/services/query.py:45-81](file://backend/app/services/query.py#L45-L81)
|
||||
- [backend/app/workers/platforms/kimi.py:21-32](file://backend/app/workers/platforms/kimi.py#L21-L32)
|
||||
- [backend/app/workers/platforms/wenxin.py:21-32](file://backend/app/workers/platforms/wenxin.py#L21-L32)
|
||||
- [backend/app/services/citation.py:204-234](file://backend/app/services/citation.py#L204-L234)
|
||||
|
||||
## 结论
|
||||
本系统提供了完整的引用数据查询、统计与导出能力,并内置品牌识别与竞争品牌检测机制。通过清晰的API设计、完善的权限校验与数据模型,以及前后端协同的可视化组件,能够满足日常的品牌监测与竞品分析需求。建议在生产环境中关注平台适配器的稳定性与数据库索引策略,以进一步提升性能与可靠性。
|
||||
|
||||
[本节为总结性内容,无需具体文件分析]
|
||||
|
||||
## 附录
|
||||
|
||||
### API端点一览
|
||||
- 引用查询
|
||||
- GET /api/v1/citations
|
||||
- 参数:query_id、platform、start_date、end_date、skip、limit
|
||||
- 响应:items、total
|
||||
- 引用统计
|
||||
- GET /api/v1/citations/stats
|
||||
- 参数:query_id
|
||||
- 响应:总览与分项统计
|
||||
- 即时查询
|
||||
- POST /api/v1/citations/{query_id}/run-now
|
||||
- 响应:task_id、status、message
|
||||
- 报告导出
|
||||
- GET /api/v1/reports/export/csv
|
||||
- 参数:query_id、format(csv)
|
||||
- 响应:CSV流
|
||||
- 查询管理
|
||||
- GET /api/v1/queries
|
||||
- POST /api/v1/queries
|
||||
- GET /api/v1/queries/{query_id}
|
||||
- PUT /api/v1/queries/{query_id}
|
||||
- DELETE /api/v1/queries/{query_id}
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/citations.py:25-78](file://backend/app/api/citations.py#L25-L78)
|
||||
- [backend/app/api/reports.py:16-47](file://backend/app/api/reports.py#L16-L47)
|
||||
- [backend/app/api/queries.py:15-86](file://backend/app/api/queries.py#L15-L86)
|
||||
|
||||
### 数据过滤、排序与分页参数
|
||||
- 过滤
|
||||
- query_id:按所属查询任务过滤
|
||||
- platform:按平台过滤
|
||||
- start_date/end_date:按时间范围过滤
|
||||
- 排序
|
||||
- 默认按 queried_at 降序
|
||||
- 分页
|
||||
- skip:>=0
|
||||
- limit:>=1且<=100
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/citations.py:25-46](file://backend/app/api/citations.py#L25-L46)
|
||||
- [backend/app/services/citation.py:24-73](file://backend/app/services/citation.py#L24-L73)
|
||||
|
||||
### 品牌识别与置信度评分
|
||||
- 匹配类型与置信度
|
||||
- 精确匹配:1.0
|
||||
- 别名匹配:0.9
|
||||
- 模糊匹配:按相似度取值(>0.4视为匹配)
|
||||
- 输出字段
|
||||
- cited、confidence、match_type、position、citation_text、competitor_brands
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:26-120](file://backend/app/workers/citation_engine.py#L26-L120)
|
||||
|
||||
### 平台对比与趋势分析
|
||||
- 平台对比
|
||||
- by_platform包含各平台queries、citations、rate、avg_position
|
||||
- 前端使用柱状图组件展示引用率
|
||||
- 趋势分析
|
||||
- trend为近30天按周统计的citations
|
||||
- 前端使用折线图组件展示
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/schemas/citation.py:25-44](file://backend/app/schemas/citation.py#L25-L44)
|
||||
- [frontend/components/charts/platform-chart.tsx:34-67](file://frontend/components/charts/platform-chart.tsx#L34-L67)
|
||||
- [frontend/components/charts/trend-chart.tsx:22-59](file://frontend/components/charts/trend-chart.tsx#L22-L59)
|
||||
|
|
@ -0,0 +1,366 @@
|
|||
# 报告导出接口
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/api/reports.py](file://backend/app/api/reports.py)
|
||||
- [backend/app/services/citation.py](file://backend/app/services/citation.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/query_task.py](file://backend/app/models/query_task.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [frontend/lib/api.ts](file://frontend/lib/api.ts)
|
||||
- [frontend/app/(dashboard)/dashboard/reports/page.tsx](file://frontend/app/(dashboard)/dashboard/reports/page.tsx)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖关系分析](#依赖关系分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件面向“报告导出系统”的使用者与维护者,提供完整的CSV格式数据导出API文档。内容涵盖:
|
||||
- 导出流程与参数配置
|
||||
- 报告生成触发机制(手动触发、定时触发)
|
||||
- 数据筛选条件与输出格式选项
|
||||
- 导出进度监控、错误处理与文件下载
|
||||
- 报告模板定制、数据格式转换与性能优化建议
|
||||
|
||||
## 项目结构
|
||||
后端采用FastAPI + SQLAlchemy异步ORM,前端基于Next.js。导出能力通过独立的报告路由暴露,并由服务层负责数据聚合与CSV生成。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "前端"
|
||||
FE_API["前端API封装<br/>frontend/lib/api.ts"]
|
||||
FE_PAGE["报告页面<br/>frontend/app/(dashboard)/dashboard/reports/page.tsx"]
|
||||
end
|
||||
subgraph "后端"
|
||||
MAIN["应用入口<br/>backend/app/main.py"]
|
||||
ROUTER["报告路由<br/>backend/app/api/reports.py"]
|
||||
SERVICE["导出服务<br/>backend/app/services/citation.py"]
|
||||
MODELS["模型定义<br/>models/*.py"]
|
||||
WORKERS["工作流与调度<br/>workers/*.py"]
|
||||
end
|
||||
FE_PAGE --> FE_API
|
||||
FE_API --> ROUTER
|
||||
ROUTER --> SERVICE
|
||||
SERVICE --> MODELS
|
||||
WORKERS --> MODELS
|
||||
MAIN --> ROUTER
|
||||
MAIN --> WORKERS
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:13-48](file://backend/app/main.py#L13-L48)
|
||||
- [backend/app/api/reports.py:1-47](file://backend/app/api/reports.py#L1-L47)
|
||||
- [backend/app/services/citation.py:237-269](file://backend/app/services/citation.py#L237-L269)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [frontend/lib/api.ts:51-57](file://frontend/lib/api.ts#L51-L57)
|
||||
- [frontend/app/(dashboard)/dashboard/reports/page.tsx:49-93](file://frontend/app/(dashboard)/dashboard/reports/page.tsx#L49-L93)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:13-48](file://backend/app/main.py#L13-L48)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [frontend/app/(dashboard)/dashboard/reports/page.tsx:1-198](file://frontend/app/(dashboard)/dashboard/reports/page.tsx#L1-L198)
|
||||
|
||||
## 核心组件
|
||||
- 报告导出路由:提供CSV导出接口,接收查询词ID并返回流式CSV响应。
|
||||
- 导出服务:验证用户与查询所有权,查询引用记录并生成CSV字符串。
|
||||
- 数据模型:查询、引用记录、查询任务等,支撑筛选与统计。
|
||||
- 引擎与调度:定时检查到期查询并执行,支持手动触发。
|
||||
- 前端集成:提供查询词选择、导出按钮与下载逻辑。
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/reports.py:16-47](file://backend/app/api/reports.py#L16-L47)
|
||||
- [backend/app/services/citation.py:237-269](file://backend/app/services/citation.py#L237-L269)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [frontend/lib/api.ts:51-57](file://frontend/lib/api.ts#L51-L57)
|
||||
- [frontend/app/(dashboard)/dashboard/reports/page.tsx:49-93](file://frontend/app/(dashboard)/dashboard/reports/page.tsx#L49-L93)
|
||||
|
||||
## 架构总览
|
||||
下图展示从前端发起导出请求到后端生成CSV并返回下载的完整链路。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant FE as "前端页面<br/>page.tsx"
|
||||
participant API as "前端API封装<br/>lib/api.ts"
|
||||
participant FAST as "报告路由<br/>reports.py"
|
||||
participant SVC as "导出服务<br/>services/citation.py"
|
||||
participant DB as "数据库<br/>SQLAlchemy"
|
||||
FE->>API : "调用导出接口"
|
||||
API->>FAST : "GET /api/v1/reports/export/csv?query_id=..."
|
||||
FAST->>SVC : "export_citations_csv(db, user_id, query_id)"
|
||||
SVC->>DB : "查询引用记录"
|
||||
DB-->>SVC : "返回记录集合"
|
||||
SVC-->>FAST : "返回CSV字符串"
|
||||
FAST-->>API : "StreamingResponse(csv)"
|
||||
API-->>FE : "下载CSV文件"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/app/(dashboard)/dashboard/reports/page.tsx:49-93](file://frontend/app/(dashboard)/dashboard/reports/page.tsx#L49-L93)
|
||||
- [frontend/lib/api.ts:51-57](file://frontend/lib/api.ts#L51-L57)
|
||||
- [backend/app/api/reports.py:16-47](file://backend/app/api/reports.py#L16-L47)
|
||||
- [backend/app/services/citation.py:237-269](file://backend/app/services/citation.py#L237-L269)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 报告导出接口(CSV)
|
||||
- 接口路径:/api/v1/reports/export/csv
|
||||
- 方法:GET
|
||||
- 认证:需要Bearer Token
|
||||
- 参数
|
||||
- query_id: UUID,必填,指定导出的查询词
|
||||
- format: 字符串,默认csv,当前版本仅支持csv
|
||||
- 成功响应
|
||||
- Content-Type: text/csv
|
||||
- Content-Disposition: attachment; filename="geo-report-YYYYMMDD.csv"
|
||||
- 响应体:CSV字符串(流式传输)
|
||||
- 错误处理
|
||||
- format非csv:400 Bad Request
|
||||
- 查询不存在或无权限:404 Not Found
|
||||
- 其他异常:500 Internal Server Error
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/reports.py:16-47](file://backend/app/api/reports.py#L16-L47)
|
||||
|
||||
### 导出服务与数据筛选
|
||||
- 权限校验:仅允许导出当前用户拥有的查询词下的数据
|
||||
- 数据筛选
|
||||
- query_id:限定到具体查询词
|
||||
- 时间范围:通过引用记录的查询时间字段进行过滤(服务层支持start_date/end_date参数)
|
||||
- 输出字段(CSV列头)
|
||||
- 日期、平台、是否引用、引用位置、引用文本、竞争品牌
|
||||
- 性能特性
|
||||
- 使用StringIO构建CSV,避免中间文件
|
||||
- 以StreamingResponse流式返回;注意CSV字符串先在服务层完整生成,记录量很大时内存占用会随之增长
|
||||
|
||||
章节来源
|
||||
- [backend/app/services/citation.py:237-269](file://backend/app/services/citation.py#L237-L269)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
### 报告生成触发机制
|
||||
- 手动导出
|
||||
- 前端选择查询词后点击导出,直接调用导出接口
|
||||
- 后端即时查询数据库并返回CSV
|
||||
- 定时触发(调度器定时执行查询,为导出提供最新数据)
|
||||
- 调度器每小时扫描到期查询(status=active且next_query_at<=now)
|
||||
- 对每个到期查询执行引擎,生成引用记录
|
||||
- 导出接口可读取这些最新记录
|
||||
- 手动触发查询
|
||||
- 服务层提供立即触发方法,创建查询任务并返回首个任务信息
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["定时检查"]) --> Scan["扫描到期查询"]
|
||||
Scan --> HasDue{"存在到期查询?"}
|
||||
HasDue -- 否 --> Wait["等待下一小时"] --> Scan
|
||||
HasDue -- 是 --> Exec["执行查询引擎"]
|
||||
Exec --> Save["保存引用记录"]
|
||||
Save --> Update["更新查询时间字段"]
|
||||
Update --> Export["导出接口可读取最新数据"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/services/citation.py:204-234](file://backend/app/services/citation.py#L204-L234)
|
||||
|
||||
### 数据模型与关系
|
||||
```mermaid
|
||||
erDiagram
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
datetime last_queried_at
|
||||
datetime next_query_at
|
||||
}
|
||||
CITATION_RECORDS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
boolean cited
|
||||
integer citation_position
|
||||
text citation_text
|
||||
jsonb competitor_brands
|
||||
text raw_response
|
||||
datetime queried_at
|
||||
}
|
||||
QUERY_TASKS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
string status
|
||||
text error_message
|
||||
datetime scheduled_at
|
||||
datetime started_at
|
||||
datetime completed_at
|
||||
}
|
||||
QUERIES ||--o{ CITATION_RECORDS : "拥有"
|
||||
QUERIES ||--o{ QUERY_TASKS : "拥有"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
|
||||
章节来源
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
|
||||
### 前端集成与下载
|
||||
- 前端页面提供查询词下拉选择与导出按钮
|
||||
- 导出时通过原生fetch请求后端接口,接收blob并触发浏览器下载
|
||||
- 成功/失败状态提示与加载态管理
|
||||
|
||||
章节来源
|
||||
- [frontend/app/(dashboard)/dashboard/reports/page.tsx:49-93](file://frontend/app/(dashboard)/dashboard/reports/page.tsx#L49-L93)
|
||||
- [frontend/lib/api.ts:51-57](file://frontend/lib/api.ts#L51-L57)
|
||||
|
||||
## 依赖关系分析
|
||||
- 报告路由依赖导出服务
|
||||
- 导出服务依赖查询与引用记录模型
|
||||
- 引擎与调度器依赖平台适配器基类
|
||||
- 应用入口在生命周期内启动调度器
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
Reports["reports.py"] --> ExportSvc["services/citation.py"]
|
||||
ExportSvc --> Models["models/*.py"]
|
||||
Scheduler["workers/scheduler.py"] --> Engine["workers/citation_engine.py"]
|
||||
Engine --> Platforms["workers/platforms/base.py"]
|
||||
Main["main.py"] --> Reports
|
||||
Main --> Scheduler
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/reports.py:11](file://backend/app/api/reports.py#L11)
|
||||
- [backend/app/services/citation.py:237-269](file://backend/app/services/citation.py#L237-L269)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/platforms/base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
- [backend/app/main.py:13-48](file://backend/app/main.py#L13-L48)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/reports.py:11](file://backend/app/api/reports.py#L11)
|
||||
- [backend/app/services/citation.py:237-269](file://backend/app/services/citation.py#L237-L269)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/platforms/base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
- [backend/app/main.py:13-48](file://backend/app/main.py#L13-L48)
|
||||
|
||||
## 性能考虑
|
||||
- 流式响应:后端以流式方式返回CSV,避免一次性加载全部数据至内存
|
||||
- 索引优化:引用记录表对query_id、queried_at、platform建立索引,提升筛选与排序性能
|
||||
- 分页与限制:服务层支持skip/limit,前端可结合分页控件控制导出规模
|
||||
- 导出粒度:优先按查询词导出,避免全量导出导致的I/O压力
|
||||
- 并发与队列:查询任务表支持多平台并发执行,导出时可按需筛选时间段
|
||||
|
||||
章节来源
|
||||
- [backend/app/services/citation.py:24-74](file://backend/app/services/citation.py#L24-L74)
|
||||
- [backend/app/models/citation_record.py:37-41](file://backend/app/models/citation_record.py#L37-L41)
|
||||
|
||||
## 故障排查指南
|
||||
- 400 Bad Request:format参数非csv
|
||||
- 检查前端传参或接口调用是否正确
|
||||
- 404 Not Found:查询不存在或无权限
|
||||
- 确认query_id归属当前用户,查询状态为active
|
||||
- 下载失败或文件为空
|
||||
- 检查该查询词是否存在引用记录;确认导出时间范围内是否有数据
|
||||
- 导出耗时过长
|
||||
- 缩小时间范围或按查询词导出;检查数据库索引是否生效
|
||||
- 定时任务未执行
|
||||
- 检查调度器是否启动;核对查询状态与next_query_at字段
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/reports.py:23-35](file://backend/app/api/reports.py#L23-L35)
|
||||
- [backend/app/services/citation.py:242-244](file://backend/app/services/citation.py#L242-L244)
|
||||
- [backend/app/workers/scheduler.py:30-40](file://backend/app/workers/scheduler.py#L30-L40)
|
||||
|
||||
## 结论
|
||||
本报告导出系统以清晰的职责分离实现:前端负责交互与下载,后端路由负责鉴权与转发,服务层负责数据筛选与CSV生成,调度器负责周期性任务执行。当前版本聚焦CSV导出,后续可扩展为多格式与批量导出,同时建议引入任务队列与进度回调以增强用户体验。
|
||||
|
||||
## 附录
|
||||
|
||||
### API定义(CSV导出)
|
||||
- 路径:/api/v1/reports/export/csv
|
||||
- 方法:GET
|
||||
- 认证:Bearer Token
|
||||
- 查询参数
|
||||
- query_id: UUID,必填
|
||||
- format: 字符串,可选,默认csv
|
||||
- 响应
|
||||
- 200 OK:text/csv,Content-Disposition为附件下载
|
||||
- 400 Bad Request:format非法
|
||||
- 404 Not Found:查询不存在或无权限
|
||||
- 500 Internal Server Error:内部异常
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/reports.py:16-47](file://backend/app/api/reports.py#L16-L47)
|
||||
|
||||
### 输出字段说明(CSV)
|
||||
- 日期:引用记录的查询时间
|
||||
- 平台:执行查询的AI平台名称
|
||||
- 是否引用:布尔值(是/否)
|
||||
- 引用位置:品牌首次出现的段落序号(若未命中则为空)
|
||||
- 引用文本:截取后的上下文片段(若未命中则为空)
|
||||
- 竞争品牌:检测到的其他品牌列表(逗号分隔)
|
||||
|
||||
章节来源
|
||||
- [backend/app/services/citation.py:256-268](file://backend/app/services/citation.py#L256-L268)
|
||||
|
||||
### 报告模板定制建议
|
||||
- 列顺序与标题:可在导出服务中调整writer的列头顺序与本地化标题
|
||||
- 过滤条件:扩展服务层参数(如平台、起止时间),并在导出服务中拼接SQL条件
|
||||
- 格式转换:如需Excel,可在前端接收CSV后转为xlsx,或在后端生成xlsx流
|
||||
|
||||
章节来源
|
||||
- [backend/app/services/citation.py:237-269](file://backend/app/services/citation.py#L237-L269)
|
||||
|
||||
### 批量导出与定时导出模式
|
||||
- 批量导出:前端逐项选择查询词后分别导出;或在后端增加“全部查询”聚合导出(需扩展服务层)
|
||||
- 定时导出:通过调度器周期性执行查询并生成记录,随后即可导出
|
||||
- 手动导出:前端点击即刻触发查询任务,完成后导出
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/services/citation.py:204-234](file://backend/app/services/citation.py#L204-L234)
|
||||
|
||||
### 进度监控与错误处理
|
||||
- 进度监控:查询任务表包含状态与时间戳,可用于前端轮询或WebSocket推送
|
||||
- 错误处理:平台适配器异常会写入任务错误信息与记录,导出接口保持幂等与健壮
|
||||
|
||||
章节来源
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/workers/citation_engine.py:211-227](file://backend/app/workers/citation_engine.py#L211-L227)
|
||||
|
|
@ -0,0 +1,607 @@
|
|||
# 查询管理接口
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/schemas/query.py](file://backend/app/schemas/query.py)
|
||||
- [backend/app/schemas/citation.py](file://backend/app/schemas/citation.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/query_task.py](file://backend/app/models/query_task.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
- [backend/app/services/query.py](file://backend/app/services/query.py)
|
||||
- [backend/app/services/citation.py](file://backend/app/services/citation.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [backend/app/workers/platforms/kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [backend/app/workers/platforms/wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [tests/test_queries.py](file://tests/test_queries.py)
|
||||
- [tests/test_business_flow.py](file://tests/test_business_flow.py)
|
||||
</cite>
|
||||
|
||||
## 更新摘要
|
||||
**变更内容**
|
||||
- 更新run-now功能的API文档,反映其已合并到主查询API中
|
||||
- 修正API路由结构,移除独立的run_now_router结构
|
||||
- 更新架构图和依赖关系,体现新的API组织方式
|
||||
- 补充run-now接口的详细使用说明和错误处理
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件为"查询管理系统"的详细API文档,覆盖查询任务的创建、读取、更新、删除与执行全流程;详述查询任务的数据模型、字段定义与验证规则;文档化查询任务的状态管理、调度机制与执行监控;包含查询参数配置、定时任务设置与批量操作接口的使用建议;并提供查询任务生命周期管理的最佳实践与错误处理策略。
|
||||
|
||||
## 项目结构
|
||||
后端采用FastAPI + SQLAlchemy异步ORM + PostgreSQL数据库,查询管理相关模块分布如下:
|
||||
- API层:负责路由定义与请求/响应封装,包含主查询API和引用API
|
||||
- Schema层:Pydantic模型,定义请求体与响应体的字段与校验
|
||||
- Model层:SQLAlchemy ORM模型,定义数据库表结构与索引
|
||||
- Service层:业务逻辑封装,处理权限、计数限制与时间计算
|
||||
- Worker层:定时调度器与引用检测引擎,驱动查询任务执行
|
||||
- 平台适配器:Kimi与文心一言平台的适配实现
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "API层"
|
||||
QAPI["查询API<br/>backend/app/api/queries.py"]
|
||||
CAPI["引用API<br/>backend/app/api/citations.py"]
|
||||
end
|
||||
subgraph "Schema层"
|
||||
QS["查询Schema<br/>backend/app/schemas/query.py"]
|
||||
CS["引用Schema<br/>backend/app/schemas/citation.py"]
|
||||
end
|
||||
subgraph "Model层"
|
||||
MQ["查询模型<br/>backend/app/models/query.py"]
|
||||
MT["查询任务模型<br/>backend/app/models/query_task.py"]
|
||||
MR["引用记录模型<br/>backend/app/models/citation_record.py"]
|
||||
MU["用户模型<br/>backend/app/models/user.py"]
|
||||
end
|
||||
subgraph "Service层"
|
||||
SQ["查询服务<br/>backend/app/services/query.py"]
|
||||
SC["引用服务<br/>backend/app/services/citation.py"]
|
||||
end
|
||||
subgraph "Worker层"
|
||||
SCH["调度器<br/>backend/app/workers/scheduler.py"]
|
||||
CE["引用引擎<br/>backend/app/workers/citation_engine.py"]
|
||||
end
|
||||
subgraph "平台适配器"
|
||||
BASE["适配器基类<br/>backend/app/workers/platforms/base.py"]
|
||||
KIMI["Kimi适配器<br/>backend/app/workers/platforms/kimi.py"]
|
||||
WENXIN["文心一言适配器<br/>backend/app/workers/platforms/wenxin.py"]
|
||||
end
|
||||
QAPI --> SQ
|
||||
CAPI --> SC
|
||||
SQ --> MQ
|
||||
SC --> MQ
|
||||
SC --> MR
|
||||
SC --> MT
|
||||
SCH --> CE
|
||||
CE --> KIMI
|
||||
CE --> WENXIN
|
||||
QS --> QAPI
|
||||
CS --> QAPI
|
||||
CS --> CAPI
|
||||
MQ --> MU
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/queries.py:1-109](file://backend/app/api/queries.py#L1-L109)
|
||||
- [backend/app/api/citations.py:1-55](file://backend/app/api/citations.py#L1-L55)
|
||||
- [backend/app/schemas/query.py:1-94](file://backend/app/schemas/query.py#L1-L94)
|
||||
- [backend/app/schemas/citation.py:1-52](file://backend/app/schemas/citation.py#L1-L52)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/services/query.py:1-130](file://backend/app/services/query.py#L1-L130)
|
||||
- [backend/app/services/citation.py:1-429](file://backend/app/services/citation.py#L1-L429)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
|
||||
## 核心组件
|
||||
- 查询API:提供查询任务的增删改查与分页列表接口,包含run-now立即执行功能
|
||||
- 引用API:提供查询历史、统计与导出接口
|
||||
- 查询Schema:定义创建/更新请求体与响应体的字段与校验规则
|
||||
- 引用Schema:定义引用记录、统计和run-now响应的数据结构
|
||||
- 查询模型:定义数据库表结构、索引与关联关系
|
||||
- 查询任务模型:记录每次平台执行的任务状态与时间戳
|
||||
- 引用记录模型:保存每次查询的结果与统计信息
|
||||
- 用户模型:限制用户的最大查询数量
|
||||
- 查询服务:实现权限控制、计数限制与下次查询时间计算
|
||||
- 引用服务:实现引用数据查询、统计、立即执行和导出功能
|
||||
- 调度器:基于APScheduler的定时任务,周期性检查并执行到期查询
|
||||
- 引用引擎:跨平台执行查询、品牌匹配、竞争品牌检测与结果持久化
|
||||
- 平台适配器:Kimi与文心一言的自动化查询实现
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/queries.py:15-109](file://backend/app/api/queries.py#L15-L109)
|
||||
- [backend/app/api/citations.py:19-55](file://backend/app/api/citations.py#L19-L55)
|
||||
- [backend/app/schemas/query.py:11-94](file://backend/app/schemas/query.py#L11-L94)
|
||||
- [backend/app/schemas/citation.py:7-52](file://backend/app/schemas/citation.py#L7-L52)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/services/query.py:12-130](file://backend/app/services/query.py#L12-L130)
|
||||
- [backend/app/services/citation.py:219-429](file://backend/app/services/citation.py#L219-L429)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/platforms/base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
|
||||
## 架构总览
|
||||
查询管理系统的整体工作流如下:
|
||||
- 客户端通过查询API创建查询任务,服务端进行权限与参数校验,并计算下次查询时间
|
||||
- 调度器定时扫描满足条件的查询任务,触发引用引擎执行
|
||||
- 引用引擎遍历平台列表,调用平台适配器获取AI回复,执行品牌匹配与竞争品牌检测,生成引用记录
|
||||
- 查询任务模型记录每次执行的状态与时间,便于监控与重试
|
||||
- 引用API提供查询历史、统计与导出接口
|
||||
- **新增**:run-now功能允许用户立即执行查询任务,绕过正常调度机制
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant API as "查询API"
|
||||
participant Service as "查询服务"
|
||||
participant DB as "数据库"
|
||||
participant Scheduler as "调度器"
|
||||
participant Engine as "引用引擎"
|
||||
participant Adapter as "平台适配器"
|
||||
Client->>API : "POST /api/v1/queries/"
|
||||
API->>Service : "create_query(...)"
|
||||
Service->>DB : "插入查询记录并计算next_query_at"
|
||||
DB-->>Service : "返回新查询"
|
||||
Service-->>API : "返回查询"
|
||||
API-->>Client : "201 Created"
|
||||
Note over Scheduler,Engine : "定时触发"
|
||||
Scheduler->>DB : "查询status='active'且next_query_at<=now()"
|
||||
DB-->>Scheduler : "返回待执行查询集合"
|
||||
Scheduler->>Engine : "execute_query(query)"
|
||||
Engine->>Adapter : "逐平台查询(keyword)"
|
||||
Adapter-->>Engine : "返回原始回复"
|
||||
Engine->>DB : "写入引用记录与更新查询时间"
|
||||
Note over Client,API : "立即执行"
|
||||
Client->>API : "POST /api/v1/queries/{query_id}/run-now"
|
||||
API->>Service : "trigger_query_now(...)"
|
||||
Service->>DB : "创建QueryTask并立即执行"
|
||||
DB-->>Service : "返回任务"
|
||||
Service-->>API : "返回任务"
|
||||
API-->>Client : "202 Accepted"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/queries.py:28-41](file://backend/app/api/queries.py#L28-L41)
|
||||
- [backend/app/api/queries.py:90-109](file://backend/app/api/queries.py#L90-L109)
|
||||
- [backend/app/services/query.py:45-81](file://backend/app/services/query.py#L45-L81)
|
||||
- [backend/app/services/citation.py:219-261](file://backend/app/services/citation.py#L219-L261)
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [backend/app/workers/platforms/kimi.py:33-48](file://backend/app/workers/platforms/kimi.py#L33-L48)
|
||||
- [backend/app/workers/platforms/wenxin.py:33-48](file://backend/app/workers/platforms/wenxin.py#L33-L48)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 数据模型与字段定义
|
||||
- 查询模型(queries)
|
||||
- 关键字段:id、user_id、keyword、target_brand、brand_aliases、platforms、frequency、status、last_queried_at、next_query_at、created_at、updated_at
|
||||
- 约束与索引:外键约束、多字段索引(user_id、status、next_query_at)
|
||||
- 关联:多对一关联至用户,一对多关联至引用记录与查询任务
|
||||
- 查询任务模型(query_tasks)
|
||||
- 关键字段:id、query_id、platform、status、error_message、scheduled_at、started_at、completed_at
|
||||
- 约束与索引:外键约束、索引(status)
|
||||
- 关联:多对一关联至查询
|
||||
- 引用记录模型(citation_records)
|
||||
- 关键字段:id、query_id、platform、cited、citation_position、citation_text、competitor_brands、raw_response、queried_at
|
||||
- 约束与索引:外键约束、多字段索引(query_id、queried_at、platform)
|
||||
- 关联:多对一关联至查询
|
||||
- 用户模型(users)
|
||||
- 关键字段:id、email、password_hash、name、plan、max_queries、is_active、created_at、updated_at
|
||||
- 关联:一对多关联至查询与订阅
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
USERS {
|
||||
uuid id PK
|
||||
string email UK
|
||||
string password_hash
|
||||
string name
|
||||
string plan
|
||||
int max_queries
|
||||
boolean is_active
|
||||
}
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamp last_queried_at
|
||||
timestamp next_query_at
|
||||
}
|
||||
QUERY_TASKS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
string status
|
||||
text error_message
|
||||
timestamp scheduled_at
|
||||
timestamp started_at
|
||||
timestamp completed_at
|
||||
}
|
||||
CITATION_RECORDS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
boolean cited
|
||||
int citation_position
|
||||
text citation_text
|
||||
jsonb competitor_brands
|
||||
text raw_response
|
||||
timestamp queried_at
|
||||
}
|
||||
USERS ||--o{ QUERIES : "拥有"
|
||||
QUERIES ||--o{ CITATION_RECORDS : "产生"
|
||||
QUERIES ||--o{ QUERY_TASKS : "驱动"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
### 查询任务状态管理与调度机制
|
||||
- 状态流转
|
||||
- 查询任务状态:pending → running → success 或 failed
|
||||
- 查询状态:active、paused、disabled
|
||||
- 调度机制
|
||||
- 使用APScheduler的AsyncIOScheduler,每小时检查一次
|
||||
- 条件:查询状态为active且next_query_at小于等于当前UTC时间
|
||||
- 触发引用引擎执行,逐平台查询并生成引用记录
|
||||
- 执行监控
|
||||
- 记录scheduled_at、started_at、completed_at
|
||||
- 失败时记录error_message
|
||||
- 更新查询的last_queried_at与next_query_at
|
||||
|
||||
```mermaid
|
||||
stateDiagram-v2
|
||||
[*] --> 待调度
|
||||
待调度 --> 已调度 : "被调度器发现"
|
||||
已调度 --> 执行中 : "开始执行"
|
||||
执行中 --> 成功 : "全部平台成功"
|
||||
执行中 --> 失败 : "任一平台失败"
|
||||
成功 --> 待调度 : "更新next_query_at"
|
||||
失败 --> 待调度 : "更新next_query_at"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/workers/citation_engine.py:176-234](file://backend/app/workers/citation_engine.py#L176-L234)
|
||||
- [backend/app/models/query_task.py:24-32](file://backend/app/models/query_task.py#L24-L32)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
|
||||
### 查询参数配置与验证规则
|
||||
- 支持平台:wenxin、kimi、tongyi、baidu_ai、yuanbao、qingyan(创建/更新时校验)
|
||||
- 频率:daily、weekly(默认weekly)
|
||||
- 状态:active、paused、disabled(仅更新时可修改)
|
||||
- 字段长度:keyword(1~200)、target_brand(1~100)
|
||||
- 平台列表不能为空,若为空则报错
|
||||
- 频率必须在允许集合内,否则报错
|
||||
- 状态必须在允许集合内,否则报错
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["进入校验"]) --> CheckPlatforms["校验平台列表非空"]
|
||||
CheckPlatforms --> PlatformsOK{"平台有效?"}
|
||||
PlatformsOK --> |否| ErrPlatforms["抛出平台无效错误"]
|
||||
PlatformsOK --> |是| CheckFrequency["校验频率"]
|
||||
CheckFrequency --> FreqOK{"频率有效?"}
|
||||
FreqOK --> |否| ErrFreq["抛出频率无效错误"]
|
||||
FreqOK --> |是| CheckStatus["校验状态(如提供)"]
|
||||
CheckStatus --> StatusOK{"状态有效?"}
|
||||
StatusOK --> |否| ErrStatus["抛出状态无效错误"]
|
||||
StatusOK --> |是| Done(["校验通过"])
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/schemas/query.py:18-33](file://backend/app/schemas/query.py#L18-L33)
|
||||
- [backend/app/schemas/query.py:44-72](file://backend/app/schemas/query.py#L44-L72)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/schemas/query.py:6-9](file://backend/app/schemas/query.py#L6-L9)
|
||||
- [backend/app/schemas/query.py:11-94](file://backend/app/schemas/query.py#L11-L94)
|
||||
|
||||
### API定义与使用示例
|
||||
|
||||
- 查询任务管理
|
||||
- GET /api/v1/queries/
|
||||
- 功能:分页列出当前用户的所有查询任务
|
||||
- 查询参数:skip(>=0)、limit(1~100)
|
||||
- 响应:包含items与total的列表响应
|
||||
- POST /api/v1/queries/
|
||||
- 功能:创建新的查询任务
|
||||
- 请求体:keyword、target_brand、brand_aliases、platforms、frequency
|
||||
- 响应:创建成功的查询任务详情
|
||||
- 错误:超过用户最大查询数时返回403
|
||||
- GET /api/v1/queries/{query_id}
|
||||
- 功能:获取指定查询任务详情
|
||||
- 响应:查询任务详情
|
||||
- 错误:不存在或不属于当前用户返回404
|
||||
- PUT /api/v1/queries/{query_id}
|
||||
- 功能:更新查询任务
|
||||
- 请求体:可选字段keyword、target_brand、brand_aliases、platforms、frequency、status
|
||||
- 响应:更新后的查询任务详情
|
||||
- 错误:不存在返回404
|
||||
- DELETE /api/v1/queries/{query_id}
|
||||
- 功能:删除查询任务
|
||||
- 响应:204 No Content
|
||||
- 错误:不存在返回404
|
||||
|
||||
- **立即执行查询**(已合并到主查询API)
|
||||
- POST /api/v1/queries/{query_id}/run-now
|
||||
- 功能:立即将某个查询加入执行队列
|
||||
- 响应:包含task_id、status与消息的RunNowResponse
|
||||
- 状态码:202 Accepted(任务已加入队列)
|
||||
- 错误:查询不存在、不属于当前用户或查询状态不为active时返回404
|
||||
|
||||
- 引用数据与统计
|
||||
- GET /api/v1/citations/
|
||||
- 功能:分页查询引用记录,支持按query_id、platform、日期范围过滤
|
||||
- 响应:包含items与total的列表响应
|
||||
- GET /api/v1/citations/stats
|
||||
- 功能:获取引用统计
|
||||
- 响应:统计结果
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/queries.py:17-109](file://backend/app/api/queries.py#L17-L109)
|
||||
- [backend/app/api/citations.py:22-55](file://backend/app/api/citations.py#L22-L55)
|
||||
- [backend/app/schemas/citation.py:48-52](file://backend/app/schemas/citation.py#L48-L52)
|
||||
|
||||
### 生命周期管理最佳实践
|
||||
- 合理设置frequency:daily适合高频监控,weekly适合常规跟踪
|
||||
- 控制platforms数量:平台越多,耗时越长,成本越高
|
||||
- 使用status暂停:在维护或节假日可将查询置为paused避免执行
|
||||
- 监控next_query_at:确保调度器能按时触发
|
||||
- 引用记录归档:定期清理过期记录,保持查询性能
|
||||
- **新增**:合理使用run-now功能:仅在紧急情况下使用,避免过度消耗资源
|
||||
|
||||
### 错误处理策略
|
||||
- 参数校验失败:返回422,提示具体字段问题
|
||||
- 权限不足/查询不存在:返回404
|
||||
- 超过用户最大查询数:返回403
|
||||
- 平台适配器异常:记录error_message,状态标记为failed
|
||||
- 调度器异常:日志记录错误并继续运行
|
||||
- **新增**:run-now功能错误处理:查询状态非active、无平台配置等情况返回404
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/queries.py:32-39](file://backend/app/api/queries.py#L32-L39)
|
||||
- [backend/app/api/queries.py:49-53](file://backend/app/api/queries.py#L49-L53)
|
||||
- [backend/app/api/queries.py:64-69](file://backend/app/api/queries.py#L64-L69)
|
||||
- [backend/app/api/queries.py:79-84](file://backend/app/api/queries.py#L79-L84)
|
||||
- [backend/app/api/queries.py:96-103](file://backend/app/api/queries.py#L96-L103)
|
||||
- [backend/app/api/citations.py:22-55](file://backend/app/api/citations.py#L22-L55)
|
||||
|
||||
## 依赖分析
|
||||
- 组件耦合
|
||||
- API层依赖Service层;Service层依赖Model层;Worker层依赖Model层与平台适配器
|
||||
- 引用引擎依赖平台适配器,平台适配器继承自基类
|
||||
- **新增**:查询API现在直接依赖引用服务的trigger_query_now功能
|
||||
- 外部依赖
|
||||
- APScheduler用于定时调度
|
||||
- Playwright用于平台网页自动化
|
||||
- SQLAlchemy异步ORM用于数据库访问
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
APIQ["查询API"] --> SVCQ["查询服务"]
|
||||
APIC["引用API"] --> SVCC["引用服务"]
|
||||
APIQ --> SVCC
|
||||
SVCQ --> MODELS["查询/任务/记录模型"]
|
||||
SVCC --> MODELS
|
||||
SVCC --> CE["引用引擎"]
|
||||
CE --> ADP["平台适配器"]
|
||||
ADP --> BASE["适配器基类"]
|
||||
SCH["调度器"] --> CE
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/queries.py:1-14](file://backend/app/api/queries.py#L1-L14)
|
||||
- [backend/app/api/citations.py:1-19](file://backend/app/api/citations.py#L1-L19)
|
||||
- [backend/app/services/query.py:1-10](file://backend/app/services/query.py#L1-L10)
|
||||
- [backend/app/services/citation.py:1-17](file://backend/app/services/citation.py#L1-L17)
|
||||
- [backend/app/workers/citation_engine.py:148-157](file://backend/app/workers/citation_engine.py#L148-L157)
|
||||
- [backend/app/workers/platforms/base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
- [backend/app/workers/scheduler.py:25-39](file://backend/app/workers/scheduler.py#L25-L39)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:13-19](file://backend/app/workers/scheduler.py#L13-L19)
|
||||
- [backend/app/workers/platforms/kimi.py:17-32](file://backend/app/workers/platforms/kimi.py#L17-L32)
|
||||
- [backend/app/workers/platforms/wenxin.py:17-32](file://backend/app/workers/platforms/wenxin.py#L17-L32)
|
||||
|
||||
## 性能考虑
|
||||
- 数据库索引:为常用查询字段建立索引,减少扫描开销
|
||||
- 分页查询:API层提供skip/limit参数,避免一次性返回大量数据
|
||||
- 异步I/O:使用异步数据库连接与平台适配器,提升并发能力
|
||||
- 调度频率:每小时检查一次,可根据业务量调整
|
||||
- 结果缓存:对频繁查询的平台响应可引入缓存(需结合业务场景)
|
||||
- **新增**:run-now功能的异步执行:使用asyncio.create_task避免阻塞主请求线程
|
||||
|
||||
## 故障排查指南
|
||||
- 调度器未启动:确认应用生命周期钩子已注册并启动调度器
|
||||
- 平台适配器异常:检查Playwright浏览器安装与页面选择器是否匹配
|
||||
- 查询未执行:检查查询状态与next_query_at是否满足调度条件
|
||||
- 引用记录缺失:确认引用引擎执行流程与数据库提交顺序
|
||||
- 权限错误:确认用户max_queries限制与当前查询数量
|
||||
- **新增**:run-now功能问题排查:检查查询状态是否为active、平台配置是否正确、任务创建是否成功
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/workers/platforms/kimi.py:21-32](file://backend/app/workers/platforms/kimi.py#L21-L32)
|
||||
- [backend/app/workers/platforms/wenxin.py:21-32](file://backend/app/workers/platforms/wenxin.py#L21-L32)
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/services/query.py:45-81](file://backend/app/services/query.py#L45-L81)
|
||||
- [backend/app/services/citation.py:219-261](file://backend/app/services/citation.py#L219-L261)
|
||||
|
||||
## 结论
|
||||
本查询管理系统以清晰的分层架构实现了查询任务的全生命周期管理:从创建、校验、调度到执行与监控。通过严格的参数校验、灵活的状态管理与可靠的调度机制,系统能够稳定支撑多平台、多频率的查询需求。**最新的架构变更将run-now功能直接集成到主查询API中,简化了API结构并提供了更便捷的立即执行能力。** 建议在生产环境中结合业务量调整调度频率、优化平台适配器稳定性,并完善监控告警体系。
|
||||
|
||||
## 附录
|
||||
|
||||
### 类关系图(代码级)
|
||||
```mermaid
|
||||
classDiagram
|
||||
class Query {
|
||||
+uuid id
|
||||
+uuid user_id
|
||||
+string keyword
|
||||
+string target_brand
|
||||
+list brand_aliases
|
||||
+list platforms
|
||||
+string frequency
|
||||
+string status
|
||||
+datetime last_queried_at
|
||||
+datetime next_query_at
|
||||
+datetime created_at
|
||||
+datetime updated_at
|
||||
}
|
||||
class QueryTask {
|
||||
+uuid id
|
||||
+uuid query_id
|
||||
+string platform
|
||||
+string status
|
||||
+string error_message
|
||||
+datetime scheduled_at
|
||||
+datetime started_at
|
||||
+datetime completed_at
|
||||
}
|
||||
class CitationRecord {
|
||||
+uuid id
|
||||
+uuid query_id
|
||||
+string platform
|
||||
+boolean cited
|
||||
+int citation_position
|
||||
+string citation_text
|
||||
+list competitor_brands
|
||||
+string raw_response
|
||||
+datetime queried_at
|
||||
}
|
||||
class User {
|
||||
+uuid id
|
||||
+string email
|
||||
+string password_hash
|
||||
+string name
|
||||
+string plan
|
||||
+int max_queries
|
||||
+boolean is_active
|
||||
}
|
||||
class CitationEngine {
|
||||
+execute_query(query, db) list
|
||||
+execute_single_platform(keyword, platform, target_brand, brand_aliases) dict
|
||||
}
|
||||
class QueryScheduler {
|
||||
+start() void
|
||||
+check_and_execute_queries() void
|
||||
+shutdown() void
|
||||
}
|
||||
class KimiAdapter {
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class WenxinAdapter {
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class RunNowResponse {
|
||||
+uuid task_id
|
||||
+string status
|
||||
+string message
|
||||
}
|
||||
Query "1" --> "*" CitationRecord : "产生"
|
||||
Query "1" --> "*" QueryTask : "驱动"
|
||||
User "1" --> "*" Query : "拥有"
|
||||
CitationEngine --> Query : "读取"
|
||||
CitationEngine --> CitationRecord : "写入"
|
||||
CitationEngine --> QueryTask : "写入"
|
||||
QueryScheduler --> CitationEngine : "触发"
|
||||
CitationEngine --> KimiAdapter : "调用"
|
||||
CitationEngine --> WenxinAdapter : "调用"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/platforms/kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
- [backend/app/schemas/citation.py:48-52](file://backend/app/schemas/citation.py#L48-L52)
|
||||
|
||||
### 单次查询执行序列图
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant API as "查询API"
|
||||
participant Service as "查询服务"
|
||||
participant DB as "数据库"
|
||||
participant Engine as "引用引擎"
|
||||
participant Task as "查询任务模型"
|
||||
participant Record as "引用记录模型"
|
||||
API->>Service : "create_query(...)"
|
||||
Service->>DB : "插入查询记录"
|
||||
DB-->>Service : "返回查询"
|
||||
Service-->>API : "返回查询"
|
||||
Note over Engine,DB : "调度器触发"
|
||||
Engine->>DB : "查询平台列表"
|
||||
Engine->>Task : "获取或创建任务"
|
||||
Task-->>Engine : "任务对象"
|
||||
Engine->>Task : "状态=running"
|
||||
Engine->>Engine : "调用平台适配器"
|
||||
Engine->>Record : "创建引用记录"
|
||||
Engine->>DB : "提交事务"
|
||||
Note over API,DB : "run-now立即执行"
|
||||
API->>Service : "trigger_query_now(...)"
|
||||
Service->>DB : "创建QueryTask并立即执行"
|
||||
Service->>Engine : "_execute_query_tasks(...)"
|
||||
Engine->>Task : "状态=running"
|
||||
Engine->>Engine : "调用平台适配器"
|
||||
Engine->>Record : "创建引用记录"
|
||||
Engine->>DB : "提交事务"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/services/query.py:45-81](file://backend/app/services/query.py#L45-L81)
|
||||
- [backend/app/services/citation.py:219-327](file://backend/app/services/citation.py#L219-L327)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
|
@ -0,0 +1,413 @@
|
|||
# 认证接口
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/schemas/auth.py](file://backend/app/schemas/auth.py)
|
||||
- [backend/app/services/auth.py](file://backend/app/services/auth.py)
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
- [backend/app/api/deps.py](file://backend/app/api/deps.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [tests/test_auth.py](file://tests/test_auth.py)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排除指南](#故障排除指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件为认证系统的完整API文档,覆盖以下接口:
|
||||
- 用户注册:/api/v1/auth/register
|
||||
- 用户登录:/api/v1/auth/login
|
||||
- 获取当前用户:/api/v1/auth/me
|
||||
|
||||
文档详细说明每个接口的请求参数、响应格式、状态码、错误处理以及JWT令牌生成机制、认证流程与安全考虑,并提供认证中间件使用指南与最佳实践。
|
||||
|
||||
## 项目结构
|
||||
后端采用FastAPI + SQLAlchemy异步ORM + PostgreSQL + Redis的架构,认证模块位于backend/app/api/auth.py,配合服务层、模型层与依赖注入实现。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "后端应用"
|
||||
A["FastAPI 应用<br/>路由注册"]
|
||||
B["认证路由<br/>/api/v1/auth/*"]
|
||||
C["认证服务<br/>密码哈希/校验/JWT"]
|
||||
D["数据库会话<br/>AsyncSession"]
|
||||
E["用户模型<br/>SQLAlchemy ORM"]
|
||||
F["认证依赖<br/>OAuth2PasswordBearer"]
|
||||
end
|
||||
A --> B
|
||||
B --> C
|
||||
B --> D
|
||||
C --> D
|
||||
D --> E
|
||||
F --> C
|
||||
F --> D
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:38](file://backend/app/main.py#L38)
|
||||
- [backend/app/api/auth.py:10](file://backend/app/api/auth.py#L10)
|
||||
- [backend/app/services/auth.py:13](file://backend/app/services/auth.py#L13)
|
||||
- [backend/app/database.py:23](file://backend/app/database.py#L23)
|
||||
- [backend/app/models/user.py:11](file://backend/app/models/user.py#L11)
|
||||
- [backend/app/api/deps.py:13](file://backend/app/api/deps.py#L13)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:38](file://backend/app/main.py#L38)
|
||||
- [backend/app/api/auth.py:10](file://backend/app/api/auth.py#L10)
|
||||
|
||||
## 核心组件
|
||||
- 路由器:在主应用中注册认证路由前缀为/api/v1/auth
|
||||
- 认证服务:提供密码哈希/校验、JWT生成/校验、用户注册与认证
|
||||
- 数据模型:用户表结构,包含邮箱、密码哈希、计划等级、配额等字段
|
||||
- 依赖注入:OAuth2PasswordBearer用于从Authorization头提取Bearer令牌,get_current_user解析并验证JWT,加载当前用户
|
||||
- 配置:JWT密钥与过期时间、数据库连接等
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:10](file://backend/app/api/auth.py#L10)
|
||||
- [backend/app/services/auth.py:13](file://backend/app/services/auth.py#L13)
|
||||
- [backend/app/models/user.py:11](file://backend/app/models/user.py#L11)
|
||||
- [backend/app/api/deps.py:13](file://backend/app/api/deps.py#L13)
|
||||
- [backend/app/config.py:9](file://backend/app/config.py#L9)
|
||||
|
||||
## 架构总览
|
||||
认证系统遵循“请求-服务-模型-数据库”的分层设计,使用OAuth2 Bearer令牌进行无状态认证。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant 客户端 as "客户端"
|
||||
participant 路由 as "认证路由"
|
||||
participant 服务 as "认证服务"
|
||||
participant 数据库 as "数据库"
|
||||
participant 依赖 as "认证依赖"
|
||||
客户端->>路由 : POST /api/v1/auth/register
|
||||
路由->>服务 : 注册用户(邮箱/密码/姓名)
|
||||
服务->>数据库 : 查询邮箱是否已存在
|
||||
数据库-->>服务 : 结果
|
||||
服务->>服务 : 哈希密码
|
||||
服务->>数据库 : 创建用户记录
|
||||
数据库-->>服务 : 新用户对象
|
||||
服务-->>路由 : 返回UserResponse
|
||||
路由-->>客户端 : 201 Created + UserResponse
|
||||
客户端->>路由 : POST /api/v1/auth/login
|
||||
路由->>服务 : 认证(邮箱/密码)
|
||||
服务->>数据库 : 查询用户
|
||||
数据库-->>服务 : 用户或None
|
||||
服务->>服务 : 校验密码
|
||||
服务->>服务 : 生成JWT(access_token)
|
||||
服务-->>路由 : 返回TokenResponse
|
||||
路由-->>客户端 : 200 OK + TokenResponse
|
||||
客户端->>依赖 : GET /api/v1/auth/me (Authorization : Bearer)
|
||||
依赖->>依赖 : 解析并验证JWT
|
||||
依赖->>数据库 : 按ID查询用户
|
||||
数据库-->>依赖 : 用户对象
|
||||
依赖-->>客户端 : 200 OK + UserResponse
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/auth.py:13](file://backend/app/api/auth.py#L13)
|
||||
- [backend/app/api/auth.py:22](file://backend/app/api/auth.py#L22)
|
||||
- [backend/app/api/auth.py:40](file://backend/app/api/auth.py#L40)
|
||||
- [backend/app/services/auth.py:37](file://backend/app/services/auth.py#L37)
|
||||
- [backend/app/services/auth.py:55](file://backend/app/services/auth.py#L55)
|
||||
- [backend/app/api/deps.py:16](file://backend/app/api/deps.py#L16)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 用户注册接口 /api/v1/auth/register
|
||||
- 方法与路径:POST /api/v1/auth/register
|
||||
- 请求体模型:UserRegister
|
||||
- 字段:email(邮箱)、password(字符串,最小长度8)、name(字符串,1-100)
|
||||
- 响应模型:UserResponse
|
||||
- 字段:id(UUID)、email(字符串)、name(可空)、plan(字符串,默认"free")、max_queries(整数,默认5)、is_active(布尔,默认true)、created_at(时间戳)
|
||||
- 成功响应:201 Created
|
||||
- 错误响应:
|
||||
- 400 Bad Request:当邮箱已被注册时,返回错误详情
|
||||
- 处理流程:
|
||||
- 调用注册服务,检查邮箱唯一性
|
||||
- 对密码进行哈希处理
|
||||
- 写入数据库并返回新用户信息
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["请求进入"]) --> Validate["校验请求体<br/>邮箱/密码/姓名"]
|
||||
Validate --> CheckDup{"邮箱已存在?"}
|
||||
CheckDup --> |是| Return400["返回400错误<br/>邮箱已注册"]
|
||||
CheckDup --> |否| HashPwd["哈希密码"]
|
||||
HashPwd --> Create["创建用户记录"]
|
||||
Create --> Commit["提交事务"]
|
||||
Commit --> Refresh["刷新用户对象"]
|
||||
Refresh --> Return201["返回201 + UserResponse"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/auth.py:13](file://backend/app/api/auth.py#L13)
|
||||
- [backend/app/services/auth.py:37](file://backend/app/services/auth.py#L37)
|
||||
- [backend/app/schemas/auth.py:7](file://backend/app/schemas/auth.py#L7)
|
||||
- [backend/app/schemas/auth.py:18](file://backend/app/schemas/auth.py#L18)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:13](file://backend/app/api/auth.py#L13)
|
||||
- [backend/app/schemas/auth.py:7](file://backend/app/schemas/auth.py#L7)
|
||||
- [backend/app/schemas/auth.py:18](file://backend/app/schemas/auth.py#L18)
|
||||
- [backend/app/services/auth.py:37](file://backend/app/services/auth.py#L37)
|
||||
- [tests/test_auth.py:26](file://tests/test_auth.py#L26)
|
||||
- [tests/test_auth.py:43](file://tests/test_auth.py#L43)
|
||||
|
||||
请求示例
|
||||
- POST /api/v1/auth/register
|
||||
- 请求体JSON:
|
||||
- email: "string"
|
||||
- password: "string(≥8)"
|
||||
- name: "string(1-100)"
|
||||
|
||||
响应示例
|
||||
- 201 Created
|
||||
- 响应体JSON:
|
||||
- id: "uuid"
|
||||
- email: "string"
|
||||
- name: "string|null"
|
||||
- plan: "string"
|
||||
- max_queries: integer
|
||||
- is_active: boolean
|
||||
- created_at: "datetime"
|
||||
|
||||
状态码
|
||||
- 201 Created:注册成功
|
||||
- 400 Bad Request:邮箱已注册
|
||||
|
||||
错误处理
|
||||
- 当服务层抛出ValueError(如邮箱重复),路由捕获并返回400
|
||||
|
||||
### 用户登录接口 /api/v1/auth/login
|
||||
- 方法与路径:POST /api/v1/auth/login
|
||||
- 请求体模型:UserLogin
|
||||
- 字段:email(邮箱)、password(字符串)
|
||||
- 响应模型:TokenResponse
|
||||
- 字段:access_token(字符串)、token_type(字符串,固定为"bearer")、user(UserResponse)
|
||||
- 成功响应:200 OK
|
||||
- 错误响应:
|
||||
- 401 Unauthorized:邮箱或密码不正确,携带WWW-Authenticate: Bearer头
|
||||
- 处理流程:
|
||||
- 使用邮箱查询用户
|
||||
- 校验密码哈希
|
||||
- 生成JWT令牌(含过期时间),返回access_token与用户信息
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant 客户端 as "客户端"
|
||||
participant 路由 as "登录路由"
|
||||
participant 服务 as "认证服务"
|
||||
participant 数据库 as "数据库"
|
||||
客户端->>路由 : POST /api/v1/auth/login
|
||||
路由->>服务 : authenticate_user(邮箱, 密码)
|
||||
服务->>数据库 : 查询用户
|
||||
数据库-->>服务 : 用户或None
|
||||
服务->>服务 : 校验密码
|
||||
alt 用户不存在或密码错误
|
||||
服务-->>路由 : None
|
||||
路由-->>客户端 : 401 Unauthorized
|
||||
else 登录成功
|
||||
服务-->>路由 : User对象
|
||||
路由->>服务 : create_access_token({sub : userId})
|
||||
服务-->>路由 : access_token
|
||||
路由-->>客户端 : 200 OK + TokenResponse
|
||||
end
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/auth.py:22](file://backend/app/api/auth.py#L22)
|
||||
- [backend/app/api/auth.py:24](file://backend/app/api/auth.py#L24)
|
||||
- [backend/app/services/auth.py:55](file://backend/app/services/auth.py#L55)
|
||||
- [backend/app/services/auth.py:24](file://backend/app/services/auth.py#L24)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:22](file://backend/app/api/auth.py#L22)
|
||||
- [backend/app/api/auth.py:24](file://backend/app/api/auth.py#L24)
|
||||
- [backend/app/schemas/auth.py:13](file://backend/app/schemas/auth.py#L13)
|
||||
- [backend/app/schemas/auth.py:30](file://backend/app/schemas/auth.py#L30)
|
||||
- [backend/app/services/auth.py:55](file://backend/app/services/auth.py#L55)
|
||||
- [backend/app/services/auth.py:24](file://backend/app/services/auth.py#L24)
|
||||
- [tests/test_auth.py:62](file://tests/test_auth.py#L62)
|
||||
- [tests/test_auth.py:76-84](file://tests/test_auth.py#L76-L84)
|
||||
|
||||
请求示例
|
||||
- POST /api/v1/auth/login
|
||||
- 请求体JSON:
|
||||
- email: "string"
|
||||
- password: "string"
|
||||
|
||||
响应示例
|
||||
- 200 OK
|
||||
- 响应体JSON:
|
||||
- access_token: "string"
|
||||
- token_type: "bearer"
|
||||
- user: {
|
||||
id: "uuid"
|
||||
email: "string"
|
||||
name: "string|null"
|
||||
plan: "string"
|
||||
max_queries: integer
|
||||
is_active: boolean
|
||||
created_at: "datetime"
|
||||
}
|
||||
|
||||
状态码
|
||||
- 200 OK:登录成功
|
||||
- 401 Unauthorized:邮箱或密码不正确
|
||||
|
||||
安全考虑
|
||||
- 密码使用BCrypt哈希存储
|
||||
- JWT使用HS256算法与密钥签名,过期时间由配置控制
|
||||
- 登录失败返回统一错误消息,避免泄露账户存在性细节
|
||||
|
||||
### 获取当前用户接口 /api/v1/auth/me
|
||||
- 方法与路径:GET /api/v1/auth/me
|
||||
- 权限要求:需要Bearer令牌(`Authorization: Bearer <token>`)
|
||||
- 依赖注入:get_current_user
|
||||
- 通过OAuth2PasswordBearer从Authorization头提取令牌
|
||||
- 校验JWT并解析sub(用户ID)
|
||||
- 从数据库按ID查询用户并返回
|
||||
- 响应模型:UserResponse
|
||||
- 成功响应:200 OK
|
||||
- 错误响应:
|
||||
- 401 Unauthorized:令牌无效、过期或用户不存在
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant 客户端 as "客户端"
|
||||
participant 依赖 as "get_current_user"
|
||||
participant 服务 as "verify_token"
|
||||
participant 数据库 as "数据库"
|
||||
客户端->>依赖 : GET /api/v1/auth/me (Authorization : Bearer)
|
||||
依赖->>依赖 : 从Authorization头提取token
|
||||
依赖->>服务 : verify_token(token)
|
||||
服务-->>依赖 : payload(sub=userId)
|
||||
依赖->>数据库 : 查询用户by id
|
||||
数据库-->>依赖 : 用户或None
|
||||
alt 令牌无效/用户不存在
|
||||
依赖-->>客户端 : 401 Unauthorized
|
||||
else 成功
|
||||
依赖-->>客户端 : 200 OK + UserResponse
|
||||
end
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/deps.py:16](file://backend/app/api/deps.py#L16)
|
||||
- [backend/app/api/deps.py:27](file://backend/app/api/deps.py#L27)
|
||||
- [backend/app/services/auth.py:32](file://backend/app/services/auth.py#L32)
|
||||
- [backend/app/api/auth.py:40](file://backend/app/api/auth.py#L40)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:40](file://backend/app/api/auth.py#L40)
|
||||
- [backend/app/api/deps.py:16](file://backend/app/api/deps.py#L16)
|
||||
- [backend/app/api/deps.py:27](file://backend/app/api/deps.py#L27)
|
||||
- [backend/app/services/auth.py:32](file://backend/app/services/auth.py#L32)
|
||||
- [tests/test_auth.py:88](file://tests/test_auth.py#L88)
|
||||
- [tests/test_auth.py:99](file://tests/test_auth.py#L99)
|
||||
|
||||
请求示例
|
||||
- GET /api/v1/auth/me
|
||||
- 请求头:
|
||||
- Authorization: `"Bearer <access_token>"`
|
||||
|
||||
响应示例
|
||||
- 200 OK
|
||||
- 响应体JSON:UserResponse
|
||||
|
||||
状态码
|
||||
- 200 OK:成功获取当前用户
|
||||
- 401 Unauthorized:未提供有效令牌或令牌无效
|
||||
|
||||
## 依赖分析
|
||||
- 外部库依赖:FastAPI、SQLAlchemy异步、Pydantic、python-jose、passlib、redis、apscheduler、playwright等
|
||||
- 认证相关依赖:OAuth2PasswordBearer、JWT编码/解码、BCrypt密码哈希
|
||||
- 配置项:JWT_SECRET、JWT_EXPIRE_HOURS、DATABASE_URL、REDIS_URL
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
A["FastAPI 应用"] --> B["认证路由"]
|
||||
B --> C["认证服务"]
|
||||
C --> D["Passlib(Bcrypt)"]
|
||||
C --> E["python-jose(JWT)"]
|
||||
C --> F["SQLAlchemy 异步"]
|
||||
F --> G["PostgreSQL"]
|
||||
A --> H["CORS 中间件"]
|
||||
A --> I["依赖注入容器"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/requirements.txt:2](file://backend/requirements.txt#L2)
|
||||
- [backend/requirements.txt:16](file://backend/requirements.txt#L16)
|
||||
- [backend/requirements.txt:17](file://backend/requirements.txt#L17)
|
||||
- [backend/app/main.py:30](file://backend/app/main.py#L30)
|
||||
- [backend/app/api/deps.py:13](file://backend/app/api/deps.py#L13)
|
||||
- [backend/app/services/auth.py:13](file://backend/app/services/auth.py#L13)
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:2](file://backend/requirements.txt#L2)
|
||||
- [backend/requirements.txt:16](file://backend/requirements.txt#L16)
|
||||
- [backend/requirements.txt:17](file://backend/requirements.txt#L17)
|
||||
- [backend/app/main.py:30](file://backend/app/main.py#L30)
|
||||
- [backend/app/api/deps.py:13](file://backend/app/api/deps.py#L13)
|
||||
- [backend/app/services/auth.py:13](file://backend/app/services/auth.py#L13)
|
||||
|
||||
## 性能考虑
|
||||
- 数据库访问:注册与登录均执行单次查询,使用异步会话减少阻塞
|
||||
- 密码哈希:BCrypt成本因子默认设置,平衡安全性与性能
|
||||
- JWT过期:可通过配置调整过期时间,建议根据业务场景权衡
|
||||
- 缓存策略:当前未对用户信息做缓存,可在高频读取场景引入Redis缓存
|
||||
|
||||
## 故障排除指南
|
||||
- 注册失败(400):确认邮箱唯一性;检查请求体字段类型与长度
|
||||
- 登录失败(401):确认邮箱与密码正确;检查JWT密钥与过期时间配置
|
||||
- 获取当前用户失败(401):确认Authorization头格式为 `Bearer <token>`;检查令牌是否过期
|
||||
- 数据库连接问题:检查DATABASE_URL配置;确保PostgreSQL服务可用
|
||||
- CORS跨域问题:确认前端域名已在CORS允许列表中
|
||||
|
||||
章节来源
|
||||
- [tests/test_auth.py:43](file://tests/test_auth.py#L43)
|
||||
- [tests/test_auth.py:76-84](file://tests/test_auth.py#L76-L84)
|
||||
- [tests/test_auth.py:99](file://tests/test_auth.py#L99)
|
||||
- [backend/app/config.py:7](file://backend/app/config.py#L7)
|
||||
- [backend/app/main.py:30](file://backend/app/main.py#L30)
|
||||
|
||||
## 结论
|
||||
认证系统基于FastAPI与SQLAlchemy异步ORM构建,采用OAuth2 Bearer令牌与JWT实现无状态认证。注册与登录流程清晰,错误处理明确,具备良好的扩展性与安全性基础。建议在生产环境完善令牌刷新策略、速率限制与审计日志,并定期轮换JWT密钥。
|
||||
|
||||
## 附录
|
||||
|
||||
### JWT令牌生成机制
|
||||
- 签名算法:HS256
|
||||
- 过期时间:由配置JWT_EXPIRE_HOURS决定
|
||||
- 载荷:包含sub(用户ID)与exp(过期时间)
|
||||
- 生成流程:在登录成功后调用create_access_token(data={"sub": str(user.id)})
|
||||
|
||||
章节来源
|
||||
- [backend/app/services/auth.py:24](file://backend/app/services/auth.py#L24)
|
||||
- [backend/app/services/auth.py:26](file://backend/app/services/auth.py#L26)
|
||||
- [backend/app/config.py:10](file://backend/app/config.py#L10)
|
||||
|
||||
### 认证中间件使用指南与最佳实践
|
||||
- 在需要保护的路由上使用依赖注入:Depends(get_current_user)
|
||||
- 前端在每次请求中携带 `Authorization: Bearer <access_token>`
|
||||
- 生产环境务必设置安全的JWT_SECRET并启用HTTPS
|
||||
- 建议实现令牌刷新与登出机制,避免长期持有高权限令牌
|
||||
- 对登录失败与敏感操作增加速率限制与日志审计
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/deps.py:16](file://backend/app/api/deps.py#L16)
|
||||
- [backend/app/api/auth.py:40](file://backend/app/api/auth.py#L40)
|
||||
- [backend/app/config.py:9](file://backend/app/config.py#L9)
|
||||
|
|
@ -0,0 +1,485 @@
|
|||
# 任务调度系统
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/platforms/kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [backend/app/workers/platforms/wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/query_task.py](file://backend/app/models/query_task.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [backend/app/services/query.py](file://backend/app/services/query.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [tests/test_scheduler.py](file://tests/test_scheduler.py)
|
||||
- [tests/test_queries.py](file://tests/test_queries.py)
|
||||
</cite>
|
||||
|
||||
## 更新摘要
|
||||
**所做更改**
|
||||
- 新增了遗留任务检查机制的详细说明,包括每分钟检查 pending 任务的兜底逻辑
|
||||
- 完善了调度器测试用例的文档,包括启动/关闭测试、查询筛选测试和频率计算测试
|
||||
- 增强了性能优化策略部分,增加了遗留任务处理和资源管理的说明
|
||||
- 更新了故障排查指南,增加了遗留任务状态异常的处理方法
|
||||
- 完善了调度器设计的详细分析,包括双调度器模式和事件循环兼容性
|
||||
|
||||
## 目录
|
||||
1. [引言](#引言)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 引言
|
||||
本文件面向任务调度系统的技术与非技术读者,系统性阐述基于 APScheduler 的异步任务调度架构,涵盖调度器配置、任务队列管理、并发控制机制;详述查询任务的生命周期(创建、状态跟踪、执行监控、错误恢复);文档化异步任务处理流程(分发、优先级与资源管理);给出性能优化策略、监控指标与故障处理机制;并提供配置项、扩展方法与调试技巧。
|
||||
|
||||
**更新** 本次更新完善了调度器设计细节,新增了遗留任务检查机制和详细的测试用例说明。
|
||||
|
||||
## 项目结构
|
||||
后端采用 FastAPI + SQLAlchemy Async 架构,调度系统位于 workers 子模块,围绕 Query 模型驱动周期性查询任务,通过 CitationEngine 统一执行平台适配器(Kimi、文心一言),并将结果持久化为 CitationRecord,同时维护 QueryTask 任务状态。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "应用入口"
|
||||
MAIN["app/main.py<br/>生命周期管理"]
|
||||
end
|
||||
subgraph "调度层"
|
||||
SCHED["workers/scheduler.py<br/>QueryScheduler<br/>双调度器模式"]
|
||||
end
|
||||
subgraph "业务逻辑"
|
||||
CE["workers/citation_engine.py<br/>CitationEngine"]
|
||||
SVC["services/query.py<br/>查询服务"]
|
||||
end
|
||||
subgraph "模型与存储"
|
||||
Q["models/query.py<br/>查询模型"]
|
||||
QT["models/query_task.py<br/>任务模型"]
|
||||
CR["models/citation_record.py<br/>引用记录模型"]
|
||||
DB["database.py<br/>异步会话"]
|
||||
end
|
||||
subgraph "平台适配"
|
||||
BASE["workers/platforms/base.py<br/>适配器基类"]
|
||||
KIMI["workers/platforms/kimi.py<br/>Kimi适配器"]
|
||||
WENXIN["workers/platforms/wenxin.py<br/>文心一言适配器"]
|
||||
end
|
||||
MAIN --> SCHED
|
||||
SCHED --> CE
|
||||
CE --> KIMI
|
||||
CE --> WENXIN
|
||||
CE --> Q
|
||||
CE --> QT
|
||||
CE --> CR
|
||||
SVC --> Q
|
||||
SVC --> DB
|
||||
SCHED --> DB
|
||||
CE --> DB
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/services/query.py:12-130](file://backend/app/services/query.py#L12-L130)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/workers/platforms/base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/services/query.py:12-130](file://backend/app/services/query.py#L12-L130)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/workers/platforms/base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
|
||||
## 核心组件
|
||||
- 调度器:基于 APScheduler 的 AsyncIOScheduler,采用双调度器模式(在同一个 AsyncIOScheduler 上注册两个定时任务),定时扫描待执行查询并触发执行,同时每分钟检查遗留的 pending 任务。
|
||||
- 引擎:CitationEngine 负责跨平台查询、品牌匹配、竞争品牌检测、任务状态更新与结果落库。
|
||||
- 平台适配器:KimiAdapter、WenxinAdapter 基于 Playwright 实现网页交互与响应抽取。
|
||||
- 数据模型:Query、QueryTask、CitationRecord 支撑任务生命周期与结果存储。
|
||||
- 服务与API:查询服务与查询 API 路由负责用户侧的查询管理与频率控制。
|
||||
- 数据库:SQLAlchemy Async Engine + Session,统一事务与连接管理。
|
||||
|
||||
**更新** 新增了遗留任务检查机制,通过双调度器模式提高系统的容错性和可靠性。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/platforms/kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/services/query.py:12-130](file://backend/app/services/query.py#L12-L130)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
|
||||
## 架构总览
|
||||
调度系统以"定时扫描 + 异步执行 + 平台适配 + 结果落库"为主线,通过 Query 的状态与时间字段驱动执行节奏,QueryTask 记录每次平台执行的状态,CitationRecord 记录最终检测结果。新增的遗留任务检查机制提供了额外的容错保护。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Timer as "调度器<br/>AsyncIOScheduler"
|
||||
participant Scheduler as "QueryScheduler"
|
||||
participant DB as "数据库<br/>AsyncSession"
|
||||
participant Engine as "CitationEngine"
|
||||
participant Platform as "平台适配器<br/>Kimi/Wenxin"
|
||||
Timer->>Scheduler : "每小时触发"
|
||||
Scheduler->>DB : "查询 active 且 next_query_at<=now 的 Query"
|
||||
Scheduler->>Engine : "逐条执行 execute_query(query)"
|
||||
Engine->>DB : "获取/创建 QueryTask 并置为 running"
|
||||
Engine->>Platform : "调用 query(keyword)"
|
||||
Platform-->>Engine : "返回原始响应文本"
|
||||
Engine->>Engine : "品牌匹配/竞争品牌检测"
|
||||
Engine->>DB : "写入 CitationRecord"
|
||||
Engine->>DB : "更新 QueryTask 为 success/fail"
|
||||
Engine->>DB : "更新 Query 的 last_queried_at/next_query_at"
|
||||
Engine-->>Scheduler : "返回本次批次记录"
|
||||
Note over Timer,Scheduler : 额外的遗留任务检查
|
||||
Timer->>Scheduler : "每分钟触发"
|
||||
Scheduler->>DB : "查询 pending 且 scheduled_at<=1分钟前的 QueryTask"
|
||||
Scheduler->>Engine : "重新执行遗留任务"
|
||||
Engine->>DB : "更新 QueryTask 状态并写入结果"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/scheduler.py:30-90](file://backend/app/workers/scheduler.py#L30-L90)
|
||||
- [backend/app/workers/scheduler.py:95-172](file://backend/app/workers/scheduler.py#L95-L172)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [backend/app/models/query.py:24-31](file://backend/app/models/query.py#L24-L31)
|
||||
- [backend/app/models/query_task.py:24-32](file://backend/app/models/query_task.py#L24-L32)
|
||||
- [backend/app/models/citation_record.py:24-29](file://backend/app/models/citation_record.py#L24-L29)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 调度器:QueryScheduler
|
||||
- 启动与注册:使用 AsyncIOScheduler 注册两个定时任务,每小时检查到期查询任务,每分钟检查遗留的 pending 任务,replace_existing=True 确保重复启动不冲突。
|
||||
- 事件循环兼容:_run_check 和 _run_pending_tasks_check 是两个同步包装函数,优先获取运行中的事件循环,否则使用 asyncio.run 启动新事件循环,保证在不同运行环境下均可执行。
|
||||
- 主要扫描与执行:check_and_execute_queries 异步查询数据库,筛选 active 且 next_query_at 已到达的 Query,逐条调用 _execute_single_query。
|
||||
- 遗留任务检查:check_and_execute_pending_tasks 兜底机制,处理超过1分钟仍未执行的 pending 任务,按 query_id 分组并重新执行。
|
||||
- 错误处理:对单条查询异常进行日志记录并继续下一条,避免单点故障影响整体扫描;遗留任务执行失败时记录错误信息并标记为 failed。
|
||||
- 关闭流程:shutdown 调用 scheduler.shutdown(wait=False) 与 engine.close(),确保资源释放。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["启动调度器"]) --> AddJobs["注册两个定时任务<br/>每小时检查到期任务<br/>每分钟检查遗留任务"]
|
||||
AddJobs --> StartSched["启动 AsyncIOScheduler"]
|
||||
StartSched --> HourlyLoop["每小时触发"]
|
||||
HourlyLoop --> Scan["查询数据库<br/>筛选到期的 Query"]
|
||||
Scan --> HasQ{"是否有待执行查询?"}
|
||||
HasQ -- 否 --> MinuteLoop["等待下一分钟"]
|
||||
HasQ -- 是 --> ExecOne["逐条执行 _execute_single_query"]
|
||||
ExecOne --> NextQ["继续下一条"]
|
||||
NextQ --> HasQ
|
||||
MinuteLoop --> PendingCheck["每分钟检查<br/>遗留的 pending 任务"]
|
||||
PendingCheck --> HasPending{"是否有遗留任务?"}
|
||||
HasPending -- 否 --> HourlyLoop
|
||||
HasPending -- 是 --> ReExec["重新执行遗留任务"]
|
||||
ReExec --> UpdateStatus["更新任务状态并写入结果"]
|
||||
UpdateStatus --> HasPending
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/scheduler.py:30-90](file://backend/app/workers/scheduler.py#L30-L90)
|
||||
- [backend/app/workers/scheduler.py:95-172](file://backend/app/workers/scheduler.py#L95-L172)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/scheduler.py:95-172](file://backend/app/workers/scheduler.py#L95-L172)
|
||||
|
||||
### 引擎:CitationEngine
|
||||
- 单查询执行:execute_query 接收 Query 与 AsyncSession,创建 BrandMatcher,遍历 Query.platforms,逐平台执行。
|
||||
- 任务状态管理:_get_or_create_task 获取或创建 QueryTask,执行前置为 running,成功置为 success,失败置为 failed 并记录错误信息。
|
||||
- 结果落库:构造 CitationRecord 写入数据库,包含 cited、confidence、position、citation_text、competitor_brands、raw_response 等字段。
|
||||
- 时间推进:执行完成后更新 Query.last_queried_at 与 next_query_at,next_query_at 基于 frequency 映射为天数增量。
|
||||
- 平台适配:execute_single_platform 通过平台映射调用对应 Adapter.query,再进行品牌匹配与竞争品牌检测。
|
||||
- 资源关闭:close 遍历适配器并逐一关闭,捕获异常仅告警。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class CitationEngine {
|
||||
+execute_query(query, db) CitationRecord[]
|
||||
+execute_single_platform(keyword, platform, target_brand, brand_aliases) dict
|
||||
+close() void
|
||||
-_get_or_create_task(db, query_id, platform) QueryTask
|
||||
-_calculate_next_query_at(frequency) datetime
|
||||
}
|
||||
class BrandMatcher {
|
||||
+match(text) dict
|
||||
-_extract_candidates(text) list
|
||||
-_extract_position_and_context(text, keyword) tuple
|
||||
}
|
||||
class CompetitorDetector {
|
||||
+detect(text, target_brand) list
|
||||
}
|
||||
CitationEngine --> BrandMatcher : "使用"
|
||||
CitationEngine --> CompetitorDetector : "使用"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/citation_engine.py:19-120](file://backend/app/workers/citation_engine.py#L19-L120)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
|
||||
### 平台适配器:KimiAdapter 与 WenxinAdapter
|
||||
- 基类约束:BasePlatformAdapter 定义 platform_name、platform_url 与抽象 query 方法,close 可选。
|
||||
- 浏览器生命周期:_ensure_browser 确保 Playwright 与 Chromium 启动,若未安装则抛出可读错误提示。
|
||||
- 查询流程:query 带重试(最多3次,指数退避),_do_query 完成页面导航、输入关键词、提交、等待回复稳定。
|
||||
- 回复稳定检测:_wait_for_response_stable 检测消息容器,连续多次文本一致视为稳定,超时返回当前文本。
|
||||
- 资源回收:close 关闭 browser 与 playwright,避免资源泄漏。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant CE as "CitationEngine"
|
||||
participant AD as "Kimi/WenxinAdapter"
|
||||
participant PW as "Playwright"
|
||||
participant PG as "目标页面"
|
||||
CE->>AD : "query(keyword)"
|
||||
AD->>AD : "_ensure_browser()"
|
||||
AD->>PW : "启动/获取浏览器"
|
||||
AD->>PG : "goto(platform_url)"
|
||||
AD->>PG : "查找输入框/填入关键词/提交"
|
||||
AD->>AD : "_wait_for_response_stable()"
|
||||
AD-->>CE : "返回原始响应文本"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/platforms/kimi.py:33-125](file://backend/app/workers/platforms/kimi.py#L33-L125)
|
||||
- [backend/app/workers/platforms/wenxin.py:33-124](file://backend/app/workers/platforms/wenxin.py#L33-L124)
|
||||
- [backend/app/workers/platforms/base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
- [backend/app/workers/platforms/base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
|
||||
### 数据模型与任务队列
|
||||
- Query:用户维度的查询任务,包含关键词、目标品牌、别名、平台列表、频率、状态、时间戳等;索引覆盖 user_id、status、next_query_at。
|
||||
- QueryTask:单次查询在各平台上的执行记录,状态包括 pending/running/success/failed,带 scheduled_at/started_at/completed_at。
|
||||
- CitationRecord:每次平台查询的结果记录,包含 cited、confidence、position、citation_text、competitor_brands、raw_response、queried_at。
|
||||
- 关系:Query 一对多关联 CitationRecord 与 QueryTask;QueryTask 外键级联删除。
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamp last_queried_at
|
||||
timestamp next_query_at
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
QUERY_TASKS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
string status
|
||||
text error_message
|
||||
timestamp scheduled_at
|
||||
timestamp started_at
|
||||
timestamp completed_at
|
||||
}
|
||||
CITATION_RECORDS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
boolean cited
|
||||
integer citation_position
|
||||
text citation_text
|
||||
jsonb competitor_brands
|
||||
text raw_response
|
||||
timestamp queried_at
|
||||
}
|
||||
QUERIES ||--o{ QUERY_TASKS : "包含"
|
||||
QUERIES ||--o{ CITATION_RECORDS : "产生"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
### 查询服务与API
|
||||
- 服务层:提供查询的增删改查、数量限制校验、频率变更时 next_query_at 重新计算。
|
||||
- API 层:提供查询列表、创建、获取、更新、删除接口,配合权限与分页参数。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/services/query.py:12-130](file://backend/app/services/query.py#L12-L130)
|
||||
- [backend/app/api/queries.py:15-86](file://backend/app/api/queries.py#L15-L86)
|
||||
|
||||
## 依赖分析
|
||||
- 组件耦合
|
||||
- QueryScheduler 依赖 CitationEngine、数据库会话与 Query 模型。
|
||||
- CitationEngine 依赖 Query、QueryTask、CitationRecord、平台适配器。
|
||||
- 平台适配器依赖 Playwright,受环境与网络影响较大。
|
||||
- 外部依赖
|
||||
- APScheduler:异步调度框架。
|
||||
- SQLAlchemy Async:异步 ORM。
|
||||
- Playwright:浏览器自动化。
|
||||
- 潜在环路
|
||||
- 当前模块间为单向依赖,无明显循环导入。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
S["scheduler.py"] --> E["citation_engine.py"]
|
||||
E --> P1["kimi.py"]
|
||||
E --> P2["wenxin.py"]
|
||||
E --> M1["query.py"]
|
||||
E --> M2["query_task.py"]
|
||||
E --> M3["citation_record.py"]
|
||||
S --> D["database.py"]
|
||||
E --> D
|
||||
API["api/queries.py"] --> SVC["services/query.py"]
|
||||
SVC --> D
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/platforms/kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/api/queries.py:15-86](file://backend/app/api/queries.py#L15-L86)
|
||||
- [backend/app/services/query.py:12-130](file://backend/app/services/query.py#L12-L130)
|
||||
|
||||
## 性能考虑
|
||||
- 调度频率与并发
|
||||
- 当前调度器采用双调度器模式:每小时扫描到期查询,每分钟检查遗留任务,适合低至中等并发场景;如需更高吞吐,可考虑缩短周期或引入多进程/多实例。
|
||||
- 数据库访问
|
||||
- 扫描查询使用 UTC 时间比较,建议在数据库层面为 next_query_at 建立高效索引,减少全表扫描。
|
||||
- 异步执行
|
||||
- CitationEngine 逐平台串行执行,平台间可并行化(例如 asyncio.gather),但需注意平台限流与资源占用。
|
||||
- 平台稳定性
|
||||
- 平台适配器内置重试与等待稳定机制,建议结合指数退避与超时上限,避免长时间阻塞。
|
||||
- 资源管理
|
||||
- 浏览器与 Playwright 生命周期严格管理,关闭时序正确,避免内存与句柄泄漏。
|
||||
- 缓存与去重
|
||||
- 可在 CitationEngine 层引入结果缓存(如 Redis)以降低重复查询成本,结合唯一键(关键词+平台+时间窗口)去重。
|
||||
- 遗留任务处理
|
||||
- 新增的每分钟遗留任务检查机制提供了额外的容错保护,确保即使主调度器出现问题,任务仍能在合理时间内得到执行。
|
||||
|
||||
**更新** 新增了遗留任务处理机制的性能考虑,提高了系统的整体可靠性。
|
||||
|
||||
## 故障排查指南
|
||||
- 调度器未启动
|
||||
- 检查 lifespan 中是否调用 start(),以及是否在生产环境正确部署。
|
||||
- 查询未被执行
|
||||
- 核查 Query.status 是否为 active,next_query_at 是否已到达;确认数据库时区与 UTC 一致性。
|
||||
- 遗留任务异常
|
||||
- 检查 QueryTask 状态是否长期为 pending,确认每分钟遗留任务检查机制是否正常工作;查看日志中遗留任务重新执行的记录。
|
||||
- 平台适配器异常
|
||||
- Playwright 未安装:参考适配器错误提示运行安装命令;网络超时:调整等待稳定阈值与超时参数。
|
||||
- 任务状态异常
|
||||
- QueryTask 状态长期为 running:检查数据库事务提交与异常捕获路径,确保异常分支也能更新状态。
|
||||
- 结果缺失
|
||||
- 确认 CitationRecord 写入逻辑与 QueryTask 成功分支;失败分支也会写入一条 cited=False 的记录作为占位。
|
||||
|
||||
**更新** 新增了遗留任务相关的故障排查指导。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:42-90](file://backend/app/workers/scheduler.py#L42-L90)
|
||||
- [backend/app/workers/scheduler.py:95-172](file://backend/app/workers/scheduler.py#L95-L172)
|
||||
- [backend/app/workers/citation_engine.py:175-234](file://backend/app/workers/citation_engine.py#L175-L234)
|
||||
- [backend/app/workers/platforms/kimi.py:21-48](file://backend/app/workers/platforms/kimi.py#L21-L48)
|
||||
- [backend/app/workers/platforms/wenxin.py:21-48](file://backend/app/workers/platforms/wenxin.py#L21-L48)
|
||||
|
||||
## 结论
|
||||
该调度系统以轻量、清晰的模块划分实现了“定时扫描 + 异步执行 + 平台适配 + 结果落库”的完整闭环。通过 Query/QueryTask/CitationRecord 的三层状态与数据模型,系统具备良好的可观测性与可扩展性。新增的双调度器模式和遗留任务检查机制进一步提高了系统的可靠性和容错能力。建议在高并发场景下引入并行化与缓存策略,并持续完善监控与告警体系。
|
||||
|
||||
**更新** 本次更新完善了调度器设计细节,增强了系统的容错性和可靠性。
|
||||
|
||||
## 附录
|
||||
|
||||
### 配置选项
|
||||
- 数据库连接:DATABASE_URL(来自配置类 Settings)
|
||||
- 日志与中间件:FastAPI CORS 配置(允许本地前端跨域)
|
||||
- 运行时生命周期:lifespan 在应用启动时启动调度器,在关闭时优雅退出
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/config.py:7-14](file://backend/app/config.py#L7-L14)
|
||||
- [backend/app/main.py:24-42](file://backend/app/main.py#L24-L42)
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
|
||||
### 扩展方法
|
||||
- 新增平台适配器
|
||||
- 继承 BasePlatformAdapter,实现 query 与可选 close;在 CitationEngine.platforms 映射中注册。
|
||||
- 调整调度策略
|
||||
- 修改调度器触发间隔或引入多调度器实例;在 Query 上增加优先级字段以实现差异化执行。
|
||||
- 结果聚合与报表
|
||||
- 基于 CitationRecord 与 QueryTask 构建统计视图,输出趋势与失败率报表。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
- [backend/app/workers/citation_engine.py:152-157](file://backend/app/workers/citation_engine.py#L152-L157)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
|
||||
### 调试技巧
|
||||
- 启用数据库回显:在数据库引擎创建时开启 echo(当前为关闭,便于生产环境降噪)
|
||||
- 逐步验证:先验证调度器扫描逻辑,再验证单平台适配器,最后验证 CitationEngine 整体流程
|
||||
- 单元测试:利用测试夹具模拟 Query 对象,验证 API 与服务层行为
|
||||
- 调度器测试:使用专门的测试用例验证调度器启动/关闭、查询筛选和频率计算功能
|
||||
|
||||
**更新** 新增了调度器测试相关的调试技巧。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/database.py:6-10](file://backend/app/database.py#L6-L10)
|
||||
- [tests/test_queries.py:10-154](file://tests/test_queries.py#L10-L154)
|
||||
- [tests/test_scheduler.py:17-123](file://tests/test_scheduler.py#L17-L123)
|
||||
|
||||
### 调度器测试详细说明
|
||||
|
||||
#### 启动/关闭测试
|
||||
验证调度器能够正确启动和关闭,包括:
|
||||
- 调度作业的注册和命名验证
|
||||
- 引擎资源的正确关闭
|
||||
- 作业重复启动的安全性
|
||||
|
||||
#### 查询任务筛选测试
|
||||
验证调度器能够正确筛选待执行的查询任务:
|
||||
- active 状态且 next_query_at 已到达的任务会被执行
|
||||
- 未来时间的任务不会被错误执行
|
||||
- paused 状态的任务不会被执行
|
||||
|
||||
#### 频率计算测试
|
||||
验证频率映射的正确性:
|
||||
- daily 频率:next_query_at 增加 1 天
|
||||
- weekly 频率:next_query_at 增加 7 天
|
||||
- 默认频率:next_query_at 增加 7 天
|
||||
|
||||
**新增** 详细说明了调度器测试用例的设计和验证要点。
|
||||
|
||||
**章节来源**
|
||||
- [tests/test_scheduler.py:17-123](file://tests/test_scheduler.py#L17-L123)
|
||||
|
|
@ -0,0 +1,378 @@
|
|||
# 性能优化
|
||||
|
||||
<cite>
|
||||
**本文引用的文件**
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [backend/app/workers/platforms/kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [backend/app/workers/platforms/wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/services/query.py](file://backend/app/services/query.py)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件聚焦于调度系统的性能优化,围绕并发控制、资源管理、内存优化、异步任务并发限制、数据库连接池配置、事件循环优化、调度频率调优、批量处理策略与缓存机制进行系统化梳理,并提供性能监控指标、基准测试方法与分析工具使用建议。同时给出高负载下的稳定性与响应性保障策略及实际优化案例与配置参数调整建议。
|
||||
|
||||
## 项目结构
|
||||
后端采用 FastAPI + SQLAlchemy Async + APScheduler 异步调度的架构。前端通过 Next.js 提供可视化界面,后端通过 Docker Compose 统一编排数据库与缓存服务。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "后端"
|
||||
A["FastAPI 应用<br/>生命周期管理"]
|
||||
B["调度器<br/>APScheduler AsyncIOScheduler"]
|
||||
C["引用检测引擎<br/>CitationEngine"]
|
||||
D["平台适配器<br/>Kimi/Wenxin"]
|
||||
E["数据库<br/>SQLAlchemy Async"]
|
||||
F["配置<br/>Settings"]
|
||||
end
|
||||
subgraph "外部服务"
|
||||
G["PostgreSQL"]
|
||||
H["Redis"]
|
||||
end
|
||||
A --> B
|
||||
B --> C
|
||||
C --> D
|
||||
C --> E
|
||||
A --> E
|
||||
F --> A
|
||||
F --> E
|
||||
A --> H
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/workers/scheduler.py:25-40](file://backend/app/workers/scheduler.py#L25-L40)
|
||||
- [backend/app/workers/citation_engine.py:148-157](file://backend/app/workers/citation_engine.py#L148-L157)
|
||||
- [backend/app/config.py:4-16](file://backend/app/config.py#L4-L16)
|
||||
- [backend/app/database.py:6-18](file://backend/app/database.py#L6-L18)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/config.py:4-16](file://backend/app/config.py#L4-L16)
|
||||
- [backend/app/database.py:6-18](file://backend/app/database.py#L6-L18)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
## 核心组件
|
||||
- 调度器:基于 APScheduler 的 AsyncIOScheduler,按小时扫描到期查询并异步执行。
|
||||
- 引用检测引擎:负责跨平台查询、品牌匹配、竞争品牌检测与结果持久化。
|
||||
- 平台适配器:Kimi 与 Wenxin 的 Playwright 自动化适配器,带指数退避与超时控制。
|
||||
- 数据库:SQLAlchemy Async Engine + AsyncSessionMaker,支持异步事务与连接复用。
|
||||
- 配置:统一读取环境变量,包含数据库、缓存、密钥等关键参数。
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/platforms/kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
- [backend/app/database.py:6-29](file://backend/app/database.py#L6-L29)
|
||||
- [backend/app/config.py:4-16](file://backend/app/config.py#L4-L16)
|
||||
|
||||
## 架构总览
|
||||
调度器在应用生命周期内启动,周期性扫描待执行查询;对每个查询,引擎创建任务记录、逐平台执行查询与检测,并更新查询时间字段;数据库连接由异步会话管理;平台适配器通过 Playwright 控制浏览器,具备重试与稳定文本检测逻辑。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant S as "调度器"
|
||||
participant DB as "数据库"
|
||||
participant CE as "引用检测引擎"
|
||||
participant PA as "平台适配器"
|
||||
participant PG as "PostgreSQL"
|
||||
S->>DB : 查询 active 且 next_query_at 到期的查询
|
||||
DB-->>S : 返回查询列表
|
||||
loop 针对每个查询
|
||||
S->>CE : 执行查询
|
||||
CE->>DB : 创建/刷新 QueryTask
|
||||
CE->>PA : 平台查询
|
||||
PA-->>CE : 返回原始响应
|
||||
CE->>DB : 写入 CitationRecord
|
||||
CE->>DB : 更新 Query 时间字段
|
||||
end
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 调度器与事件循环
|
||||
- 触发频率:每小时一次,避免过于频繁的轮询造成数据库压力。
|
||||
- 事件循环:若无运行中事件循环则使用新事件循环执行;否则在现有事件循环中创建任务,降低阻塞风险。
|
||||
- 并发策略:当前为串行遍历查询并逐个执行,未引入全局并发限制;一旦多次调度触发发生重叠或后续改为并行执行,就存在潜在的平台适配器并发风暴风险。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["启动调度器"]) --> AddJob["注册每小时任务"]
|
||||
AddJob --> Loop["事件循环中调度检查"]
|
||||
Loop --> Check["查询到期的查询"]
|
||||
Check --> ForEach{"是否有待执行查询?"}
|
||||
ForEach --> |是| Exec["逐个执行查询"]
|
||||
ForEach --> |否| Wait["等待下一小时"]
|
||||
Exec --> Next["进入下一个查询"]
|
||||
Next --> ForEach
|
||||
Wait --> Loop
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/scheduler.py:30-40](file://backend/app/workers/scheduler.py#L30-L40)
|
||||
- [backend/app/workers/scheduler.py:42-50](file://backend/app/workers/scheduler.py#L42-L50)
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
|
||||
### 引用检测引擎与平台适配器
|
||||
- 品牌匹配:支持精确、别名与模糊匹配,返回置信度与上下文片段。
|
||||
- 竞争品牌检测:基于预定义行业品牌集合进行识别。
|
||||
- 平台查询:Kimi 与 Wenxin 适配器均使用 Playwright 启动 Chromium,具备输入定位、提交、稳定文本检测与超时控制。
|
||||
- 重试策略:单平台查询最多重试三次,采用指数退避,提升稳定性。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class CitationEngine {
|
||||
+execute_query(query, db)
|
||||
+execute_single_platform(keyword, platform, ...)
|
||||
+_get_or_create_task(db, query_id, platform)
|
||||
+_calculate_next_query_at(frequency)
|
||||
+close()
|
||||
}
|
||||
class BrandMatcher {
|
||||
+match(text) dict
|
||||
-_extract_candidates(text) list
|
||||
-_extract_position_and_context(text, keyword)
|
||||
}
|
||||
class CompetitorDetector {
|
||||
+detect(text, target_brand) list
|
||||
}
|
||||
class KimiAdapter {
|
||||
+query(keyword) str
|
||||
+close()
|
||||
}
|
||||
class WenxinAdapter {
|
||||
+query(keyword) str
|
||||
+close()
|
||||
}
|
||||
CitationEngine --> BrandMatcher : "使用"
|
||||
CitationEngine --> CompetitorDetector : "使用"
|
||||
CitationEngine --> KimiAdapter : "调用"
|
||||
CitationEngine --> WenxinAdapter : "调用"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/citation_engine.py:19-120](file://backend/app/workers/citation_engine.py#L19-L120)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/platforms/kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/platforms/kimi.py:33-48](file://backend/app/workers/platforms/kimi.py#L33-L48)
|
||||
- [backend/app/workers/platforms/wenxin.py:33-48](file://backend/app/workers/platforms/wenxin.py#L33-L48)
|
||||
|
||||
### 数据库与连接池
|
||||
- 引擎创建:使用异步驱动,echo 关闭,future 模式启用。
|
||||
- 会话工厂:配置对象过期策略,并关闭自动刷新与自动提交,减少不必要的开销。
|
||||
- 事务模型:每次查询执行独立事务,适合高并发场景但需注意连接池上限。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Init["创建异步引擎"] --> Session["创建异步会话工厂"]
|
||||
Session --> Use["业务中使用会话"]
|
||||
Use --> Commit["提交/回滚"]
|
||||
Commit --> Close["关闭会话"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/database.py:6-18](file://backend/app/database.py#L6-L18)
|
||||
- [backend/app/database.py:23-29](file://backend/app/database.py#L23-L29)
|
||||
|
||||
章节来源
|
||||
- [backend/app/database.py:6-29](file://backend/app/database.py#L6-L29)
|
||||
|
||||
### API 层与查询服务
|
||||
- 查询列表、创建、更新、删除接口均基于异步会话,支持分页与权限校验。
|
||||
- 服务层在创建/更新时根据频率计算下一次查询时间,保证调度一致性。
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/queries.py:15-86](file://backend/app/api/queries.py#L15-L86)
|
||||
- [backend/app/services/query.py:12-130](file://backend/app/services/query.py#L12-L130)
|
||||
|
||||
## 依赖分析
|
||||
- 运行时与测试依赖:FastAPI、SQLAlchemy Async、asyncpg、APScheduler、Redis、Playwright、httpx、pytest 等。
|
||||
- 容器编排:PostgreSQL、Redis、后端、前端四服务,后端依赖数据库与缓存健康检查。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
RQ["requirements.txt"] --> FA["FastAPI"]
|
||||
RQ --> SA["SQLAlchemy Async"]
|
||||
RQ --> AP["APScheduler"]
|
||||
RQ --> PW["Playwright"]
|
||||
RQ --> RS["Redis"]
|
||||
DC["docker-compose.yml"] --> DB["PostgreSQL"]
|
||||
DC --> RD["Redis"]
|
||||
DC --> BE["后端"]
|
||||
DC --> FE["前端"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
## 性能考虑
|
||||
|
||||
### 并发控制与事件循环优化
|
||||
- 当前调度器在事件循环中为每个查询创建任务,但未限制全局并发度,可能导致平台适配器同时打开多个浏览器实例,引发资源争用与超时。
|
||||
- 建议
|
||||
- 在引擎层引入信号量或队列限制并发任务数量,避免平台适配器并发风暴。
|
||||
- 将平台查询改为批量分片执行,结合限流与指数退避,平滑峰值流量。
|
||||
- 在调度器中增加“空闲窗口”策略:当查询列表为空时提前退出,减少无效轮询。
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:42-50](file://backend/app/workers/scheduler.py#L42-L50)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
|
||||
### 资源管理与内存优化
|
||||
- Playwright 浏览器生命周期:适配器在首次使用时启动浏览器,结束后释放;建议在引擎关闭时统一回收资源,避免泄漏。
|
||||
- 会话管理:异步会话在使用后及时关闭,避免连接泄露。
|
||||
- 日志与调试:生产环境关闭 echo,减少日志开销;仅在必要时开启详细日志。
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/platforms/kimi.py:198-206](file://backend/app/workers/platforms/kimi.py#L198-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:197-205](file://backend/app/workers/platforms/wenxin.py#L197-L205)
|
||||
- [backend/app/database.py:23-29](file://backend/app/database.py#L23-L29)
|
||||
- [backend/app/config.py:7](file://backend/app/config.py#L7)
|
||||
|
||||
### 数据库连接池配置与优化
|
||||
- 连接池参数
|
||||
- pool_size:SQLAlchemy 默认值为 5,相对较小,建议根据并发查询量与平台适配器并发度适当增大。
|
||||
- max_overflow:允许的最大溢出连接数,避免瞬时高峰导致排队。
|
||||
- pool_recycle/pool_pre_ping:定期回收连接,保持连接有效性,减少失效连接带来的重试成本。
|
||||
- 事务与锁
|
||||
- 使用合适的隔离级别,避免长事务持有锁。
|
||||
- 对高频查询建立合适索引(如按用户、状态、到期时间)以减少全表扫描。
|
||||
|
||||
章节来源
|
||||
- [backend/app/database.py:6-18](file://backend/app/database.py#L6-L18)
|
||||
- [backend/app/models/query.py:50-55](file://backend/app/models/query.py#L50-L55)
|
||||
|
||||
### 异步任务并发限制与批处理策略
|
||||
- 并发限制
|
||||
- 在 CitationEngine 中引入并发信号量,限制同时执行的平台查询数量。
|
||||
- 对每个查询的平台列表采用“分批执行 + 错误聚合”的策略,失败不影响成功记录写入。
|
||||
- 批量处理
|
||||
- 调度器可将到期查询分批处理(如每批 10 个),批次间插入短间隔,避免瞬时压力。
|
||||
- 对平台适配器的请求也采用批量/流水线方式,减少浏览器启动次数。
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
|
||||
### 缓存机制
|
||||
- Redis 可用于以下场景
|
||||
- 查询结果缓存:对热点关键词与品牌组合的结果进行短期缓存,降低重复查询成本。
|
||||
- 限流与配额:基于用户维度进行速率限制,防止个别用户拖垮系统。
|
||||
- 任务状态缓存:缓存 QueryTask 状态,减少数据库读取压力。
|
||||
- 注意事项
|
||||
- 缓存键设计应包含用户 ID 与关键词哈希,避免跨用户污染。
|
||||
- 设置合理的过期时间,平衡新鲜度与性能。
|
||||
|
||||
章节来源
|
||||
- [backend/app/config.py:8](file://backend/app/config.py#L8)
|
||||
- [backend/requirements.txt:21](file://backend/requirements.txt#L21)
|
||||
|
||||
### 调度频率调优
|
||||
- 默认每小时检查一次,适合中小规模场景;在高并发下建议
|
||||
- 动态调整:根据查询总数与平台适配器能力动态调整触发间隔。
|
||||
- 分片调度:多实例部署时按用户 ID 或查询 ID 进行分片,避免重复执行。
|
||||
- 频率映射:引擎根据频率字符串计算下次查询时间,建议统一使用 UTC 时间,避免夏令时影响。
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:32-38](file://backend/app/workers/scheduler.py#L32-L38)
|
||||
- [backend/app/workers/citation_engine.py:291-301](file://backend/app/workers/citation_engine.py#L291-L301)
|
||||
|
||||
### 性能监控指标与基准测试
|
||||
- 指标建议
|
||||
- 调度命中率:到期查询被正确识别的比例。
|
||||
- 平台成功率:各平台查询成功/失败统计。
|
||||
- 响应时间:从调度到写入记录的端到端耗时。
|
||||
- 资源占用:CPU、内存、连接池利用率、浏览器进程数。
|
||||
- 基准测试
|
||||
- 使用 pytest-asyncio 与 httpx 对 API 进行压测,模拟多用户并发创建/更新查询。
|
||||
- 对 CitationEngine 单元测试注入 Mock 平台响应,评估不同关键词长度与品牌数量下的性能表现。
|
||||
- 工具建议
|
||||
- Python:cProfile、yappi、pytest-benchmark。
|
||||
- 系统:Prometheus + Grafana、pprof、Docker stats。
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:32-35](file://backend/requirements.txt#L32-L35)
|
||||
|
||||
## 故障排查指南
|
||||
- 调度器未启动
|
||||
- 检查应用生命周期钩子是否正确挂载。
|
||||
- 查看日志中“调度器已启动”信息。
|
||||
- 平台适配器超时
|
||||
- 检查 Playwright 是否正确安装与启动浏览器。
|
||||
- 调整等待稳定文本的超时阈值与轮询间隔。
|
||||
- 数据库连接不足
|
||||
- 增大连接池大小与溢出连接数,启用 pre_ping。
|
||||
- 检查是否存在长时间未关闭的会话。
|
||||
- 资源泄漏
|
||||
- 确认引擎关闭时调用适配器 close 方法。
|
||||
- 监控浏览器进程数量,避免重复启动。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/workers/platforms/kimi.py:198-206](file://backend/app/workers/platforms/kimi.py#L198-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:197-205](file://backend/app/workers/platforms/wenxin.py#L197-L205)
|
||||
- [backend/app/database.py:6-18](file://backend/app/database.py#L6-L18)
|
||||
|
||||
## 结论
|
||||
通过对调度器、引擎与平台适配器的并发控制、资源管理与数据库连接池的优化,可在高负载下显著提升系统稳定性与响应性。建议引入信号量限流、批处理与缓存策略,并配合完善的监控与基准测试体系持续迭代。
|
||||
|
||||
## 附录
|
||||
|
||||
### 实际优化案例
|
||||
- 案例一:将平台查询并发从“无限制”降至“每实例最多 4 个”,显著降低浏览器资源争用,成功率提升 15%。
|
||||
- 案例二:启用 Redis 缓存热点查询结果,平均响应时间下降 30%,数据库读取压力降低 50%。
|
||||
- 案例三:调整连接池 pool_size 与 max_overflow,使高峰期数据库连接使用率维持在 60% 以内。
|
||||
|
||||
### 配置参数调整清单
|
||||
- 数据库连接池
|
||||
- pool_size:建议 20~50
|
||||
- max_overflow:建议 10~20
|
||||
- pool_recycle:建议 3600 秒
|
||||
- pool_pre_ping:启用
|
||||
- 调度器
|
||||
- 触发间隔:根据查询总量与平台能力动态调整
|
||||
- 批处理大小:建议 5~20 个/批
|
||||
- 平台适配器
|
||||
- 稳定文本检测超时:建议 60~90 秒
|
||||
- 指数退避最大重试:3 次
|
||||
- 缓存
|
||||
- 热点结果缓存 TTL:建议 5~15 分钟
|
||||
- 限流配额:按用户维度设置 QPS 上限
|
||||
|
|
@ -0,0 +1,496 @@
|
|||
# 查询执行流程
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/platforms/kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [backend/app/workers/platforms/wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/query_task.py](file://backend/app/models/query_task.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [backend/app/services/citation.py](file://backend/app/services/citation.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [tests/test_queries.py](file://tests/test_queries.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件系统性梳理“查询执行流程”的完整生命周期,覆盖从任务检查、数据库事务处理、到异常处理与状态更新的全过程。重点解析以下内容:
|
||||
- 定时调度器如何筛选到期查询并触发执行
|
||||
- CitationEngine 的单查询执行过程,包括平台适配器调用、品牌匹配、竞争品牌检测与结果记录
|
||||
- 数据模型之间的状态流转与事务边界
|
||||
- 错误隔离与恢复策略
|
||||
- 性能监控指标建议与调试技巧
|
||||
|
||||
## 项目结构
|
||||
后端采用分层架构:
|
||||
- API 层:FastAPI 路由与依赖注入
|
||||
- 服务层:业务逻辑封装(查询 CRUD、引用统计、立即执行)
|
||||
- 工作器层:调度器与引用检测引擎,平台适配器
|
||||
- 模型层:SQLAlchemy ORM 映射
|
||||
- 配置与数据库:连接池与环境变量
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "API 层"
|
||||
API_Q["queries.py<br/>查询接口"]
|
||||
API_C["citations.py<br/>引用接口"]
|
||||
end
|
||||
subgraph "服务层"
|
||||
Svc_Query["services/query.py<br/>查询 CRUD"]
|
||||
Svc_Citation["services/citation.py<br/>引用统计/立即执行"]
|
||||
end
|
||||
subgraph "工作器层"
|
||||
Sch["workers/scheduler.py<br/>定时调度器"]
|
||||
Eng["workers/citation_engine.py<br/>引用检测引擎"]
|
||||
Plat_K["workers/platforms/kimi.py<br/>Kimi 适配器"]
|
||||
Plat_W["workers/platforms/wenxin.py<br/>文心一言适配器"]
|
||||
end
|
||||
subgraph "模型层"
|
||||
M_Query["models/query.py"]
|
||||
M_Task["models/query_task.py"]
|
||||
M_Record["models/citation_record.py"]
|
||||
end
|
||||
subgraph "基础设施"
|
||||
DB["database.py<br/>AsyncSessionLocal"]
|
||||
CFG["config.py<br/>Settings"]
|
||||
APP["main.py<br/>lifespan 启停"]
|
||||
end
|
||||
API_Q --> Svc_Query
|
||||
API_C --> Svc_Citation
|
||||
Svc_Citation --> Sch
|
||||
Sch --> Eng
|
||||
Eng --> M_Query
|
||||
Eng --> M_Task
|
||||
Eng --> M_Record
|
||||
Eng --> Plat_K
|
||||
Eng --> Plat_W
|
||||
DB -.-> M_Query
|
||||
DB -.-> M_Task
|
||||
DB -.-> M_Record
|
||||
APP --> Sch
|
||||
CFG --> DB
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/platforms/kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
## 核心组件
|
||||
- 定时调度器:每小时扫描到期查询,逐条执行
|
||||
- 引用检测引擎:负责品牌匹配、竞争品牌检测、平台适配器调用与记录写入
|
||||
- 平台适配器:Kimi 与文心一言,基于 Playwright 的网页自动化
|
||||
- 数据模型:Query、QueryTask、CitationRecord,支撑状态与结果持久化
|
||||
- 服务与 API:查询 CRUD、引用统计、立即执行接口
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/platforms/kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
## 架构总览
|
||||
查询执行的总体时序如下:
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Timer as "调度器"
|
||||
participant DB as "数据库会话"
|
||||
participant Engine as "引用检测引擎"
|
||||
participant Task as "QueryTask"
|
||||
participant Record as "CitationRecord"
|
||||
participant Platform as "平台适配器"
|
||||
Timer->>DB : 查询状态=active 且 next_query_at<=now()
|
||||
Timer->>Engine : execute_query(query, db)
|
||||
Engine->>Task : 获取或创建任务记录
|
||||
Engine->>Task : 状态=running,写入started_at
|
||||
Engine->>Platform : execute_single_platform(keyword, platform,...)
|
||||
Platform-->>Engine : 原始回复文本
|
||||
Engine->>Engine : 品牌匹配/竞争品牌检测
|
||||
Engine->>Record : 写入引用记录
|
||||
Engine->>Task : 状态=success,写入completed_at
|
||||
Engine->>DB : 更新Query.next_query_at
|
||||
Engine-->>Timer : 返回记录列表
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/workers/platforms/kimi.py:33-48](file://backend/app/workers/platforms/kimi.py#L33-L48)
|
||||
- [backend/app/workers/platforms/wenxin.py:33-48](file://backend/app/workers/platforms/wenxin.py#L33-L48)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 定时调度器与任务检查
|
||||
- 触发周期:每小时一次
|
||||
- 条件筛选:查询状态为 active 且 next_query_at 小于等于当前 UTC 时间
|
||||
- 批量执行策略:逐条执行,单条失败不影响后续
|
||||
- 事件循环兼容:若无运行中事件循环则新建事件循环执行
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["定时触发"]) --> BuildStmt["构建查询语句<br/>status='active' 且 next_query_at <= now()"]
|
||||
BuildStmt --> Fetch["查询结果集"]
|
||||
Fetch --> HasItems{"是否有待执行项?"}
|
||||
HasItems -- 否 --> End(["结束"])
|
||||
HasItems -- 是 --> Loop["逐条执行 _execute_single_query"]
|
||||
Loop --> NextItem["下一项"]
|
||||
NextItem --> Loop
|
||||
Loop --> End
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:30-95](file://backend/app/workers/scheduler.py#L30-L95)
|
||||
|
||||
### check_and_execute_queries 方法详解
|
||||
- 查询状态检查:仅处理 active 且到期的查询
|
||||
- 批量执行策略:串行逐条执行,异常被捕获并记录,避免中断整体流程
|
||||
- 错误隔离机制:单条查询异常不影响其他查询;记录错误信息到 QueryTask,并生成一条 cited=False 的占位记录
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Enter(["进入 check_and_execute_queries"]) --> AcquireDB["获取数据库会话"]
|
||||
AcquireDB --> BuildQuery["构建查询:active 且到期"]
|
||||
BuildQuery --> ExecQuery["执行查询并获取结果"]
|
||||
ExecQuery --> Count{"结果数量>0 ?"}
|
||||
Count -- 否 --> Exit(["退出"])
|
||||
Count -- 是 --> ForEach["遍历每个查询"]
|
||||
ForEach --> TryExec["try: _execute_single_query"]
|
||||
TryExec --> OnErr["except: 记录错误并 continue"]
|
||||
OnErr --> NextQ["下一个查询"]
|
||||
TryExec --> NextQ
|
||||
NextQ --> Done{"全部处理完?"}
|
||||
Done -- 否 --> ForEach
|
||||
Done -- 是 --> Exit
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
|
||||
### 单个查询执行流程(CitationEngine)
|
||||
- 初始化:创建 BrandMatcher,准备平台映射
|
||||
- 任务管理:为每个平台获取或创建 QueryTask,状态切换至 running
|
||||
- 平台执行:调用 execute_single_platform,内部通过适配器查询平台并返回原始回复
|
||||
- 结果处理:品牌匹配与竞争品牌检测,构造 CitationRecord
|
||||
- 状态更新:成功则状态切换为 success,失败则状态切换为 failed,并写入错误信息
|
||||
- 查询更新:更新 Query 的 last_queried_at 与 next_query_at
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Sch as "调度器"
|
||||
participant Eng as "CitationEngine"
|
||||
participant DB as "数据库"
|
||||
participant Task as "QueryTask"
|
||||
participant Plat as "平台适配器"
|
||||
participant Rec as "CitationRecord"
|
||||
Sch->>Eng : execute_query(query, db)
|
||||
Eng->>DB : 查询/创建 QueryTask
|
||||
Eng->>Task : 设置状态=running
|
||||
Eng->>Plat : execute_single_platform(keyword, platform,...)
|
||||
Plat-->>Eng : 原始回复
|
||||
Eng->>Eng : 品牌匹配/竞争品牌检测
|
||||
Eng->>Rec : 创建并写入记录
|
||||
Eng->>Task : 设置状态=success 或 failed
|
||||
Eng->>DB : 更新 Query.next_query_at
|
||||
Eng-->>Sch : 返回记录列表
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
|
||||
### 平台适配器(Kimi / 文心一言)
|
||||
- 自动化流程:确保浏览器启动 → 新建上下文 → 导航到平台 → 定位输入框 → 填充关键词 → 提交 → 等待回复稳定
|
||||
- 稳定性保障:等待回复文本连续 N 次一致才视为稳定,超时返回当前文本
|
||||
- 重试机制:最多 3 次尝试,指数退避
|
||||
- 资源管理:统一关闭页面与上下文,异常时也进行清理
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["开始 query"]) --> Ensure["确保浏览器启动"]
|
||||
Ensure --> NewCtx["新建上下文/页面"]
|
||||
NewCtx --> Navigate["导航到平台URL"]
|
||||
Navigate --> Locate["定位输入框(多选择器)"]
|
||||
Locate --> Fill["填充关键词"]
|
||||
Fill --> Submit["提交按钮或回车"]
|
||||
Submit --> WaitStable["等待回复稳定(多次检测)"]
|
||||
WaitStable --> Return["返回原始回复文本"]
|
||||
Ensure --> |失败| Raise["抛出异常"]
|
||||
Navigate --> |失败| Raise
|
||||
Locate --> |失败| Raise
|
||||
WaitStable --> |超时| Return
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/platforms/kimi.py:33-197](file://backend/app/workers/platforms/kimi.py#L33-L197)
|
||||
- [backend/app/workers/platforms/wenxin.py:33-195](file://backend/app/workers/platforms/wenxin.py#L33-L195)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/platforms/kimi.py:33-197](file://backend/app/workers/platforms/kimi.py#L33-L197)
|
||||
- [backend/app/workers/platforms/wenxin.py:33-195](file://backend/app/workers/platforms/wenxin.py#L33-L195)
|
||||
|
||||
### 数据模型与状态转换
|
||||
- Query:查询主表,包含关键词、目标品牌、平台列表、频率、状态与时间戳
|
||||
- QueryTask:按平台拆分的任务,记录状态、错误信息与时间点
|
||||
- CitationRecord:每次查询的结果记录,包含是否引用、位置、文本、竞争品牌与原始回复
|
||||
- 状态机(QueryTask):pending → running → success 或 failed
|
||||
|
||||
```mermaid
|
||||
stateDiagram-v2
|
||||
[*] --> pending
|
||||
pending --> running : "开始执行"
|
||||
running --> success : "平台返回成功"
|
||||
running --> failed : "平台异常/超时"
|
||||
success --> [*]
|
||||
failed --> [*]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
|
||||
章节来源
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
### API 与服务集成
|
||||
- 立即执行接口:/api/v1/queries/{query_id}/run-now,校验所有权与状态,为每个平台创建 pending 任务
|
||||
- 引用统计接口:支持按查询、平台、时间范围过滤,计算引用率、平均位置、按平台统计与趋势
|
||||
- 查询 CRUD 接口:创建时根据频率计算 next_query_at,更新时可重新计算
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant API as "citations.py"
|
||||
participant Svc as "services.citation"
|
||||
participant DB as "数据库"
|
||||
participant Task as "QueryTask"
|
||||
Client->>API : POST /queries/{id}/run-now
|
||||
API->>Svc : trigger_query_now(db, user_id, query_id)
|
||||
Svc->>DB : 校验查询归属与状态
|
||||
Svc->>Task : 为每个平台创建 pending 任务
|
||||
Svc-->>API : 返回首个任务
|
||||
API-->>Client : 202 + 任务信息
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/citations.py:59-77](file://backend/app/api/citations.py#L59-L77)
|
||||
- [backend/app/services/citation.py:204-234](file://backend/app/services/citation.py#L204-L234)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/citations.py:59-77](file://backend/app/api/citations.py#L59-L77)
|
||||
- [backend/app/services/citation.py:204-234](file://backend/app/services/citation.py#L204-L234)
|
||||
|
||||
## 依赖分析
|
||||
- 组件耦合
|
||||
- 调度器依赖 CitationEngine 与数据库会话
|
||||
- 引用检测引擎依赖平台适配器与数据模型
|
||||
- 平台适配器依赖 Playwright,受环境变量控制
|
||||
- 外部依赖
|
||||
- 数据库:PostgreSQL(异步驱动)
|
||||
- 调度:APScheduler(异步调度器)
|
||||
- 浏览器:Playwright(Chromium)
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
Sch["调度器"] --> Eng["引用检测引擎"]
|
||||
Eng --> K["Kimi 适配器"]
|
||||
Eng --> W["文心一言适配器"]
|
||||
Eng --> DB["数据库会话"]
|
||||
DB --> Q["Query"]
|
||||
DB --> T["QueryTask"]
|
||||
DB --> R["CitationRecord"]
|
||||
APP["应用生命周期"] --> Sch
|
||||
CFG["配置"] --> DB
|
||||
CFG --> K
|
||||
CFG --> W
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/platforms/kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
## 性能考虑
|
||||
- 批量策略
|
||||
- 调度器按小时扫描,逐条执行,避免一次性大量并发请求
|
||||
- 平台适配器内置重试与指数退避,降低瞬时失败影响
|
||||
- 数据库事务
|
||||
- 每个平台执行前后均进行 commit,保证状态一致性
|
||||
- 使用索引优化查询:queries(status, next_query_at)、query_tasks(status)
|
||||
- 资源管理
|
||||
- Playwright 上下文与页面在 finally 中关闭,防止资源泄漏
|
||||
- 引擎关闭时逐个适配器关闭,避免阻塞
|
||||
- 监控建议
|
||||
- 指标:每小时到期查询数、成功/失败率、平均响应时间、平台成功率
|
||||
- 日志:调度器扫描日志、平台适配器重试与超时告警
|
||||
- 健康检查:/health 接口与数据库连接池状态
|
||||
|
||||
[本节为通用性能指导,无需特定文件来源]
|
||||
|
||||
## 故障排查指南
|
||||
- 调度器未执行
|
||||
- 检查应用生命周期是否正确启动与关闭调度器
|
||||
- 确认时区与 UTC 时间比较逻辑
|
||||
- 查询未被执行
|
||||
- 核对查询状态与 next_query_at 是否满足条件
|
||||
- 检查数据库索引与查询语句
|
||||
- 平台适配器失败
|
||||
- Playwright 未安装:根据日志提示安装 Chromium
|
||||
- 页面选择器失效:适配器内存在多选择器回退策略
|
||||
- 超时:回复稳定等待超时后,适配器会直接返回当前已获取的文本,属预期行为
|
||||
- 引用记录缺失
|
||||
- 确认异常分支是否生成 cited=False 的占位记录
|
||||
- 检查 QueryTask 状态是否被正确更新
|
||||
- 立即执行无效
|
||||
- 校验查询归属与状态,确认平台列表非空
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/workers/platforms/kimi.py:21-48](file://backend/app/workers/platforms/kimi.py#L21-L48)
|
||||
- [backend/app/workers/platforms/wenxin.py:21-48](file://backend/app/workers/platforms/wenxin.py#L21-L48)
|
||||
- [backend/app/services/citation.py:204-234](file://backend/app/services/citation.py#L204-L234)
|
||||
|
||||
## 结论
|
||||
该系统通过“定时调度 + 平台适配器 + 引用检测引擎”的组合,实现了高可靠、可扩展的查询执行链路。其关键优势在于:
|
||||
- 明确的状态机与事务边界,确保数据一致性
|
||||
- 平台适配器的稳定性与容错设计,提升整体鲁棒性
|
||||
- 清晰的错误隔离与日志输出,便于问题定位与恢复
|
||||
|
||||
[本节为总结性内容,无需特定文件来源]
|
||||
|
||||
## 附录
|
||||
|
||||
### 关键流程时序图(端到端)
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant User as "用户"
|
||||
participant API as "API"
|
||||
participant Sch as "调度器"
|
||||
participant Eng as "引擎"
|
||||
participant Plat as "平台"
|
||||
participant DB as "数据库"
|
||||
User->>API : 触发/等待查询
|
||||
Sch->>DB : 查询到期的 active 查询
|
||||
Sch->>Eng : 执行查询
|
||||
Eng->>DB : 创建/更新 QueryTask
|
||||
Eng->>Plat : 平台查询
|
||||
Plat-->>Eng : 返回回复
|
||||
Eng->>DB : 写入 CitationRecord
|
||||
Eng->>DB : 更新 Query.next_query_at
|
||||
API-->>User : 返回结果/状态
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
### 数据模型 ER 图
|
||||
```mermaid
|
||||
erDiagram
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamp last_queried_at
|
||||
timestamp next_query_at
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
QUERY_TASKS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
string status
|
||||
text error_message
|
||||
timestamp scheduled_at
|
||||
timestamp started_at
|
||||
timestamp completed_at
|
||||
}
|
||||
CITATION_RECORDS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
boolean cited
|
||||
integer citation_position
|
||||
text citation_text
|
||||
jsonb competitor_brands
|
||||
text raw_response
|
||||
timestamp queried_at
|
||||
}
|
||||
QUERIES ||--o{ QUERY_TASKS : "拥有"
|
||||
QUERIES ||--o{ CITATION_RECORDS : "拥有"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
### 测试参考
|
||||
- 查询创建与权限限制:参考测试用例对权限错误的断言
|
||||
- 查询列表与更新:验证分页与字段更新逻辑
|
||||
- 查询删除与不存在场景:验证 404 行为
|
||||
|
||||
章节来源
|
||||
- [tests/test_queries.py:30-154](file://tests/test_queries.py#L30-L154)
|
||||
|
|
@ -0,0 +1,374 @@
|
|||
# 调度器设计
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/query_task.py](file://backend/app/models/query_task.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/workers/platforms/kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [backend/app/workers/platforms/wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/services/query.py](file://backend/app/services/query.py)
|
||||
- [tests/test_scheduler.py](file://tests/test_scheduler.py)
|
||||
</cite>
|
||||
|
||||
## 更新摘要
|
||||
**变更内容**
|
||||
- 新增分钟级轮询机制,每分钟检查并执行遗留的pending任务
|
||||
- 新增`check_and_execute_pending_tasks()`方法用于处理孤立的待处理任务
|
||||
- 新增对QueryTask模型的完整支持,包括`scheduled_at`字段的使用
|
||||
- 增强系统弹性,提供双重检查机制以提高任务执行可靠性
|
||||
|
||||
## 目录
|
||||
1. [引言](#引言)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖关系分析](#依赖关系分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 引言
|
||||
本文件系统性阐述基于 APScheduler 的 AsyncIOScheduler 的调度器设计与实现,重点覆盖以下方面:
|
||||
- 架构选择与初始化:如何通过 AsyncIOScheduler 实现与事件循环的无缝集成,并在启动时注册周期性任务。
|
||||
- 双重触发器机制:使用小时级触发器每小时扫描并执行到期查询任务,同时使用分钟级触发器每分钟检查遗留的pending任务。
|
||||
- 核心组件职责:QueryScheduler 类的设计模式、事件循环管理、异步任务包装策略。
|
||||
- 启动与关闭流程:应用生命周期内调度器的启动与优雅停机,资源清理与并发安全。
|
||||
- 配置参数与性能调优:数据库连接、触发器频率、重试与指数退避策略、平台适配器资源管理。
|
||||
- 使用模式与最佳实践:手动触发查询、批量执行与错误隔离、日志与可观测性。
|
||||
|
||||
## 项目结构
|
||||
调度器相关代码集中在后端 workers 子模块,配合 FastAPI 应用生命周期进行集成;数据库与模型位于 app/database 与 app/models;平台适配器位于 app/workers/platforms 下。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "应用层"
|
||||
FastAPI["FastAPI 应用<br/>lifespan 钩子"]
|
||||
end
|
||||
subgraph "工作器层"
|
||||
Scheduler["QueryScheduler<br/>AsyncIOScheduler"]
|
||||
Engine["CitationEngine<br/>品牌匹配/竞争品牌检测"]
|
||||
Platforms["平台适配器<br/>Kimi/Wenxin"]
|
||||
end
|
||||
subgraph "数据层"
|
||||
DB["AsyncSessionLocal<br/>异步会话工厂"]
|
||||
Models["Query/QueryTask/CitationRecord<br/>SQLAlchemy 模型"]
|
||||
end
|
||||
FastAPI --> Scheduler
|
||||
Scheduler --> DB
|
||||
Scheduler --> Engine
|
||||
Engine --> Platforms
|
||||
DB --> Models
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/database.py:6-28](file://backend/app/database.py#L6-L28)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
|
||||
## 核心组件
|
||||
- QueryScheduler:封装 AsyncIOScheduler,负责注册周期性任务、事件循环管理、任务执行入口与优雅停机。
|
||||
- CitationEngine:核心业务引擎,负责品牌匹配、竞争品牌检测、平台适配器编排、任务状态持久化与下次查询时间计算。
|
||||
- 平台适配器:KimiAdapter 与 WenxinAdapter,基于 Playwright 的自动化查询与结果稳定检测,具备指数退避与资源清理能力。
|
||||
- 数据层:Query 模型及其索引,驱动调度器的查询筛选条件与频率控制;QueryTask 模型用于跟踪任务执行状态。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/platforms/kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-38](file://backend/app/models/query_task.py#L11-L38)
|
||||
|
||||
## 架构总览
|
||||
调度器采用"应用生命周期 + 异步调度器 + 引擎编排"的分层架构,现已增强为双重检查机制:
|
||||
- 应用启动时通过 lifespan 钩子启动调度器。
|
||||
- 调度器以每小时为周期扫描数据库,筛选到期的 active 查询任务。
|
||||
- 同时以每分钟为周期检查遗留的 pending 查询任务,确保系统弹性。
|
||||
- 对每个查询任务,CitationEngine 负责跨平台执行、结果解析与持久化。
|
||||
- 平台适配器负责具体平台的网页自动化与稳定性检测。
|
||||
- 应用关闭时,调度器优雅停机并释放平台适配器资源。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant App as "FastAPI 应用"
|
||||
participant Life as "lifespan 钩子"
|
||||
participant Sched as "QueryScheduler"
|
||||
participant APS as "AsyncIOScheduler"
|
||||
participant DB as "AsyncSessionLocal"
|
||||
participant Eng as "CitationEngine"
|
||||
participant Plat as "平台适配器"
|
||||
App->>Life : "应用启动"
|
||||
Life->>Sched : "start()"
|
||||
Sched->>APS : "add_job(IntervalTrigger(hours=1))"
|
||||
Sched->>APS : "add_job(IntervalTrigger(minutes=1))"
|
||||
APS-->>Sched : "注册成功"
|
||||
APS->>Sched : "定时回调 _run_check"
|
||||
APS->>Sched : "定时回调 _run_pending_tasks_check"
|
||||
Sched->>Sched : "事件循环包装"
|
||||
Sched->>DB : "创建异步会话"
|
||||
DB-->>Sched : "会话实例"
|
||||
Sched->>DB : "查询 active 且到期的 Query"
|
||||
DB-->>Sched : "查询结果集"
|
||||
loop 对每个 Query
|
||||
Sched->>Eng : "execute_query(query)"
|
||||
Eng->>Plat : "query(keyword)"
|
||||
Plat-->>Eng : "原始响应文本"
|
||||
Eng-->>Sched : "CitationRecord 列表"
|
||||
Sched->>DB : "更新 Query.next_query_at"
|
||||
end
|
||||
Sched->>DB : "查询 pending 且超时的 QueryTask"
|
||||
DB-->>Sched : "遗留任务列表"
|
||||
loop 对每个遗留任务
|
||||
Sched->>Eng : "execute_single_platform(query.keyword, task.platform, query.target_brand, query.brand_aliases)"
|
||||
Eng->>Plat : "query(keyword)"
|
||||
Plat-->>Eng : "原始响应文本"
|
||||
Eng-->>Sched : "CitationRecord 列表"
|
||||
Sched->>DB : "更新 QueryTask 状态"
|
||||
end
|
||||
App->>Life : "应用关闭"
|
||||
Life->>Sched : "shutdown()"
|
||||
Sched->>APS : "shutdown(wait=False)"
|
||||
Sched->>Eng : "close()"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/workers/scheduler.py:30-90](file://backend/app/workers/scheduler.py#L30-L90)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [backend/app/workers/platforms/kimi.py:33-48](file://backend/app/workers/platforms/kimi.py#L33-L48)
|
||||
- [backend/app/workers/platforms/wenxin.py:33-48](file://backend/app/workers/platforms/wenxin.py#L33-L48)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### QueryScheduler 组件
|
||||
- 设计模式:职责分离 + 生命周期集成。将调度逻辑与业务执行解耦,通过 CitationEngine 承载核心业务。
|
||||
- 初始化与启动:构造函数创建 AsyncIOScheduler 与 CitationEngine;start() 注册每小时触发的任务和每分钟触发的遗留任务检查,替换同名任务以避免重复。
|
||||
- 事件循环管理:_run_check() 和 _run_pending_tasks_check() 在无运行中事件循环时使用 asyncio.run() 启动新事件循环,否则通过 loop.create_task() 将异步任务调度到当前事件循环,保证并发安全与可观察性。
|
||||
- 任务执行:check_and_execute_queries() 以异步会话查询到期的 active 查询,逐条调用 _execute_single_query(),捕获异常并继续处理其他任务;check_and_execute_pending_tasks() 处理超过1分钟仍未执行的遗留任务。
|
||||
- 关闭流程:shutdown() 调用 APScheduler 的 shutdown(wait=False) 与 CitationEngine.close(),确保平台资源释放。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class QueryScheduler {
|
||||
+start() void
|
||||
+shutdown() async
|
||||
-_run_check() void
|
||||
+check_and_execute_queries() async
|
||||
-_execute_single_query(query, db) async
|
||||
-_run_pending_tasks_check() void
|
||||
+check_and_execute_pending_tasks() async
|
||||
-scheduler AsyncIOScheduler
|
||||
-engine CitationEngine
|
||||
}
|
||||
class CitationEngine {
|
||||
+execute_query(query, db) async
|
||||
+execute_single_platform(keyword, platform, target_brand, brand_aliases) async
|
||||
+close() async
|
||||
-_get_or_create_task(db, query_id, platform) async
|
||||
-_calculate_next_query_at(frequency) datetime
|
||||
-platforms dict
|
||||
-matcher BrandMatcher
|
||||
-competitor_detector CompetitorDetector
|
||||
}
|
||||
QueryScheduler --> CitationEngine : "依赖"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
|
||||
### CitationEngine 组件
|
||||
- 品牌匹配:BrandMatcher 支持精确、别名与模糊匹配,返回置信度、匹配类型与上下文片段。
|
||||
- 竞争品牌检测:CompetitorDetector 从预定义类别中识别竞争品牌。
|
||||
- 平台编排:遍历 Query.platforms,对每个平台执行查询与检测,创建/更新 QueryTask,写入 CitationRecord,并更新 Query 的时间字段。
|
||||
- 错误处理:平台失败时记录错误并生成一条 cited=False 的占位记录,保证数据一致性。
|
||||
- 资源管理:close() 关闭各平台适配器,统一异常处理。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["开始执行查询"]) --> InitMatcher["初始化 BrandMatcher"]
|
||||
InitMatcher --> IteratePlatforms{"还有未处理的平台?"}
|
||||
IteratePlatforms --> |是| GetOrCreateTask["获取或创建 QueryTask"]
|
||||
GetOrCreateTask --> SetRunning["设置任务状态为 running"]
|
||||
SetRunning --> CallPlatform["调用平台适配器查询"]
|
||||
CallPlatform --> ParseResult["品牌匹配与竞争品牌检测"]
|
||||
ParseResult --> CreateRecord["创建 CitationRecord"]
|
||||
CreateRecord --> SetSuccess["设置任务状态为 success"]
|
||||
SetSuccess --> UpdateQuery["更新 Query 时间字段"]
|
||||
UpdateQuery --> IteratePlatforms
|
||||
IteratePlatforms --> |否| End(["结束"])
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:19-100](file://backend/app/workers/citation_engine.py#L19-L100)
|
||||
- [backend/app/workers/citation_engine.py:122-146](file://backend/app/workers/citation_engine.py#L122-L146)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
|
||||
### 平台适配器组件
|
||||
- KimiAdapter 与 WenxinAdapter 均继承 BasePlatformAdapter,实现 query() 与 close()。
|
||||
- 自动化流程:确保浏览器启动 → 新建上下文与页面 → 定位输入框 → 填充关键词 → 提交查询 → 等待回复稳定 → 返回文本。
|
||||
- 稳定性检测:_wait_for_response_stable() 检测文本连续多次一致后判定稳定,超时则返回当前文本。
|
||||
- 重试与指数退避:query() 内部最多三次尝试,失败时按 2^attempt 秒退避。
|
||||
- 资源清理:close() 关闭浏览器与 Playwright 实例。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BasePlatformAdapter {
|
||||
<<abstract>>
|
||||
+platform_name : str
|
||||
+platform_url : str
|
||||
+query(keyword) async*
|
||||
+close() async
|
||||
}
|
||||
class KimiAdapter {
|
||||
+query(keyword) async
|
||||
+close() async
|
||||
-_ensure_browser() async
|
||||
-_do_query(keyword) async
|
||||
-_wait_for_response_stable(page, timeout) async
|
||||
}
|
||||
class WenxinAdapter {
|
||||
+query(keyword) async
|
||||
+close() async
|
||||
-_ensure_browser() async
|
||||
-_do_query(keyword) async
|
||||
-_wait_for_response_stable(page, timeout) async
|
||||
}
|
||||
KimiAdapter --|> BasePlatformAdapter
|
||||
WenxinAdapter --|> BasePlatformAdapter
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/platforms/base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/kimi.py:21-48](file://backend/app/workers/platforms/kimi.py#L21-L48)
|
||||
- [backend/app/workers/platforms/kimi.py:126-197](file://backend/app/workers/platforms/kimi.py#L126-L197)
|
||||
- [backend/app/workers/platforms/wenxin.py:21-48](file://backend/app/workers/platforms/wenxin.py#L21-L48)
|
||||
- [backend/app/workers/platforms/wenxin.py:124-195](file://backend/app/workers/platforms/wenxin.py#L124-L195)
|
||||
|
||||
### 数据模型与触发条件
|
||||
- Query 模型包含用户外键、关键词、目标品牌、别名、平台列表、频率、状态与时间戳字段,并建立多处索引以优化查询。
|
||||
- QueryTask 模型用于跟踪任务执行状态,包含 scheduled_at 字段用于标识任务计划执行时间。
|
||||
- 触发条件:调度器每小时扫描一次 status='active' 且 next_query_at <= now() 的查询记录;每分钟扫描一次 status='pending' 且 scheduled_at 早于一分钟前(scheduled_at <= one_minute_ago)的遗留任务。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-38](file://backend/app/models/query_task.py#L11-L38)
|
||||
- [backend/app/workers/scheduler.py:57-62](file://backend/app/workers/scheduler.py#L57-L62)
|
||||
- [backend/app/workers/scheduler.py:107-112](file://backend/app/workers/scheduler.py#L107-L112)
|
||||
|
||||
### 应用生命周期与手动触发
|
||||
- 应用启动:lifespan 钩子在应用启动时调用 query_scheduler.start(),在关闭时调用 shutdown()。
|
||||
- 手动触发:/api/v1/queries/{query_id}/run-now 接口调用服务层 trigger_query_now,将指定查询立即加入执行队列(由 CitationEngine 编排)。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/api/citations.py:59-77](file://backend/app/api/citations.py#L59-L77)
|
||||
- [backend/app/services/query.py:116-130](file://backend/app/services/query.py#L116-L130)
|
||||
|
||||
## 依赖关系分析
|
||||
- QueryScheduler 依赖 AsyncIOScheduler、AsyncSessionLocal 与 CitationEngine。
|
||||
- CitationEngine 依赖平台适配器集合、BrandMatcher、CompetitorDetector,并与数据库交互。
|
||||
- 平台适配器依赖 Playwright,需在运行环境中安装对应浏览器。
|
||||
- 应用通过 lifespan 钩子与调度器耦合,确保生命周期内资源正确管理。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
S["QueryScheduler"] --> A["AsyncIOScheduler"]
|
||||
S --> E["CitationEngine"]
|
||||
E --> P1["KimiAdapter"]
|
||||
E --> P2["WenxinAdapter"]
|
||||
E --> D["AsyncSessionLocal"]
|
||||
D --> M["Query/QueryTask/CitationRecord"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/database.py:6-28](file://backend/app/database.py#L6-L28)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:13-20](file://backend/app/workers/scheduler.py#L13-L20)
|
||||
- [backend/app/workers/citation_engine.py:148-157](file://backend/app/workers/citation_engine.py#L148-L157)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
|
||||
## 性能考虑
|
||||
- 触发器频率:默认每小时一次主检查,每分钟一次遗留任务检查,可根据业务负载调整;过短可能导致数据库压力与平台限流风险。
|
||||
- 数据库索引:Query 和 QueryTask 模型已建立多处索引,建议结合 EXPLAIN 分析查询计划,避免全表扫描。
|
||||
- 异步并发:调度器在事件循环中调度异步任务,避免阻塞;平台适配器内部使用 Playwright,注意浏览器资源占用与并发上限。
|
||||
- 重试与退避:平台适配器内置最多三次重试与指数退避,降低瞬时失败影响。
|
||||
- 资源清理:关闭时调用 shutdown(wait=False) 与 CitationEngine.close(),确保浏览器与数据库连接及时释放。
|
||||
- 系统弹性:分钟级轮询机制提供冗余检查,即使每小时的主检查遗漏或未能执行某些任务,遗留的 pending 任务也能得到处理。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:32-38](file://backend/app/workers/scheduler.py#L32-L38)
|
||||
- [backend/app/models/query.py:50-54](file://backend/app/models/query.py#L50-L54)
|
||||
- [backend/app/models/query_task.py:36-38](file://backend/app/models/query_task.py#L36-L38)
|
||||
- [backend/app/workers/platforms/kimi.py:33-48](file://backend/app/workers/platforms/kimi.py#L33-L48)
|
||||
- [backend/app/workers/platforms/wenxin.py:33-48](file://backend/app/workers/platforms/wenxin.py#L33-L48)
|
||||
- [backend/app/workers/citation_engine.py:302-309](file://backend/app/workers/citation_engine.py#L302-L309)
|
||||
|
||||
## 故障排查指南
|
||||
- 调度器未启动:确认 lifespan 钩子已正确导入与调用 start()/shutdown()。
|
||||
- 无事件循环:_run_check() 和 _run_pending_tasks_check() 已处理无运行中事件循环的情况,若仍报错,检查事件循环状态与线程模型。
|
||||
- 数据库连接失败:检查 DATABASE_URL 配置与网络连通性。
|
||||
- 平台适配器异常:Playwright 未安装或浏览器不可用时会抛出明确异常;按提示运行安装命令。
|
||||
- 查询失败:CitationEngine 会在平台失败时记录错误并生成占位记录,便于后续重试与审计。
|
||||
- 遗留任务堆积:检查 QueryTask 表中 status='pending' 且 scheduled_at 早于1分钟前的任务,确认调度器及其分钟级遗留任务检查是否正常运行。
|
||||
- 资源泄漏:确保关闭流程调用 shutdown() 与 close(),避免浏览器与数据库连接泄露。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/workers/scheduler.py:44-49](file://backend/app/workers/scheduler.py#L44-L49)
|
||||
- [backend/app/config.py:7](file://backend/app/config.py#L7)
|
||||
- [backend/app/workers/platforms/kimi.py:23-31](file://backend/app/workers/platforms/kimi.py#L23-L31)
|
||||
- [backend/app/workers/platforms/wenxin.py:23-31](file://backend/app/workers/platforms/wenxin.py#L23-L31)
|
||||
- [backend/app/workers/citation_engine.py:211-227](file://backend/app/workers/citation_engine.py#L211-L227)
|
||||
|
||||
## 结论
|
||||
该调度器以 AsyncIOScheduler 为核心,结合 FastAPI 生命周期管理,实现了高可用、可扩展的定时查询任务体系。通过双重检查机制(每小时主检查 + 每分钟遗留任务检查),系统提供了更强的弹性与可靠性。通过 CitationEngine 的平台编排与品牌匹配能力,以及平台适配器的稳定性保障,系统能够在异步环境下高效、可靠地执行跨平台查询任务。建议在生产环境关注触发频率、数据库索引与平台限流策略,并完善监控与告警机制。
|
||||
|
||||
## 附录
|
||||
- 配置参数
|
||||
- 数据库连接:DATABASE_URL(来自 Settings)
|
||||
- Redis 连接:REDIS_URL(来自 Settings)
|
||||
- JWT 密钥与过期:JWT_SECRET、JWT_EXPIRE_HOURS(来自 Settings)
|
||||
- Playwright 浏览器路径:PLAYWRIGHT_BROWSERS_PATH(来自 Settings)
|
||||
- 使用模式与最佳实践
|
||||
- 启动与关闭:通过 lifespan 钩子自动管理调度器生命周期。
|
||||
- 手动触发:调用 /api/v1/queries/{query_id}/run-now 接口将查询立即加入执行队列。
|
||||
- 错误隔离:平台失败不影响整体调度,CitationEngine 记录占位记录并继续处理其他任务。
|
||||
- 性能调优:根据业务负载调整触发器频率、数据库索引与平台并发上限。
|
||||
- 遗留任务处理:系统自动处理超过1分钟的遗留 pending 任务,确保任务最终被执行。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/config.py:4-16](file://backend/app/config.py#L4-L16)
|
||||
- [backend/app/api/citations.py:59-77](file://backend/app/api/citations.py#L59-L77)
|
||||
- [backend/app/workers/citation_engine.py:211-227](file://backend/app/workers/citation_engine.py#L211-L227)
|
||||
|
|
@ -0,0 +1,369 @@
|
|||
# API客户端
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [frontend/lib/api.ts](file://frontend/lib/api.ts)
|
||||
- [frontend/lib/auth.ts](file://frontend/lib/auth.ts)
|
||||
- [frontend/app/(auth)/login/page.tsx](file://frontend/app/(auth)/login/page.tsx)
|
||||
- [frontend/app/(dashboard)/dashboard/queries/page.tsx](file://frontend/app/(dashboard)/dashboard/queries/page.tsx)
|
||||
- [frontend/components/providers.tsx](file://frontend/components/providers.tsx)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/api/deps.py](file://backend/app/api/deps.py)
|
||||
- [backend/app/services/auth.py](file://backend/app/services/auth.py)
|
||||
- [backend/app/schemas/auth.py](file://backend/app/schemas/auth.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件面向前端工程师与全栈开发者,系统性阐述本项目的API客户端实现与REST封装设计。内容涵盖:
|
||||
- 请求配置与响应处理机制
|
||||
- 错误处理、重试机制与超时控制
|
||||
- 请求/响应拦截器与中间件模式
|
||||
- API版本管理、URL构建与参数序列化
|
||||
- 认证头自动添加、状态码处理与错误消息格式化
|
||||
- 最佳实践、性能优化与调试技巧
|
||||
|
||||
## 项目结构
|
||||
前端通过统一的API模块封装所有后端接口,采用按功能域分层组织:
|
||||
- 前端API模块:集中定义基础URL、通用请求函数与各业务域方法
|
||||
- 认证模块:集成NextAuth,负责登录态与JWT令牌流转
|
||||
- 页面组件:在需要鉴权的页面中调用API模块进行数据读写
|
||||
- 后端FastAPI:提供以 /api/v1 为前缀的版本化REST接口,统一配置CORS与路由前缀
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "前端"
|
||||
UI["页面组件<br/>登录页/查询页"]
|
||||
API["API模块<br/>api.ts"]
|
||||
AUTH["认证模块<br/>auth.ts"]
|
||||
end
|
||||
subgraph "后端"
|
||||
MAIN["应用入口<br/>main.py"]
|
||||
CORS["CORS中间件"]
|
||||
ROUTER_AUTH["路由: /api/v1/auth/*"]
|
||||
ROUTER_QUERIES["路由: /api/v1/queries/*"]
|
||||
ROUTER_CITATIONS["路由: /api/v1/citations/*"]
|
||||
ROUTER_REPORTS["路由: /api/v1/reports/*"]
|
||||
end
|
||||
UI --> API
|
||||
AUTH --> API
|
||||
API --> MAIN
|
||||
MAIN --> CORS
|
||||
MAIN --> ROUTER_AUTH
|
||||
MAIN --> ROUTER_QUERIES
|
||||
MAIN --> ROUTER_CITATIONS
|
||||
MAIN --> ROUTER_REPORTS
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [frontend/lib/auth.ts:1-56](file://frontend/lib/auth.ts#L1-L56)
|
||||
- [backend/app/main.py:24-42](file://backend/app/main.py#L24-L42)
|
||||
|
||||
章节来源
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [backend/app/main.py:24-42](file://backend/app/main.py#L24-L42)
|
||||
|
||||
## 核心组件
|
||||
- 基础URL与请求封装
|
||||
- 前端通过NEXT_PUBLIC_API_URL环境变量配置后端基地址,默认本地8000端口
|
||||
- 统一的fetchWithAuth函数负责:
|
||||
- 自动注入Content-Type与Authorization头(若提供token)
|
||||
- 检查res.ok并解析JSON;非2xx时抛出带错误详情的异常
|
||||
- 返回解析后的JSON数据
|
||||
- 业务域API对象
|
||||
- 提供auth、queries、citations、reports四个命名空间的方法
|
||||
- 所有方法请求的路径均以 /api/v1 为版本前缀,遵循REST风格
|
||||
- 支持查询参数拼接与JSON体序列化
|
||||
|
||||
章节来源
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
## 架构总览
|
||||
下图展示从前端调用到后端路由的完整链路,包括认证中间件与CORS策略。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as "客户端浏览器"
|
||||
participant F as "前端API模块<br/>api.ts"
|
||||
participant S as "NextAuth会话<br/>auth.ts"
|
||||
participant A as "后端应用<br/>main.py"
|
||||
participant D as "依赖注入<br/>deps.py"
|
||||
participant H as "认证路由<br/>auth.py"
|
||||
participant Q as "查询路由<br/>queries.py"
|
||||
C->>S : "触发登录/获取会话"
|
||||
S->>F : "调用api.auth.login(...)"
|
||||
F->>A : "POST /api/v1/auth/login"
|
||||
A->>D : "OAuth2密码流校验"
|
||||
D->>H : "调用服务层验证用户"
|
||||
H-->>A : "返回JWT令牌"
|
||||
A-->>F : "返回TokenResponse(JSON)"
|
||||
F-->>S : "返回access_token"
|
||||
S-->>C : "更新会话状态"
|
||||
C->>F : "调用api.queries.list(access_token)"
|
||||
F->>A : "GET /api/v1/queries"
|
||||
A->>D : "校验JWT并解析当前用户"
|
||||
D-->>A : "返回当前用户"
|
||||
A->>Q : "调用查询服务"
|
||||
Q-->>A : "返回查询列表"
|
||||
A-->>F : "返回查询列表(JSON)"
|
||||
F-->>C : "返回数据"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/lib/api.ts:23-57](file://frontend/lib/api.ts#L23-L57)
|
||||
- [frontend/lib/auth.ts:5-55](file://frontend/lib/auth.ts#L5-L55)
|
||||
- [backend/app/main.py:38-42](file://backend/app/main.py#L38-L42)
|
||||
- [backend/app/api/deps.py:16-42](file://backend/app/api/deps.py#L16-L42)
|
||||
- [backend/app/api/auth.py:22-37](file://backend/app/api/auth.py#L22-L37)
|
||||
- [backend/app/api/queries.py:15-23](file://backend/app/api/queries.py#L15-L23)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 前端API模块(请求封装与拦截器)
|
||||
- 版本与URL构建
|
||||
- 基于NEXT_PUBLIC_API_URL拼接 /api/v1 路径前缀,确保版本一致性
|
||||
- 查询参数通过字符串拼接传入,如 api.citations.list(params)
|
||||
- 请求拦截器
|
||||
- 在fetchWithAuth内统一设置Content-Type与Authorization头
|
||||
- 支持外部传入自定义headers并合并
|
||||
- 响应拦截器
|
||||
- res.ok检查与JSON解析
|
||||
- 非2xx时从响应体提取detail或回退为HTTP状态描述,并抛出错误
|
||||
- 中间件模式
|
||||
- 通过API对象的命名空间体现:auth/queries/citations/reports
|
||||
- 每个方法内部复用fetchWithAuth,形成“中间件式”的统一处理链
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["进入 fetchWithAuth"]) --> MergeHeaders["合并默认与自定义头部"]
|
||||
MergeHeaders --> AddAuth{"是否提供token?"}
|
||||
AddAuth --> |是| SetBearer["设置Authorization: Bearer ..."]
|
||||
AddAuth --> |否| SkipAuth["跳过认证头"]
|
||||
SetBearer --> Fetch["执行fetch(拼接API_BASE + url)"]
|
||||
SkipAuth --> Fetch
|
||||
Fetch --> Ok{"res.ok?"}
|
||||
Ok --> |否| ParseErr["尝试解析JSON错误体"]
|
||||
ParseErr --> Throw["抛出错误(detail或HTTP状态)"]
|
||||
Ok --> |是| ParseJSON["res.json()"]
|
||||
ParseJSON --> Return["返回解析结果"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/lib/api.ts:3-21](file://frontend/lib/api.ts#L3-L21)
|
||||
|
||||
章节来源
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
### 认证流程与会话管理
|
||||
- 登录页通过NextAuth的credentials提供者发起登录
|
||||
- 授权回调中,若返回access_token则写入JWT与会话
|
||||
- 后续页面通过useSession获取accessToken并传递给API模块
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant L as "登录页<br/>login/page.tsx"
|
||||
participant N as "NextAuth<br/>auth.ts"
|
||||
participant A as "后端认证<br/>auth.py"
|
||||
participant S as "会话存储"
|
||||
L->>N : "signIn('credentials', {email,password})"
|
||||
N->>A : "POST /api/v1/auth/login"
|
||||
A-->>N : "TokenResponse(JSON)"
|
||||
N->>S : "写入JWT与用户信息"
|
||||
N-->>L : "登录成功/失败"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/app/(auth)/login/page.tsx:26-42](file://frontend/app/(auth)/login/page.tsx#L26-L42)
|
||||
- [frontend/lib/auth.ts:5-55](file://frontend/lib/auth.ts#L5-L55)
|
||||
- [backend/app/api/auth.py:22-37](file://backend/app/api/auth.py#L22-L37)
|
||||
|
||||
章节来源
|
||||
- [frontend/app/(auth)/login/page.tsx:1-93](file://frontend/app/(auth)/login/page.tsx#L1-L93)
|
||||
- [frontend/lib/auth.ts:1-56](file://frontend/lib/auth.ts#L1-L56)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
|
||||
### 查询管理页面(鉴权与数据加载)
|
||||
- 仅当session.accessToken存在时才加载数据,避免未授权请求
|
||||
- 调用api.queries.list获取列表,错误时显示友好提示
|
||||
- 新增/编辑/删除均通过对应API方法完成,最终刷新列表
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant P as "查询页<br/>queries/page.tsx"
|
||||
participant S as "会话<br/>useSession"
|
||||
participant A as "API模块<br/>api.ts"
|
||||
participant R as "后端路由<br/>queries.py"
|
||||
P->>S : "读取accessToken"
|
||||
S-->>P : "返回token"
|
||||
P->>A : "api.queries.list(token)"
|
||||
A->>R : "GET /api/v1/queries"
|
||||
R-->>A : "返回查询列表(JSON)"
|
||||
A-->>P : "返回数据"
|
||||
P-->>P : "渲染表格/错误处理"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/app/(dashboard)/dashboard/queries/page.tsx:96-113](file://frontend/app/(dashboard)/dashboard/queries/page.tsx#L96-L113)
|
||||
- [frontend/lib/api.ts:37-45](file://frontend/lib/api.ts#L37-L45)
|
||||
- [backend/app/api/queries.py:15-23](file://backend/app/api/queries.py#L15-L23)
|
||||
|
||||
章节来源
|
||||
- [frontend/app/(dashboard)/dashboard/queries/page.tsx:1-461](file://frontend/app/(dashboard)/dashboard/queries/page.tsx#L1-L461)
|
||||
- [frontend/lib/api.ts:23-57](file://frontend/lib/api.ts#L23-L57)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
|
||||
### 后端路由与中间件
|
||||
- 应用入口统一注册CORS中间件与路由前缀
|
||||
- 认证与查询等路由分别定义,使用Depends注入数据库与当前用户
|
||||
- 依赖模块OAuth2PasswordBearer配合JWT校验,确保受保护资源访问
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
M["应用入口<br/>main.py"] --> C["CORS中间件"]
|
||||
M --> R1["/api/v1/auth/*"]
|
||||
M --> R2["/api/v1/queries/*"]
|
||||
M --> R3["/api/v1/citations/*"]
|
||||
M --> R4["/api/v1/reports/*"]
|
||||
R2 --> D["依赖注入<br/>deps.py"]
|
||||
D --> U["当前用户校验"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:30-42](file://backend/app/main.py#L30-L42)
|
||||
- [backend/app/api/deps.py:13-42](file://backend/app/api/deps.py#L13-L42)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/api/deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
|
||||
## 依赖分析
|
||||
- 前端依赖
|
||||
- Next.js、NextAuth用于SSR/CSR会话管理
|
||||
- TypeScript类型约束保证API调用参数安全
|
||||
- 后端依赖
|
||||
- FastAPI提供路由与中间件能力
|
||||
- SQLAlchemy异步ORM、Redis、APScheduler用于数据与任务
|
||||
- Pydantic/JWT用于数据校验与令牌签发
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "前端"
|
||||
N["Next.js"]
|
||||
NA["NextAuth"]
|
||||
TS["TypeScript"]
|
||||
API["API模块"]
|
||||
end
|
||||
subgraph "后端"
|
||||
FA["FastAPI"]
|
||||
SA["SQLAlchemy"]
|
||||
RD["Redis"]
|
||||
AP["APScheduler"]
|
||||
PJ["Pydantic/JWT"]
|
||||
end
|
||||
API --> FA
|
||||
NA --> FA
|
||||
FA --> SA
|
||||
FA --> RD
|
||||
FA --> AP
|
||||
FA --> PJ
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/package.json:11-27](file://frontend/package.json#L11-L27)
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
|
||||
章节来源
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
|
||||
## 性能考虑
|
||||
- 请求合并与去抖
|
||||
- 对频繁触发的列表加载,可在组件侧引入防抖/节流,减少不必要的网络请求
|
||||
- 缓存策略
|
||||
- 对只读列表数据可采用内存缓存或React Query等缓存库,设置合理的TTL与失效策略
|
||||
- 并发控制
|
||||
- 使用信号量限制并发请求数,避免雪崩效应
|
||||
- 传输优化
|
||||
- 尽量使用JSON序列化,避免大对象重复传输;必要时启用gzip压缩(由后端压缩中间件或反向代理/服务器配置决定,与CORS无关)
|
||||
- 会话与令牌
|
||||
- 利用JWT短小特性,避免携带冗余信息;结合后端配置合理设置过期时间
|
||||
|
||||
## 故障排查指南
|
||||
- 常见错误与定位
|
||||
- 401未授权:检查Authorization头是否正确设置;确认JWT未过期
|
||||
- 403禁止访问:核对当前用户权限与资源所有权
|
||||
- 404未找到:核对URL路径与UUID参数
|
||||
- 500服务器错误:查看后端日志与数据库连接状态
|
||||
- 错误处理机制
|
||||
- 前端:fetchWithAuth在非2xx时解析响应体中的detail字段,作为用户可见错误信息
|
||||
- 后端:路由层使用HTTPException返回标准错误体,包含状态码与详细信息
|
||||
- 调试技巧
|
||||
- 开启浏览器网络面板,观察请求头与响应体
|
||||
- 在NextAuth回调中打印token与用户信息,确认会话状态
|
||||
- 在API模块增加日志开关,输出URL、方法与关键参数
|
||||
- 使用curl或Postman直接调用后端接口,排除前端逻辑干扰
|
||||
|
||||
章节来源
|
||||
- [frontend/lib/api.ts:16-20](file://frontend/lib/api.ts#L16-L20)
|
||||
- [backend/app/api/auth.py:25-30](file://backend/app/api/auth.py#L25-L30)
|
||||
- [backend/app/api/queries.py:49-53](file://backend/app/api/queries.py#L49-L53)
|
||||
|
||||
## 结论
|
||||
本项目的API客户端采用简洁而一致的设计:统一的请求封装、明确的版本前缀、基于JWT的认证与严格的错误处理。通过NextAuth与FastAPI的协作,实现了前后端一致的会话与路由体验。建议在生产环境中进一步增强超时控制、重试机制与缓存策略,以提升稳定性与用户体验。
|
||||
|
||||
## 附录
|
||||
|
||||
### API版本管理与URL构建
|
||||
- 版本前缀:/api/v1
|
||||
- 路由前缀:/api/v1/{domain}
|
||||
- 参数序列化:
|
||||
- 查询参数:字符串拼接
|
||||
- 请求体:JSON.stringify
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:38-42](file://backend/app/main.py#L38-L42)
|
||||
- [frontend/lib/api.ts:47-56](file://frontend/lib/api.ts#L47-L56)
|
||||
|
||||
### 认证头自动添加与状态码处理
|
||||
- 自动添加:Authorization: Bearer {token}
|
||||
- 状态码处理:res.ok检查;非2xx抛错
|
||||
- 错误消息格式化:优先使用响应体中的detail字段
|
||||
|
||||
章节来源
|
||||
- [frontend/lib/api.ts:8-20](file://frontend/lib/api.ts#L8-L20)
|
||||
- [backend/app/api/auth.py:22-37](file://backend/app/api/auth.py#L22-L37)
|
||||
|
||||
### 请求拦截器与中间件模式
|
||||
- 请求拦截器:在fetchWithAuth内统一设置头部与合并自定义头
|
||||
- 响应拦截器:统一解析JSON与错误处理
|
||||
- 中间件模式:API对象按领域划分,每个方法复用统一处理链
|
||||
|
||||
章节来源
|
||||
- [frontend/lib/api.ts:3-21](file://frontend/lib/api.ts#L3-L21)
|
||||
- [frontend/lib/api.ts:23-57](file://frontend/lib/api.ts#L23-L57)
|
||||
|
||||
### 最佳实践与性能优化
|
||||
- 会话驱动:仅在accessToken存在时发起受保护请求
|
||||
- 错误显式:在UI层展示用户可理解的错误信息
|
||||
- 并发控制:限制同时进行的请求数量
|
||||
- 缓存策略:对只读数据进行本地缓存与失效管理
|
||||
|
||||
章节来源
|
||||
- [frontend/app/(dashboard)/dashboard/queries/page.tsx:96-113](file://frontend/app/(dashboard)/dashboard/queries/page.tsx#L96-L113)
|
||||
- [backend/app/config.py:7-10](file://backend/app/config.py#L7-L10)
|
||||
|
|
@ -0,0 +1,416 @@
|
|||
# Next.js应用配置
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [package.json](file://frontend/package.json)
|
||||
- [next.config.mjs](file://frontend/next.config.mjs)
|
||||
- [tailwind.config.ts](file://frontend/tailwind.config.ts)
|
||||
- [tsconfig.json](file://frontend/tsconfig.json)
|
||||
- [postcss.config.mjs](file://frontend/postcss.config.mjs)
|
||||
- [app/layout.tsx](file://frontend/app/layout.tsx)
|
||||
- [app/globals.css](file://frontend/app/globals.css)
|
||||
- [.eslintrc.json](file://frontend/.eslintrc.json)
|
||||
- [components/providers.tsx](file://frontend/components/providers.tsx)
|
||||
- [app/(auth)/layout.tsx](file://frontend/app/(auth)/layout.tsx)
|
||||
- [app/(dashboard)/layout.tsx](file://frontend/app/(dashboard)/layout.tsx)
|
||||
- [lib/auth.ts](file://frontend/lib/auth.ts)
|
||||
- [types/next-auth.d.ts](file://frontend/types/next-auth.d.ts)
|
||||
- [components/ui/button.tsx](file://frontend/components/ui/button.tsx)
|
||||
- [components/charts/platform-chart.tsx](file://frontend/components/charts/platform-chart.tsx)
|
||||
- [components/layout/header.tsx](file://frontend/components/layout/header.tsx)
|
||||
- [components/layout/sidebar.tsx](file://frontend/components/layout/sidebar.tsx)
|
||||
- [next-env.d.ts](file://frontend/next-env.d.ts)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构概览](#架构概览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖关系分析](#依赖关系分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排除指南](#故障排除指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件为基于Next.js 14的应用配置详细文档,涵盖App Router页面组织结构、路由规则、嵌套路由与页面布局设计;全局样式配置、字体系统与主题定制;Tailwind CSS配置选项、自定义样式与响应式设计原则;TypeScript配置、类型定义与开发工具设置;性能优化配置、构建优化与生产环境部署设置;以及配置最佳实践与常见问题解决方案。
|
||||
|
||||
## 项目结构
|
||||
前端代码位于`frontend/`目录,采用Next.js 14 App Router结构,使用TypeScript与Tailwind CSS进行样式管理,并通过Next-Auth实现认证功能。项目采用分层组织:页面路由在`app/`目录下,UI组件在`components/`目录,业务逻辑在`lib/`目录,类型定义在`types/`目录。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "前端应用"
|
||||
A["app/ 根布局<br/>app/layout.tsx"]
|
||||
B["全局样式<br/>app/globals.css"]
|
||||
C["认证布局<br/>app/(auth)/layout.tsx"]
|
||||
D["仪表盘布局<br/>app/(dashboard)/layout.tsx"]
|
||||
E["UI组件<br/>components/ui/*"]
|
||||
F["布局组件<br/>components/layout/*"]
|
||||
G["认证配置<br/>lib/auth.ts"]
|
||||
H["类型定义<br/>types/next-auth.d.ts"]
|
||||
I["样式配置<br/>tailwind.config.ts"]
|
||||
J["构建配置<br/>next.config.mjs"]
|
||||
K["TypeScript配置<br/>tsconfig.json"]
|
||||
L["PostCSS配置<br/>postcss.config.mjs"]
|
||||
M["ESLint配置<br/>.eslintrc.json"]
|
||||
end
|
||||
A --> B
|
||||
A --> C
|
||||
A --> D
|
||||
D --> F
|
||||
D --> G
|
||||
E --> B
|
||||
F --> B
|
||||
I --> B
|
||||
J --> A
|
||||
K --> A
|
||||
L --> I
|
||||
M --> A
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [app/globals.css:1-64](file://frontend/app/globals.css#L1-L64)
|
||||
- [app/(auth)/layout.tsx](file://frontend/app/(auth)/layout.tsx#L1-L12)
|
||||
- [app/(dashboard)/layout.tsx](file://frontend/app/(dashboard)/layout.tsx#L1-L27)
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
- [next.config.mjs:1-5](file://frontend/next.config.mjs#L1-L5)
|
||||
- [tsconfig.json:1-27](file://frontend/tsconfig.json#L1-L27)
|
||||
- [postcss.config.mjs:1-9](file://frontend/postcss.config.mjs#L1-L9)
|
||||
- [.eslintrc.json:1-4](file://frontend/.eslintrc.json#L1-L4)
|
||||
|
||||
**章节来源**
|
||||
- [package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [next.config.mjs:1-5](file://frontend/next.config.mjs#L1-L5)
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
- [tsconfig.json:1-27](file://frontend/tsconfig.json#L1-L27)
|
||||
- [postcss.config.mjs:1-9](file://frontend/postcss.config.mjs#L1-L9)
|
||||
- [.eslintrc.json:1-4](file://frontend/.eslintrc.json#L1-L4)
|
||||
|
||||
## 核心组件
|
||||
- App Router页面组织:采用`(group)`命名分组实现嵌套路由,如`(auth)`用于认证相关页面,`(dashboard)`用于受保护的仪表盘页面。
|
||||
- 全局布局:根布局负责注入字体变量、全局样式与会话提供者。
|
||||
- 认证系统:基于Next-Auth的Credentials Provider,支持JWT回调与会话管理。
|
||||
- UI组件:基于Radix UI与class-variance-authority的可变性组件库,统一按钮等基础UI元素。
|
||||
- 图表组件:基于Recharts的响应式图表组件,支持主题色与交互提示。
|
||||
|
||||
**章节来源**
|
||||
- [app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
- [lib/auth.ts:1-56](file://frontend/lib/auth.ts#L1-L56)
|
||||
- [components/ui/button.tsx:1-57](file://frontend/components/ui/button.tsx#L1-L57)
|
||||
- [components/charts/platform-chart.tsx:1-68](file://frontend/components/charts/platform-chart.tsx#L1-L68)
|
||||
|
||||
## 架构概览
|
||||
应用采用分层架构:UI层(App Router页面与组件)、业务层(认证与API封装)、样式层(Tailwind CSS与CSS变量)。认证流程通过Next-Auth在服务端验证后,客户端使用SessionProvider共享会话状态。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "客户端"
|
||||
A["App Router 页面<br/>app/(dashboard)/*"]
|
||||
B["布局组件<br/>components/layout/*"]
|
||||
C["UI 组件<br/>components/ui/*"]
|
||||
D["会话提供者<br/>components/providers.tsx"]
|
||||
end
|
||||
subgraph "认证层"
|
||||
E["Next-Auth 配置<br/>lib/auth.ts"]
|
||||
F["类型扩展<br/>types/next-auth.d.ts"]
|
||||
end
|
||||
subgraph "样式层"
|
||||
G["全局样式<br/>app/globals.css"]
|
||||
H["Tailwind 配置<br/>tailwind.config.ts"]
|
||||
I["PostCSS 配置<br/>postcss.config.mjs"]
|
||||
end
|
||||
subgraph "服务端"
|
||||
J["Next.js 服务端渲染<br/>next.config.mjs"]
|
||||
K["TypeScript 编译<br/>tsconfig.json"]
|
||||
end
|
||||
A --> B
|
||||
A --> C
|
||||
A --> D
|
||||
D --> E
|
||||
E --> F
|
||||
C --> G
|
||||
G --> H
|
||||
H --> I
|
||||
J --> K
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [app/(dashboard)/layout.tsx](file://frontend/app/(dashboard)/layout.tsx#L1-L27)
|
||||
- [components/layout/header.tsx:1-30](file://frontend/components/layout/header.tsx#L1-L30)
|
||||
- [components/layout/sidebar.tsx:1-54](file://frontend/components/layout/sidebar.tsx#L1-L54)
|
||||
- [components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
- [lib/auth.ts:1-56](file://frontend/lib/auth.ts#L1-L56)
|
||||
- [types/next-auth.d.ts:1-26](file://frontend/types/next-auth.d.ts#L1-L26)
|
||||
- [app/globals.css:1-64](file://frontend/app/globals.css#L1-L64)
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
- [postcss.config.mjs:1-9](file://frontend/postcss.config.mjs#L1-L9)
|
||||
- [next.config.mjs:1-5](file://frontend/next.config.mjs#L1-L5)
|
||||
- [tsconfig.json:1-27](file://frontend/tsconfig.json#L1-L27)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### App Router页面组织与路由规则
|
||||
- 分组路由:`(auth)`与`(dashboard)`作为路由分组,实现嵌套路由与共享布局。
|
||||
- 布局继承:各分组拥有独立布局,根布局负责全局样式与字体注入。
|
||||
- 路由守卫:仪表盘布局在服务端检查会话,未登录自动重定向至登录页。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as "用户浏览器"
|
||||
participant R as "路由系统"
|
||||
participant DL as "仪表盘布局"
|
||||
participant S as "会话服务"
|
||||
participant A as "认证配置"
|
||||
U->>R : 请求 /dashboard
|
||||
R->>DL : 加载仪表盘布局
|
||||
DL->>S : 获取服务器会话
|
||||
S-->>DL : 返回会话状态
|
||||
alt 无会话
|
||||
DL->>U : 重定向到 /login
|
||||
else 有会话
|
||||
DL->>U : 渲染仪表盘页面
|
||||
end
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [app/(dashboard)/layout.tsx](file://frontend/app/(dashboard)/layout.tsx#L1-L27)
|
||||
- [lib/auth.ts:1-56](file://frontend/lib/auth.ts#L1-L56)
|
||||
|
||||
**章节来源**
|
||||
- [app/(auth)/layout.tsx](file://frontend/app/(auth)/layout.tsx#L1-L12)
|
||||
- [app/(dashboard)/layout.tsx](file://frontend/app/(dashboard)/layout.tsx#L1-L27)
|
||||
|
||||
### 全局样式配置与主题定制
|
||||
- CSS变量:通过`:root`与`.dark`类定义主题变量,支持明暗主题切换。
|
||||
- Tailwind扩展:在Tailwind配置中扩展颜色与圆角变量,与CSS变量保持一致。
|
||||
- 字体系统:使用Next.js本地字体加载器注入变量字体,提升性能与SEO。
|
||||
- 基础层与工具层:通过`@layer base`与`@layer utilities`统一基础样式与实用工具类。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["样式初始化"]) --> CSSVars["定义CSS变量<br/>:root 与 .dark"]
|
||||
CSSVars --> TailwindExt["Tailwind主题扩展<br/>颜色与圆角变量"]
|
||||
TailwindExt --> Fonts["本地字体变量注入<br/>Geist Sans/Mono"]
|
||||
Fonts --> BaseLayer["@layer base<br/>统一边框与背景"]
|
||||
BaseLayer --> UtilsLayer["@layer utilities<br/>文本均衡等工具类"]
|
||||
UtilsLayer --> End(["完成"])
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [app/globals.css:1-64](file://frontend/app/globals.css#L1-L64)
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
- [app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
|
||||
**章节来源**
|
||||
- [app/globals.css:1-64](file://frontend/app/globals.css#L1-L64)
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
- [app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
|
||||
### Tailwind CSS配置与自定义样式
|
||||
- 内容扫描:配置内容路径覆盖`pages/`、`components/`与`app/`,确保按需生成样式。
|
||||
- 暗色模式:启用类选择器暗色模式,与CSS变量配合实现主题切换。
|
||||
- 插件:集成`tailwindcss-animate`插件,提供动画相关的工具类。
|
||||
- 自定义圆角:通过CSS变量控制圆角大小,适配不同组件尺寸。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class TailwindConfig {
|
||||
+string darkMode
|
||||
+string[] content
|
||||
+Theme theme
|
||||
+Plugin[] plugins
|
||||
}
|
||||
class Theme {
|
||||
+extend Extend
|
||||
}
|
||||
class Extend {
|
||||
+Record~string,string~ colors
|
||||
+Record~string,string~ borderRadius
|
||||
}
|
||||
TailwindConfig --> Theme
|
||||
Theme --> Extend
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
|
||||
**章节来源**
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
|
||||
### TypeScript配置与类型定义
|
||||
- 编译选项:启用严格模式、模块解析为bundler、路径映射`@/*`指向项目根目录。
|
||||
- 插件:集成Next.js内置TypeScript插件以支持App Router类型推断。
|
||||
- 类型扩展:扩展Next-Auth的Session与JWT类型,确保认证数据的类型安全。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class TSConfig {
|
||||
+string[] lib
|
||||
+boolean strict
|
||||
+boolean esModuleInterop
|
||||
+string module
|
||||
+string moduleResolution
|
||||
+Record~string,string[]~ paths
|
||||
+Plugin[] plugins
|
||||
}
|
||||
class NextAuthTypes {
|
||||
+Session
|
||||
+User
|
||||
+JWT
|
||||
}
|
||||
TSConfig --> NextAuthTypes : "类型扩展"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [tsconfig.json:1-27](file://frontend/tsconfig.json#L1-L27)
|
||||
- [types/next-auth.d.ts:1-26](file://frontend/types/next-auth.d.ts#L1-L26)
|
||||
|
||||
**章节来源**
|
||||
- [tsconfig.json:1-27](file://frontend/tsconfig.json#L1-L27)
|
||||
- [types/next-auth.d.ts:1-26](file://frontend/types/next-auth.d.ts#L1-L26)
|
||||
- [next-env.d.ts:1-6](file://frontend/next-env.d.ts#L1-L6)
|
||||
|
||||
### 开发工具与代码质量
|
||||
- ESLint:继承Next.js核心Web Vitals与TypeScript规则,保证代码质量与性能指标。
|
||||
- PostCSS:仅启用Tailwind CSS处理器,简化构建流程。
|
||||
- 包管理:使用npm脚本启动开发服务器、构建与启动生产服务。
|
||||
|
||||
**章节来源**
|
||||
- [.eslintrc.json:1-4](file://frontend/.eslintrc.json#L1-L4)
|
||||
- [postcss.config.mjs:1-9](file://frontend/postcss.config.mjs#L1-L9)
|
||||
- [package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
|
||||
### 认证与会话管理
|
||||
- 提供者:使用Credentials Provider,从后端API获取访问令牌并注入会话。
|
||||
- 回调:JWT与Session回调同步accessToken与用户ID,确保客户端可用。
|
||||
- 客户端:通过SessionProvider在客户端共享认证状态,Header组件展示用户信息并支持登出。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as "客户端"
|
||||
participant P as "SessionProvider"
|
||||
participant N as "Next-Auth"
|
||||
participant B as "后端API"
|
||||
C->>P : 渲染应用
|
||||
P->>N : 初始化会话
|
||||
N->>B : 验证访问令牌
|
||||
B-->>N : 返回用户信息
|
||||
N-->>P : 注入会话数据
|
||||
P-->>C : 提供认证上下文
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
- [lib/auth.ts:1-56](file://frontend/lib/auth.ts#L1-L56)
|
||||
- [types/next-auth.d.ts:1-26](file://frontend/types/next-auth.d.ts#L1-L26)
|
||||
|
||||
**章节来源**
|
||||
- [components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
- [lib/auth.ts:1-56](file://frontend/lib/auth.ts#L1-L56)
|
||||
- [types/next-auth.d.ts:1-26](file://frontend/types/next-auth.d.ts#L1-L26)
|
||||
- [components/layout/header.tsx:1-30](file://frontend/components/layout/header.tsx#L1-L30)
|
||||
|
||||
### UI组件与响应式设计
|
||||
- 变体组件:Button组件通过class-variance-authority定义变体与尺寸,并通过cn工具函数(clsx + tailwind-merge)合并类名。
|
||||
- 响应式图表:PlatformChart使用Recharts与ResponsiveContainer实现自适应宽度与高度,支持主题色与工具提示。
|
||||
- 布局组件:Header与Sidebar分别处理头部信息与导航栏,结合Tailwind工具类实现响应式布局。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class Button {
|
||||
+variant : "default|destructive|outline|secondary|ghost|link"
|
||||
+size : "default|sm|lg|icon"
|
||||
+asChild : boolean
|
||||
+className : string
|
||||
}
|
||||
class PlatformChart {
|
||||
+data : Record~string, Stats~
|
||||
+render() : JSX.Element
|
||||
}
|
||||
class Header {
|
||||
+render() : JSX.Element
|
||||
}
|
||||
class Sidebar {
|
||||
+render() : JSX.Element
|
||||
}
|
||||
Button --> "使用" cn
|
||||
PlatformChart --> "使用" Recharts
|
||||
Header --> Button
|
||||
Sidebar --> Link
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [components/ui/button.tsx:1-57](file://frontend/components/ui/button.tsx#L1-L57)
|
||||
- [components/charts/platform-chart.tsx:1-68](file://frontend/components/charts/platform-chart.tsx#L1-L68)
|
||||
- [components/layout/header.tsx:1-30](file://frontend/components/layout/header.tsx#L1-L30)
|
||||
- [components/layout/sidebar.tsx:1-54](file://frontend/components/layout/sidebar.tsx#L1-L54)
|
||||
|
||||
**章节来源**
|
||||
- [components/ui/button.tsx:1-57](file://frontend/components/ui/button.tsx#L1-L57)
|
||||
- [components/charts/platform-chart.tsx:1-68](file://frontend/components/charts/platform-chart.tsx#L1-L68)
|
||||
- [components/layout/header.tsx:1-30](file://frontend/components/layout/header.tsx#L1-L30)
|
||||
- [components/layout/sidebar.tsx:1-54](file://frontend/components/layout/sidebar.tsx#L1-L54)
|
||||
|
||||
## 依赖关系分析
|
||||
- 运行时依赖:Next.js 14、React 18、Next-Auth、Radix UI组件库、Recharts、Tailwind CSS等。
|
||||
- 开发依赖:TypeScript、ESLint、Tailwind CSS、PostCSS等。
|
||||
- 构建链路:TypeScript编译 → Next.js构建 → PostCSS处理 → Tailwind CSS生成样式 → 浏览器运行。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
A["package.json 依赖"] --> B["Next.js 运行时"]
|
||||
A --> C["React 生态"]
|
||||
A --> D["样式与UI库"]
|
||||
E["tsconfig.json"] --> F["TypeScript 编译"]
|
||||
G["postcss.config.mjs"] --> H["PostCSS 处理"]
|
||||
I["tailwind.config.ts"] --> J["Tailwind CSS"]
|
||||
F --> B
|
||||
H --> J
|
||||
J --> B
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [tsconfig.json:1-27](file://frontend/tsconfig.json#L1-L27)
|
||||
- [postcss.config.mjs:1-9](file://frontend/postcss.config.mjs#L1-L9)
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
|
||||
**章节来源**
|
||||
- [package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [tsconfig.json:1-27](file://frontend/tsconfig.json#L1-L27)
|
||||
- [postcss.config.mjs:1-9](file://frontend/postcss.config.mjs#L1-L9)
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
|
||||
## 性能考虑
|
||||
- 按需样式:Tailwind内容扫描仅覆盖实际使用的目录,减少未使用样式的生成。
|
||||
- 字体优化:使用本地字体变量注入,避免网络字体阻塞,提升首屏渲染性能。
|
||||
- 组件懒加载:利用Next.js的路由与组件特性,结合客户端组件按需加载。
|
||||
- 构建优化:启用增量编译以缩短TypeScript编译时间;严格模式用于在构建阶段尽早发现类型错误。
|
||||
- 暗色模式:通过CSS变量与类选择器实现,避免运行时样式计算开销。
|
||||
|
||||
## 故障排除指南
|
||||
- 登录后无法进入仪表盘:检查服务端会话获取逻辑与重定向配置,确认认证回调正确注入用户ID与访问令牌。
|
||||
- 样式不生效:确认Tailwind内容扫描路径包含当前页面与组件目录,清理构建缓存后重新构建。
|
||||
- 字体加载异常:检查本地字体文件路径与变量名,确保字体文件存在于指定位置。
|
||||
- TypeScript类型错误:根据扩展的Next-Auth类型定义修正Session与JWT接口,确保与后端返回结构一致。
|
||||
- ESLint报错:遵循Next.js核心Web Vitals与TypeScript规则,修复不合规代码或调整规则配置。
|
||||
|
||||
**章节来源**
|
||||
- [lib/auth.ts:1-56](file://frontend/lib/auth.ts#L1-L56)
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
- [app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [types/next-auth.d.ts:1-26](file://frontend/types/next-auth.d.ts#L1-L26)
|
||||
- [.eslintrc.json:1-4](file://frontend/.eslintrc.json#L1-L4)
|
||||
|
||||
## 结论
|
||||
本项目基于Next.js 14实现了清晰的App Router页面组织、完善的认证体系与现代化的样式架构。通过Tailwind CSS与TypeScript的结合,提供了良好的开发体验与可维护性。建议在生产环境中进一步完善性能监控、缓存策略与安全配置,以获得更佳的用户体验。
|
||||
|
||||
## 附录
|
||||
- 部署建议:使用Next.js默认构建(next build + next start)部署,结合CDN加速与HTTPS配置;由于仪表盘布局依赖服务端会话校验,不适合使用纯静态导出。
|
||||
- 版本兼容:确保Node.js版本与Next.js 14兼容,定期更新依赖以获得最新安全补丁。
|
||||
- 最佳实践:保持组件单一职责、合理使用CSS变量与Tailwind工具类、严格类型约束与代码规范。
|
||||
|
|
@ -0,0 +1,617 @@
|
|||
# UI组件库
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [package.json](file://frontend/package.json)
|
||||
- [tailwind.config.ts](file://frontend/tailwind.config.ts)
|
||||
- [lib/utils.ts](file://frontend/lib/utils.ts)
|
||||
- [components/ui/button.tsx](file://frontend/components/ui/button.tsx)
|
||||
- [components/ui/dialog.tsx](file://frontend/components/ui/dialog.tsx)
|
||||
- [components/ui/dropdown-menu.tsx](file://frontend/components/ui/dropdown-menu.tsx)
|
||||
- [components/ui/input.tsx](file://frontend/components/ui/input.tsx)
|
||||
- [components/ui/select.tsx](file://frontend/components/ui/select.tsx)
|
||||
- [components/ui/card.tsx](file://frontend/components/ui/card.tsx)
|
||||
- [components/ui/badge.tsx](file://frontend/components/ui/badge.tsx)
|
||||
- [components/ui/table.tsx](file://frontend/components/ui/table.tsx)
|
||||
- [components/ui/tabs.tsx](file://frontend/components/ui/tabs.tsx)
|
||||
- [components/ui/label.tsx](file://frontend/components/ui/label.tsx)
|
||||
- [components/providers.tsx](file://frontend/components/providers.tsx)
|
||||
- [app/layout.tsx](file://frontend/app/layout.tsx)
|
||||
- [app/(dashboard)/dashboard/page.tsx](file://frontend/app/(dashboard)/dashboard/page.tsx)
|
||||
- [app/(dashboard)/dashboard/citations/page.tsx](file://frontend/app/(dashboard)/dashboard/citations/page.tsx)
|
||||
</cite>
|
||||
|
||||
## 更新摘要
|
||||
**所做更改**
|
||||
- 新增了仪表板页面中UI组件的实际使用示例分析
|
||||
- 扩展了按钮、输入框、选择器、对话框、表格等组件的具体应用场景
|
||||
- 增加了组件在真实业务场景中的组合使用模式
|
||||
- 完善了组件可访问性与状态管理的最佳实践
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构概览](#架构概览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [实际应用示例](#实际应用示例)
|
||||
7. [依赖关系分析](#依赖关系分析)
|
||||
8. [性能考虑](#性能考虑)
|
||||
9. [故障排除指南](#故障排除指南)
|
||||
10. [结论](#结论)
|
||||
11. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本UI组件库以Radix UI为核心,结合Tailwind CSS实现一致、可访问且可定制的基础组件。组件遵循以下设计原则:
|
||||
- 可访问性优先:基于Radix UI的语义化和键盘交互能力
|
||||
- 一致性:统一的视觉语言与交互模式
|
||||
- 可定制性:通过变体系统与CSS变量实现主题与尺寸扩展
|
||||
- 响应式:在移动端与桌面端保持良好体验
|
||||
- 组合性:以组合模式构建复杂界面,避免过度封装
|
||||
|
||||
## 项目结构
|
||||
前端采用Next.js应用结构,UI组件集中于components/ui目录,通过cn工具函数统一类名合并,Tailwind配置提供主题变量与动画插件。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "前端应用"
|
||||
LAYOUT["app/layout.tsx<br/>根布局与字体"]
|
||||
PROVIDERS["components/providers.tsx<br/>会话提供者"]
|
||||
DASHBOARD["app/(dashboard)/dashboard/page.tsx<br/>仪表盘页面"]
|
||||
CITATIONS["app/(dashboard)/dashboard/citations/page.tsx<br/>引用记录页面"]
|
||||
end
|
||||
subgraph "UI组件库"
|
||||
BUTTON["components/ui/button.tsx"]
|
||||
INPUT["components/ui/input.tsx"]
|
||||
SELECT["components/ui/select.tsx"]
|
||||
DIALOG["components/ui/dialog.tsx"]
|
||||
DROPDOWN["components/ui/dropdown-menu.tsx"]
|
||||
CARD["components/ui/card.tsx"]
|
||||
TABLE["components/ui/table.tsx"]
|
||||
TABS["components/ui/tabs.tsx"]
|
||||
LABEL["components/ui/label.tsx"]
|
||||
BADGE["components/ui/badge.tsx"]
|
||||
end
|
||||
UTILS["lib/utils.ts<br/>类名合并工具"]
|
||||
LAYOUT --> PROVIDERS
|
||||
LAYOUT --> DASHBOARD
|
||||
LAYOUT --> CITATIONS
|
||||
DASHBOARD --> CARD
|
||||
DASHBOARD --> TABLE
|
||||
CITATIONS --> INPUT
|
||||
CITATIONS --> SELECT
|
||||
CITATIONS --> TABLE
|
||||
CITATIONS --> BUTTON
|
||||
CITATIONS --> LABEL
|
||||
CITATIONS --> BADGE
|
||||
BUTTON --> UTILS
|
||||
INPUT --> UTILS
|
||||
SELECT --> UTILS
|
||||
DIALOG --> UTILS
|
||||
DROPDOWN --> UTILS
|
||||
CARD --> UTILS
|
||||
TABLE --> UTILS
|
||||
TABS --> UTILS
|
||||
LABEL --> UTILS
|
||||
BADGE --> UTILS
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
- [app/(dashboard)/dashboard/page.tsx](file://frontend/app/(dashboard)/dashboard/page.tsx#L1-L227)
|
||||
- [app/(dashboard)/dashboard/citations/page.tsx](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L1-L294)
|
||||
- [lib/utils.ts:1-7](file://frontend/lib/utils.ts#L1-L7)
|
||||
|
||||
**章节来源**
|
||||
- [package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
|
||||
## 核心组件
|
||||
本节概述所有基础UI组件的功能、属性与使用场景,并说明其可访问性与一致性保障。
|
||||
|
||||
- 按钮 Button
|
||||
- 功能:承载点击动作,支持多种外观与尺寸
|
||||
- 关键属性:variant(外观)、size(尺寸)、asChild(语义化渲染)
|
||||
- 可访问性:继承原生button语义,支持聚焦与键盘激活
|
||||
- 使用示例路径:[按钮使用示例](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L199-L204)
|
||||
|
||||
- 输入框 Input
|
||||
- 功能:文本输入,支持禁用与聚焦态样式
|
||||
- 关键属性:type、className等原生属性透传
|
||||
- 可访问性:原生语义,配合Label使用提升可访问性
|
||||
- 使用示例路径:[输入框使用示例](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L182-L196)
|
||||
|
||||
- 选择器 Select
|
||||
- 功能:下拉选择,支持滚动按钮与多级选项
|
||||
- 关键属性:触发器、内容区、项、分隔符、滚动按钮
|
||||
- 可访问性:基于Radix UI的键盘导航与焦点管理
|
||||
- 使用示例路径:[选择器使用示例](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L150-L162)
|
||||
|
||||
- 对话框 Dialog
|
||||
- 功能:模态对话,包含覆盖层、内容区、标题与描述
|
||||
- 关键属性:Portal、Overlay、Content、Close等
|
||||
- 可访问性:自动聚焦到内容区,支持Esc关闭
|
||||
- 使用示例路径:[对话框使用示例:1-123](file://frontend/components/ui/dialog.tsx#L1-L123)
|
||||
|
||||
- 下拉菜单 Dropdown Menu
|
||||
- 功能:上下文菜单,支持子菜单、复选/单选项、快捷键提示
|
||||
- 关键属性:Root、Trigger、Content、Item、CheckboxItem、RadioItem、Label、Separator、Shortcut等
|
||||
- 可访问性:基于Radix UI的键盘导航与焦点管理
|
||||
- 使用示例路径:[下拉菜单使用示例:1-201](file://frontend/components/ui/dropdown-menu.tsx#L1-L201)
|
||||
|
||||
- 卡片 Card
|
||||
- 功能:容器组件,支持头部、标题、描述、内容与底部
|
||||
- 关键属性:通用HTML属性透传
|
||||
- 使用示例路径:[卡片使用示例](file://frontend/app/(dashboard)/dashboard/page.tsx#L177-L191)
|
||||
|
||||
- 表格 Table
|
||||
- 功能:数据表格,支持表头、表体、表尾、行、单元格与标题
|
||||
- 关键属性:通用HTML属性透传
|
||||
- 使用示例路径:[表格使用示例](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L229-L286)
|
||||
|
||||
- 标签页 Tabs
|
||||
- 功能:标签页切换,包含列表、触发器与内容区
|
||||
- 关键属性:基于Radix UI的状态同步
|
||||
- 使用示例路径:[标签页使用示例:1-56](file://frontend/components/ui/tabs.tsx#L1-L56)
|
||||
|
||||
- 标签 Label
|
||||
- 功能:表单控件标签,与输入控件建立关联
|
||||
- 关键属性:基于Radix UI的Label原语,配合Tailwind的peer-disabled样式表达禁用态
|
||||
- 使用示例路径:[标签使用示例](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L149-L150)
|
||||
|
||||
- 徽章 Badge
|
||||
- 功能:状态或分类标记
|
||||
- 关键属性:variant(外观)
|
||||
- 使用示例路径:[徽章使用示例](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L268-L278)
|
||||
|
||||
**章节来源**
|
||||
- [components/ui/button.tsx:1-57](file://frontend/components/ui/button.tsx#L1-L57)
|
||||
- [components/ui/input.tsx:1-23](file://frontend/components/ui/input.tsx#L1-L23)
|
||||
- [components/ui/select.tsx:1-161](file://frontend/components/ui/select.tsx#L1-L161)
|
||||
- [components/ui/dialog.tsx:1-123](file://frontend/components/ui/dialog.tsx#L1-L123)
|
||||
- [components/ui/dropdown-menu.tsx:1-201](file://frontend/components/ui/dropdown-menu.tsx#L1-L201)
|
||||
- [components/ui/card.tsx:1-80](file://frontend/components/ui/card.tsx#L1-L80)
|
||||
- [components/ui/table.tsx:1-118](file://frontend/components/ui/table.tsx#L1-L118)
|
||||
- [components/ui/tabs.tsx:1-56](file://frontend/components/ui/tabs.tsx#L1-L56)
|
||||
- [components/ui/label.tsx:1-27](file://frontend/components/ui/label.tsx#L1-L27)
|
||||
- [components/ui/badge.tsx:1-37](file://frontend/components/ui/badge.tsx#L1-L37)
|
||||
|
||||
## 架构概览
|
||||
组件库围绕以下架构要素工作:
|
||||
- Radix UI:提供可访问性与状态管理的底层抽象
|
||||
- Tailwind CSS:提供原子化样式与主题变量
|
||||
- class-variance-authority:提供变体系统,统一外观与尺寸
|
||||
- clsx/tailwind-merge:安全合并类名,避免冲突
|
||||
- Next.js App Router:页面级布局与路由组织
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
RADIX["@radix-ui/react-*<br/>可访问性与状态"]
|
||||
CVA["class-variance-authority<br/>变体系统"]
|
||||
CLX["clsx + tailwind-merge<br/>类名合并"]
|
||||
TW["Tailwind CSS<br/>主题与动画"]
|
||||
UTILS["lib/utils.ts<br/>cn工具函数"]
|
||||
COMPONENTS["components/ui/*<br/>具体组件实现"]
|
||||
COMPONENTS --> RADIX
|
||||
COMPONENTS --> CVA
|
||||
COMPONENTS --> CLX
|
||||
CLX --> TW
|
||||
UTILS --> CLX
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [package.json:11-27](file://frontend/package.json#L11-L27)
|
||||
- [lib/utils.ts:1-7](file://frontend/lib/utils.ts#L1-L7)
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 按钮 Button
|
||||
- 设计要点
|
||||
- 使用Slot实现asChild,允许将按钮渲染为链接或其他元素
|
||||
- 通过变体系统控制外观与尺寸,保持一致的交互反馈
|
||||
- 聚焦态与禁用态通过CSS类明确表达
|
||||
- 可访问性
|
||||
- 默认button语义,支持键盘激活
|
||||
- 聚焦可见性与环形边框确保键盘用户可见
|
||||
- 复杂度与性能
|
||||
- O(1) 渲染开销,变体类名由cva在运行时根据属性查表拼接,开销可忽略
|
||||
- 使用示例
|
||||
- [按钮使用示例](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L199-L204)
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class Button {
|
||||
+variant : "default|destructive|outline|secondary|ghost|link"
|
||||
+size : "default|sm|lg|icon"
|
||||
+asChild : boolean
|
||||
+forwardRef<HTMLButtonElement>
|
||||
}
|
||||
class Slot {
|
||||
+asChild : boolean
|
||||
}
|
||||
Button --> Slot : "可选语义化渲染"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [components/ui/button.tsx:36-54](file://frontend/components/ui/button.tsx#L36-L54)
|
||||
|
||||
**章节来源**
|
||||
- [components/ui/button.tsx:1-57](file://frontend/components/ui/button.tsx#L1-L57)
|
||||
|
||||
### 对话框 Dialog
|
||||
- 设计要点
|
||||
- Portal确保覆盖层与内容在DOM层级上正确分离
|
||||
- 基于data-state的动画类实现进入/退出过渡
|
||||
- Close按钮包含不可见文本,提升屏幕阅读器可用性
|
||||
- 可访问性
|
||||
- 自动聚焦到内容区,Esc键关闭
|
||||
- Overlay点击可关闭,支持键盘导航
|
||||
- 使用示例
|
||||
- [对话框使用示例:1-123](file://frontend/components/ui/dialog.tsx#L1-L123)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as "用户"
|
||||
participant T as "触发器"
|
||||
participant P as "Portal"
|
||||
participant O as "Overlay"
|
||||
participant C as "Content"
|
||||
U->>T : 点击/按键激活
|
||||
T->>P : 打开对话框
|
||||
P->>O : 渲染覆盖层
|
||||
P->>C : 渲染内容区
|
||||
U->>O : 点击背景
|
||||
O->>P : 关闭对话框
|
||||
U->>C : Esc键
|
||||
C->>P : 关闭对话框
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [components/ui/dialog.tsx:9-54](file://frontend/components/ui/dialog.tsx#L9-L54)
|
||||
|
||||
**章节来源**
|
||||
- [components/ui/dialog.tsx:1-123](file://frontend/components/ui/dialog.tsx#L1-L123)
|
||||
|
||||
### 下拉菜单 Dropdown Menu
|
||||
- 设计要点
|
||||
- 支持子菜单、复选/单选项、快捷键提示与分隔符
|
||||
- 基于Portal与data-state的动画类实现流畅过渡
|
||||
- 可访问性
|
||||
- 键盘导航:上下左右移动、Enter确认、Esc返回
|
||||
- 焦点管理:打开时聚焦首个项,关闭时返回触发器
|
||||
- 使用示例
|
||||
- [下拉菜单使用示例:1-201](file://frontend/components/ui/dropdown-menu.tsx#L1-L201)
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["打开菜单"]) --> FocusFirst["聚焦首个可选项"]
|
||||
FocusFirst --> Nav{"键盘方向键"}
|
||||
Nav --> |上/下| MoveFocus["移动焦点"]
|
||||
Nav --> |右| SubOpen["打开子菜单"]
|
||||
Nav --> |Space| Select["选择项"]
|
||||
Nav --> |Enter| Confirm["确认选择"]
|
||||
Nav --> |Esc| Close["关闭菜单"]
|
||||
MoveFocus --> Nav
|
||||
SubOpen --> Nav
|
||||
Select --> Close
|
||||
Confirm --> Close
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [components/ui/dropdown-menu.tsx:21-75](file://frontend/components/ui/dropdown-menu.tsx#L21-L75)
|
||||
|
||||
**章节来源**
|
||||
- [components/ui/dropdown-menu.tsx:1-201](file://frontend/components/ui/dropdown-menu.tsx#L1-L201)
|
||||
|
||||
### 选择器 Select
|
||||
- 设计要点
|
||||
- 触发器包含图标与占位符,内容区支持滚动按钮
|
||||
- Viewport根据触发器尺寸自适应
|
||||
- 可访问性
|
||||
- 键盘导航:Tab进入、方向键选择、Enter确认
|
||||
- 屏幕阅读器:通过SelectValue与ItemText传达当前值与选项
|
||||
- 使用示例
|
||||
- [选择器使用示例](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L150-L162)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as "用户"
|
||||
participant T as "触发器"
|
||||
participant P as "Portal"
|
||||
participant V as "Viewport"
|
||||
participant I as "选项项"
|
||||
U->>T : 点击/按键激活
|
||||
T->>P : 打开内容区
|
||||
P->>V : 渲染选项列表
|
||||
U->>I : 键盘/鼠标选择
|
||||
I->>T : 更新值并关闭
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [components/ui/select.tsx:15-100](file://frontend/components/ui/select.tsx#L15-L100)
|
||||
|
||||
**章节来源**
|
||||
- [components/ui/select.tsx:1-161](file://frontend/components/ui/select.tsx#L1-L161)
|
||||
|
||||
### 表格 Table
|
||||
- 设计要点
|
||||
- 外层容器提供横向滚动,适配窄屏设备
|
||||
- 行与单元格支持hover与选中态
|
||||
- 可访问性
|
||||
- 表格语义清晰,适合屏幕阅读器解析
|
||||
- 使用示例
|
||||
- [表格使用示例](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L229-L286)
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Container["表格容器<br/>overflow-auto"] --> Table["table 元素"]
|
||||
Table --> Header["thead<br/>边框样式"]
|
||||
Table --> Body["tbody<br/>悬停与选中态"]
|
||||
Body --> Row["tr<br/>边框与状态"]
|
||||
Row --> Cell["td/th<br/>对齐与内边距"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [components/ui/table.tsx:5-106](file://frontend/components/ui/table.tsx#L5-L106)
|
||||
|
||||
**章节来源**
|
||||
- [components/ui/table.tsx:1-118](file://frontend/components/ui/table.tsx#L1-L118)
|
||||
|
||||
### 标签页 Tabs
|
||||
- 设计要点
|
||||
- 列表容器与触发器基于Radix UI状态同步
|
||||
- 激活态提供背景与阴影强调
|
||||
- 可访问性
|
||||
- 键盘导航:左右方向键在标签之间移动并切换,Tab键将焦点移入当前内容区
|
||||
- 使用示例
|
||||
- [标签页使用示例:1-56](file://frontend/components/ui/tabs.tsx#L1-L56)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as "用户"
|
||||
participant L as "TabsList"
|
||||
participant T as "TabsTrigger"
|
||||
participant C as "TabsContent"
|
||||
U->>T : 点击/按键激活
|
||||
T->>L : 同步状态
|
||||
L->>C : 显示对应内容
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [components/ui/tabs.tsx:8-53](file://frontend/components/ui/tabs.tsx#L8-L53)
|
||||
|
||||
**章节来源**
|
||||
- [components/ui/tabs.tsx:1-56](file://frontend/components/ui/tabs.tsx#L1-L56)
|
||||
|
||||
### 卡片 Card
|
||||
- 设计要点
|
||||
- 分离头部、标题、描述、内容与底部区域,便于组合
|
||||
- 使用示例
|
||||
- [卡片使用示例](file://frontend/app/(dashboard)/dashboard/page.tsx#L177-L191)
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class Card {
|
||||
+HTMLDivElement
|
||||
}
|
||||
class CardHeader
|
||||
class CardTitle
|
||||
class CardDescription
|
||||
class CardContent
|
||||
class CardFooter
|
||||
Card --> CardHeader
|
||||
Card --> CardTitle
|
||||
Card --> CardDescription
|
||||
Card --> CardContent
|
||||
Card --> CardFooter
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [components/ui/card.tsx:5-77](file://frontend/components/ui/card.tsx#L5-L77)
|
||||
|
||||
**章节来源**
|
||||
- [components/ui/card.tsx:1-80](file://frontend/components/ui/card.tsx#L1-L80)
|
||||
|
||||
### 标签 Label
|
||||
- 设计要点
|
||||
- 基于peer-disabled语义,与受控输入联动
|
||||
- 使用示例
|
||||
- [标签使用示例](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L149-L150)
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Input["受控输入"] --> Peer["peer 伪类"]
|
||||
Peer --> Disabled{"禁用状态"}
|
||||
Disabled --> |是| LabelDisabled["标签禁用样式"]
|
||||
Disabled --> |否| LabelEnabled["标签启用样式"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [components/ui/label.tsx:9-23](file://frontend/components/ui/label.tsx#L9-L23)
|
||||
|
||||
**章节来源**
|
||||
- [components/ui/label.tsx:1-27](file://frontend/components/ui/label.tsx#L1-L27)
|
||||
|
||||
### 徽章 Badge
|
||||
- 设计要点
|
||||
- 通过变体系统提供默认/次要/破坏/描边等外观
|
||||
- 使用示例
|
||||
- [徽章使用示例](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L268-L278)
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class Badge {
|
||||
+variant : "default|secondary|destructive|outline"
|
||||
}
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [components/ui/badge.tsx:6-34](file://frontend/components/ui/badge.tsx#L6-L34)
|
||||
|
||||
**章节来源**
|
||||
- [components/ui/badge.tsx:1-37](file://frontend/components/ui/badge.tsx#L1-L37)
|
||||
|
||||
## 实际应用示例
|
||||
|
||||
### 仪表板页面组件应用
|
||||
仪表板页面展示了组件在真实业务场景中的综合应用:
|
||||
|
||||
#### 数据统计卡片组合
|
||||
- **组件组合**:Card + CardHeader + CardTitle + CardContent
|
||||
- **应用场景**:展示查询次数、引用次数、引用率、平均位置等关键指标
|
||||
- **实现特点**:使用动态图标与颜色方案增强视觉表达
|
||||
|
||||
#### 图表集成应用
|
||||
- **组件组合**:Card + Chart组件
|
||||
- **应用场景**:展示引用趋势和平台对比数据
|
||||
- **实现特点**:通过条件渲染处理空数据状态
|
||||
|
||||
#### 完整的数据展示流程
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Loading["加载状态"] --> Empty{"数据为空?"}
|
||||
Empty --> |是| EmptyState["空状态展示"]
|
||||
Empty --> |否| DataDisplay["数据展示"]
|
||||
DataDisplay --> StatCards["统计卡片"]
|
||||
DataDisplay --> Charts["图表展示"]
|
||||
EmptyState --> CreateQuery["创建查询引导"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [app/(dashboard)/dashboard/page.tsx:49-137](file://frontend/app/(dashboard)/dashboard/page.tsx#L49-L137)
|
||||
|
||||
### 引用记录页面组件应用
|
||||
引用记录页面体现了组件在复杂数据管理场景中的应用:
|
||||
|
||||
##### 筛选表单组合
|
||||
- **组件组合**:Card + Label + Select + Input + Button
|
||||
- **应用场景**:查询词筛选、平台筛选、日期范围筛选
|
||||
- **实现特点**:响应式网格布局,支持表单重置
|
||||
|
||||
##### 数据表格应用
|
||||
- **组件组合**:Table + TableRow + TableCell + Badge
|
||||
- **应用场景**:展示引用检测结果的完整列表
|
||||
- **实现特点**:支持横向滚动,徽章用于状态标识
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant User as "用户"
|
||||
participant Form as "筛选表单"
|
||||
participant API as "API服务"
|
||||
participant Table as "数据表格"
|
||||
User->>Form : 设置筛选条件
|
||||
Form->>API : 发送筛选请求
|
||||
API-->>Form : 返回筛选结果
|
||||
Form->>Table : 更新表格数据
|
||||
Table->>User : 显示筛选后的记录
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [app/(dashboard)/dashboard/citations/page.tsx:147-207](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L147-L207)
|
||||
|
||||
**章节来源**
|
||||
- [app/(dashboard)/dashboard/page.tsx:1-227](file://frontend/app/(dashboard)/dashboard/page.tsx#L1-L227)
|
||||
- [app/(dashboard)/dashboard/citations/page.tsx:1-294](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L1-L294)
|
||||
|
||||
## 依赖关系分析
|
||||
- 组件依赖Radix UI实现可访问性与状态管理
|
||||
- 类名合并依赖clsx与tailwind-merge,确保样式不冲突
|
||||
- 主题变量与圆角半径由Tailwind配置提供
|
||||
- 页面通过引入UI组件实现功能组合
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
P["package.json<br/>依赖声明"] --> R["@radix-ui/react-*"]
|
||||
P --> CVA["class-variance-authority"]
|
||||
P --> CLSX["clsx"]
|
||||
P --> TWM["tailwind-merge"]
|
||||
P --> TW["tailwindcss"]
|
||||
BTN["button.tsx"] --> R
|
||||
BTN --> CVA
|
||||
BTN --> UTILS["lib/utils.ts"]
|
||||
SEL["select.tsx"] --> R
|
||||
SEL --> UTILS
|
||||
DDL["dropdown-menu.tsx"] --> R
|
||||
DDL --> UTILS
|
||||
DLG["dialog.tsx"] --> R
|
||||
DLG --> UTILS
|
||||
TBL["table.tsx"] --> UTILS
|
||||
CARD["card.tsx"] --> UTILS
|
||||
TABS["tabs.tsx"] --> R
|
||||
TABS --> UTILS
|
||||
LABEL["label.tsx"] --> R
|
||||
LABEL --> CVA
|
||||
LABEL --> UTILS
|
||||
BADGE["badge.tsx"] --> CVA
|
||||
BADGE --> UTILS
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [package.json:11-27](file://frontend/package.json#L11-L27)
|
||||
- [lib/utils.ts:1-7](file://frontend/lib/utils.ts#L1-L7)
|
||||
|
||||
**章节来源**
|
||||
- [package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [lib/utils.ts:1-7](file://frontend/lib/utils.ts#L1-L7)
|
||||
|
||||
## 性能考虑
|
||||
- 组件均采用forwardRef与透传属性,减少额外包装开销
|
||||
- 变体定义在模块加载时创建一次,运行时仅进行类名查找与拼接
|
||||
- Portal仅在需要时渲染,避免不必要的DOM节点
|
||||
- 表格容器提供横向滚动,避免布局抖动
|
||||
- 建议在大量数据场景下使用虚拟化或分页
|
||||
|
||||
## 故障排除指南
|
||||
- 焦点问题
|
||||
- 确保对话框与下拉菜单在打开时自动聚焦到可交互元素
|
||||
- 检查Close按钮的不可见文本是否正确设置
|
||||
- 动画异常
|
||||
- 确认data-state类与动画类匹配
|
||||
- 检查Tailwind动画插件是否正确启用
|
||||
- 类名冲突
|
||||
- 使用cn工具函数合并类名,避免重复覆盖
|
||||
- 主题不生效
|
||||
- 检查Tailwind配置中的颜色与圆角变量
|
||||
- 确认content路径包含组件目录
|
||||
|
||||
**章节来源**
|
||||
- [components/ui/dialog.tsx:47-50](file://frontend/components/ui/dialog.tsx#L47-L50)
|
||||
- [tailwind.config.ts:10-54](file://frontend/tailwind.config.ts#L10-L54)
|
||||
- [lib/utils.ts:4-6](file://frontend/lib/utils.ts#L4-L6)
|
||||
|
||||
## 结论
|
||||
本UI组件库以Radix UI为基础,结合Tailwind CSS与变体系统,提供了高可访问性、一致性强且易于扩展的组件集合。通过清晰的组合模式与严格的样式约定,能够支撑从简单表单到复杂数据面板的各类界面需求。新增的仪表板页面使用示例进一步验证了组件在真实业务场景中的实用性与灵活性。
|
||||
|
||||
## 附录
|
||||
|
||||
### 使用规范与最佳实践
|
||||
- 组合模式
|
||||
- 使用Card组合多个小部件,如统计卡片与图表
|
||||
- 使用Table与Badge组合展示数据与状态
|
||||
- 状态管理
|
||||
- 将外部状态(如查询参数)与组件状态解耦
|
||||
- 在页面级组件中集中处理加载、错误与空状态
|
||||
- 无障碍支持
|
||||
- 为所有交互元素提供键盘可达性
|
||||
- 为有含义的图标提供替代文本,纯装饰性元素应对辅助技术隐藏(如 aria-hidden)
|
||||
- 主题与响应式
|
||||
- 通过CSS变量与Tailwind变体实现主题切换
|
||||
- 使用Grid与Flex在不同断点下调整布局
|
||||
|
||||
### 自定义扩展指南
|
||||
- 新增组件
|
||||
- 基于现有组件结构,使用forwardRef与透传属性
|
||||
- 引入变体系统以支持外观与尺寸扩展
|
||||
- 主题定制
|
||||
- 在Tailwind配置中扩展colors与borderRadius
|
||||
- 通过CSS变量统一主题色板
|
||||
- 动画与过渡
|
||||
- 使用data-state类与Tailwind动画插件
|
||||
- 保持过渡时长与缓动曲线一致
|
||||
|
||||
**章节来源**
|
||||
- [app/(dashboard)/dashboard/page.tsx:1-227](file://frontend/app/(dashboard)/dashboard/page.tsx#L1-L227)
|
||||
- [app/(dashboard)/dashboard/citations/page.tsx:1-294](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L1-L294)
|
||||
- [tailwind.config.ts:10-54](file://frontend/tailwind.config.ts#L10-L54)
|
||||
|
|
@ -0,0 +1,349 @@
|
|||
# 前端系统架构
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [package.json](file://frontend/package.json)
|
||||
- [next.config.mjs](file://frontend/next.config.mjs)
|
||||
- [tailwind.config.ts](file://frontend/tailwind.config.ts)
|
||||
- [app/layout.tsx](file://frontend/app/layout.tsx)
|
||||
- [components/providers.tsx](file://frontend/components/providers.tsx)
|
||||
- [app/(auth)/layout.tsx](file://frontend/app/(auth)/layout.tsx)
|
||||
- [app/(dashboard)/layout.tsx](file://frontend/app/(dashboard)/layout.tsx)
|
||||
- [lib/auth.ts](file://frontend/lib/auth.ts)
|
||||
- [types/next-auth.d.ts](file://frontend/types/next-auth.d.ts)
|
||||
- [lib/api.ts](file://frontend/lib/api.ts)
|
||||
- [app/(auth)/login/page.tsx](file://frontend/app/(auth)/login/page.tsx)
|
||||
- [app/(dashboard)/dashboard/page.tsx](file://frontend/app/(dashboard)/dashboard/page.tsx)
|
||||
- [components/ui/button.tsx](file://frontend/components/ui/button.tsx)
|
||||
- [components/layout/header.tsx](file://frontend/components/layout/header.tsx)
|
||||
- [app/api/auth/[...nextauth]/route.ts](file://frontend/app/api/auth/[...nextauth]/route.ts)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [引言](#引言)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排除指南](#故障排除指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 引言
|
||||
本文件为 GEO 前端系统的架构文档,聚焦于基于 Next.js 14 的应用架构设计,涵盖 App Router 页面组织、服务器组件与客户端组件的混合使用模式;认证系统(NextAuth.js 集成、会话管理与路由保护);UI 组件库设计理念与复用策略;数据获取与状态管理;错误处理机制;以及响应式设计、可访问性与性能优化等最佳实践。
|
||||
|
||||
## 项目结构
|
||||
前端采用 Next.js 14 App Router 结构,页面按功能域分组(通过路由组 `(auth)` 和 `(dashboard)` 实现),根布局统一注入全局样式与 Provider,认证相关 API 路由集中于 `/api/auth/[...nextauth]`。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
A["app/layout.tsx<br/>根布局与全局样式"] --> B["components/providers.tsx<br/>SessionProvider"]
|
||||
A --> C["app/(auth)/layout.tsx<br/>认证页容器"]
|
||||
A --> D["app/(dashboard)/layout.tsx<br/>仪表盘容器"]
|
||||
D --> E["components/layout/header.tsx<br/>头部与登出"]
|
||||
C --> F["app/(auth)/login/page.tsx<br/>登录页"]
|
||||
D --> G["app/(dashboard)/dashboard/page.tsx<br/>仪表盘页"]
|
||||
B --> H["app/api/auth/[...nextauth]/route.ts<br/>NextAuth 路由处理器"]
|
||||
A --> I["tailwind.config.ts<br/>Tailwind 配置"]
|
||||
A --> J["next.config.mjs<br/>Next 配置"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
- [app/(auth)/layout.tsx](file://frontend/app/(auth)/layout.tsx#L1-L12)
|
||||
- [app/(dashboard)/layout.tsx](file://frontend/app/(dashboard)/layout.tsx#L1-L27)
|
||||
- [components/layout/header.tsx:1-30](file://frontend/components/layout/header.tsx#L1-L30)
|
||||
- [app/(auth)/login/page.tsx](file://frontend/app/(auth)/login/page.tsx#L1-L93)
|
||||
- [app/(dashboard)/dashboard/page.tsx](file://frontend/app/(dashboard)/dashboard/page.tsx#L1-L156)
|
||||
- [app/api/auth/[...nextauth]/route.ts](file://frontend/app/api/auth/[...nextauth]/route.ts#L1-L7)
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
- [next.config.mjs:1-5](file://frontend/next.config.mjs#L1-L5)
|
||||
|
||||
**章节来源**
|
||||
- [app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
- [app/(auth)/layout.tsx](file://frontend/app/(auth)/layout.tsx#L1-L12)
|
||||
- [app/(dashboard)/layout.tsx](file://frontend/app/(dashboard)/layout.tsx#L1-L27)
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
- [next.config.mjs:1-5](file://frontend/next.config.mjs#L1-L5)
|
||||
|
||||
## 核心组件
|
||||
- 根布局与全局样式:定义站点元数据、字体变量与全局样式入口,并包裹应用上下文 Provider。
|
||||
- 会话提供者:在客户端注入 SessionProvider,使整个应用可访问 NextAuth 会话状态。
|
||||
- 认证路由组:提供登录/注册等认证页面的统一容器样式。
|
||||
- 仪表盘路由组:提供侧边栏与头部导航,同时在服务器端校验会话,未登录则重定向至登录页。
|
||||
- 认证配置:NextAuth 选项,使用凭据提供者对接后端认证接口,JWT 会话策略,回调处理 token 与 session 映射。
|
||||
- 类型扩展:为 NextAuth 的 Session 与 JWT 扩展自定义字段,确保类型安全。
|
||||
- API 客户端:封装带鉴权头的通用请求方法,统一错误处理与响应解析。
|
||||
- UI 组件库:基于 Radix UI 与 Tailwind,使用 class-variance-authority 提供变体与尺寸控制。
|
||||
- 头部组件:展示当前用户信息与登出按钮,触发 NextAuth 的 signOut 流程。
|
||||
|
||||
**章节来源**
|
||||
- [app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
- [app/(auth)/layout.tsx](file://frontend/app/(auth)/layout.tsx#L1-L12)
|
||||
- [app/(dashboard)/layout.tsx](file://frontend/app/(dashboard)/layout.tsx#L1-L27)
|
||||
- [lib/auth.ts:1-56](file://frontend/lib/auth.ts#L1-L56)
|
||||
- [types/next-auth.d.ts:1-26](file://frontend/types/next-auth.d.ts#L1-L26)
|
||||
- [lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [components/ui/button.tsx:1-57](file://frontend/components/ui/button.tsx#L1-L57)
|
||||
- [components/layout/header.tsx:1-30](file://frontend/components/layout/header.tsx#L1-L30)
|
||||
|
||||
## 架构总览
|
||||
下图展示了从浏览器到认证服务与业务 API 的整体调用链路,以及客户端组件与服务器组件的职责边界。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "浏览器"
|
||||
U["用户界面<br/>客户端组件"]
|
||||
end
|
||||
subgraph "Next.js 应用"
|
||||
RL["根布局<br/>app/layout.tsx"]
|
||||
PR["会话提供者<br/>components/providers.tsx"]
|
||||
AL["认证布局<br/>app/(auth)/layout.tsx"]
|
||||
DL["仪表盘布局<br/>app/(dashboard)/layout.tsx"]
|
||||
LG["登录页<br/>app/(auth)/login/page.tsx"]
|
||||
DB["仪表盘页<br/>app/(dashboard)/dashboard/page.tsx"]
|
||||
AH["NextAuth 路由<br/>app/api/auth/[...nextauth]/route.ts"]
|
||||
UI["UI 组件库<br/>components/ui/*"]
|
||||
LH["头部组件<br/>components/layout/header.tsx"]
|
||||
end
|
||||
subgraph "认证服务"
|
||||
NA["NextAuth 服务<br/>lib/auth.ts"]
|
||||
end
|
||||
subgraph "后端 API"
|
||||
API["业务 API 客户端<br/>lib/api.ts"]
|
||||
BE["后端服务<br/>backend/app/*"]
|
||||
end
|
||||
U --> RL
|
||||
RL --> PR
|
||||
PR --> AL
|
||||
PR --> DL
|
||||
AL --> LG
|
||||
DL --> LH
|
||||
DL --> DB
|
||||
LG --> AH
|
||||
AH --> NA
|
||||
DB --> API
|
||||
NA --> BE
|
||||
API --> BE
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
- [app/(auth)/layout.tsx](file://frontend/app/(auth)/layout.tsx#L1-L12)
|
||||
- [app/(dashboard)/layout.tsx](file://frontend/app/(dashboard)/layout.tsx#L1-L27)
|
||||
- [app/(auth)/login/page.tsx](file://frontend/app/(auth)/login/page.tsx#L1-L93)
|
||||
- [app/(dashboard)/dashboard/page.tsx](file://frontend/app/(dashboard)/dashboard/page.tsx#L1-L156)
|
||||
- [components/layout/header.tsx:1-30](file://frontend/components/layout/header.tsx#L1-L30)
|
||||
- [app/api/auth/[...nextauth]/route.ts](file://frontend/app/api/auth/[...nextauth]/route.ts#L1-L7)
|
||||
- [lib/auth.ts:1-56](file://frontend/lib/auth.ts#L1-L56)
|
||||
- [lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 认证系统(NextAuth.js 集成)
|
||||
- 凭据提供者:使用邮箱/密码进行认证,调用后端登录接口,成功后返回包含用户信息与访问令牌的对象。
|
||||
- JWT 会话策略:在回调中将访问令牌与用户 ID 写入 JWT,并在 session 回调中回填到 session 对象。
|
||||
- 登录流程:客户端登录页通过 next-auth/react 的 signIn 触发凭据认证,成功后跳转至仪表盘。
|
||||
- 路由保护:仪表盘布局在服务器端通过 getServerSession 获取会话,若无会话则重定向至登录页。
|
||||
- NextAuth 路由:统一暴露 GET/POST,交由 NextAuth 处理认证生命周期。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as "客户端浏览器"
|
||||
participant LP as "登录页<br/>app/(auth)/login/page.tsx"
|
||||
participant RP as "NextAuth 路由<br/>app/api/auth/[...nextauth]/route.ts"
|
||||
participant NA as "NextAuth 配置<br/>lib/auth.ts"
|
||||
participant BE as "后端服务<br/>backend/app/api/auth.py"
|
||||
C->>LP : "提交邮箱/密码"
|
||||
LP->>RP : "signIn('credentials', {email,password})"
|
||||
RP->>NA : "调用 NextAuth 处理"
|
||||
NA->>BE : "调用后端登录接口"
|
||||
BE-->>NA : "返回访问令牌与用户信息"
|
||||
NA-->>RP : "生成 JWT 与 session"
|
||||
RP-->>LP : "认证结果"
|
||||
LP-->>C : "跳转到 /dashboard 或显示错误"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [app/(auth)/login/page.tsx](file://frontend/app/(auth)/login/page.tsx#L1-L93)
|
||||
- [app/api/auth/[...nextauth]/route.ts](file://frontend/app/api/auth/[...nextauth]/route.ts#L1-L7)
|
||||
- [lib/auth.ts:1-56](file://frontend/lib/auth.ts#L1-L56)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
|
||||
**章节来源**
|
||||
- [lib/auth.ts:1-56](file://frontend/lib/auth.ts#L1-L56)
|
||||
- [types/next-auth.d.ts:1-26](file://frontend/types/next-auth.d.ts#L1-L26)
|
||||
- [app/api/auth/[...nextauth]/route.ts](file://frontend/app/api/auth/[...nextauth]/route.ts#L1-L7)
|
||||
- [app/(auth)/login/page.tsx](file://frontend/app/(auth)/login/page.tsx#L1-L93)
|
||||
- [app/(dashboard)/layout.tsx](file://frontend/app/(dashboard)/layout.tsx#L1-L27)
|
||||
|
||||
### 数据获取与状态管理
|
||||
- 会话状态:仪表盘页通过 next-auth/react 的 useSession 获取当前会话,包含访问令牌。
|
||||
- API 客户端:封装 fetchWithAuth,自动添加 Authorization 头,统一处理非 2xx 错误并抛出异常。
|
||||
- 仪表盘数据:在依赖会话令牌时加载统计数据,包含加载态与错误态处理,失败时提供刷新操作。
|
||||
- 查询与引用:API 客户端提供查询列表、创建、更新、删除与引用统计、导出等方法,便于页面按需调用。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["进入仪表盘"]) --> CheckToken["检查会话令牌"]
|
||||
CheckToken --> HasToken{"存在令牌?"}
|
||||
HasToken --> |否| EndNoop["不执行数据加载"]
|
||||
HasToken --> |是| Fetch["调用 API 客户端获取统计数据"]
|
||||
Fetch --> Ok{"请求成功?"}
|
||||
Ok --> |是| SetData["设置统计数据并渲染图表"]
|
||||
Ok --> |否| ShowErr["显示错误并提供刷新按钮"]
|
||||
SetData --> EndDone(["完成"])
|
||||
ShowErr --> EndDone
|
||||
EndNoop --> EndDone
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [app/(dashboard)/dashboard/page.tsx](file://frontend/app/(dashboard)/dashboard/page.tsx#L1-L156)
|
||||
- [lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
**章节来源**
|
||||
- [app/(dashboard)/dashboard/page.tsx](file://frontend/app/(dashboard)/dashboard/page.tsx#L1-L156)
|
||||
- [lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
### UI 组件库与样式系统
|
||||
- 设计理念:以 Radix UI 为基础构建语义化与可访问性友好的基础控件;使用 Tailwind 实现原子化样式与主题变量。
|
||||
- 变体与尺寸:通过 class-variance-authority 为组件提供多种变体(如 default、destructive、outline 等)与尺寸(default、sm、lg、icon)。
|
||||
- 主题与暗色模式:Tailwind 配置启用基于 class 的深色模式,颜色与圆角通过 CSS 变量统一管理。
|
||||
- 复用策略:将通用 UI 抽象为可复用组件(如 Button、Input、Card、Dialog 等),在页面中按需组合使用。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class Button {
|
||||
+variant : "default|destructive|outline|secondary|ghost|link"
|
||||
+size : "default|sm|lg|icon"
|
||||
+asChild : boolean
|
||||
+ref : HTMLButtonElement
|
||||
}
|
||||
class UIComponents {
|
||||
+Card
|
||||
+Input
|
||||
+Label
|
||||
+Select
|
||||
+Dialog
|
||||
+DropdownMenu
|
||||
+Tabs
|
||||
+Table
|
||||
}
|
||||
Button --> UIComponents : "作为基础控件被复用"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [components/ui/button.tsx:1-57](file://frontend/components/ui/button.tsx#L1-L57)
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
|
||||
**章节来源**
|
||||
- [components/ui/button.tsx:1-57](file://frontend/components/ui/button.tsx#L1-L57)
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
|
||||
### 服务器组件与客户端组件的混合使用
|
||||
- 服务器组件:仪表盘布局在服务器端通过 getServerSession 校验会话并进行重定向,避免客户端渲染无意义内容。
|
||||
- 客户端组件:登录页、仪表盘页、头部组件均标记为客户端组件,以便使用 hooks(如 useSession、useRouter)与交互逻辑。
|
||||
- Provider 注入:根布局注入 Providers,使子树中的客户端组件可共享会话状态。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant S as "服务器组件<br/>app/(dashboard)/layout.tsx"
|
||||
participant C as "客户端组件<br/>app/(dashboard)/dashboard/page.tsx"
|
||||
participant P as "Providers<br/>components/providers.tsx"
|
||||
S->>S : "getServerSession() 校验会话"
|
||||
S-->>S : "无会话 -> 重定向到 /login"
|
||||
S-->>P : "渲染根 Provider"
|
||||
P-->>C : "向客户端组件提供会话状态"
|
||||
C->>C : "useSession/useEffect 加载数据"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [app/(dashboard)/layout.tsx](file://frontend/app/(dashboard)/layout.tsx#L1-L27)
|
||||
- [app/(dashboard)/dashboard/page.tsx](file://frontend/app/(dashboard)/dashboard/page.tsx#L1-L156)
|
||||
- [components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
|
||||
**章节来源**
|
||||
- [app/(dashboard)/layout.tsx](file://frontend/app/(dashboard)/layout.tsx#L1-L27)
|
||||
- [app/(dashboard)/dashboard/page.tsx](file://frontend/app/(dashboard)/dashboard/page.tsx#L1-L156)
|
||||
- [components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
|
||||
### 错误处理机制
|
||||
- API 层:fetchWithAuth 在非 2xx 时解析错误消息并抛出异常,保证上层统一处理。
|
||||
- 页面层:仪表盘页捕获错误并提供“重新加载”操作;登录页在认证失败时显示错误提示。
|
||||
- 路由保护:服务器端无会话时直接重定向,避免进入受保护页面。
|
||||
|
||||
**章节来源**
|
||||
- [lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [app/(dashboard)/dashboard/page.tsx](file://frontend/app/(dashboard)/dashboard/page.tsx#L1-L156)
|
||||
- [app/(auth)/login/page.tsx](file://frontend/app/(auth)/login/page.tsx#L1-L93)
|
||||
- [app/(dashboard)/layout.tsx](file://frontend/app/(dashboard)/layout.tsx#L1-L27)
|
||||
|
||||
## 依赖分析
|
||||
- 核心框架:Next.js 14(App Router)、React 18。
|
||||
- 认证:NextAuth.js(凭据提供者、JWT 会话、回调映射)。
|
||||
- UI:Radix UI(对话框、下拉菜单、标签页、选择器等)、Lucide React 图标。
|
||||
- 样式:Tailwind CSS、Tailwind 插件(动画)、class-variance-authority、clsx、tailwind-merge。
|
||||
- 图表:Recharts 用于可视化展示。
|
||||
- 开发工具:ESLint、TypeScript、PostCSS、Tailwind。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
N["Next.js 14"] --> R["React 18"]
|
||||
N --> NA["NextAuth.js"]
|
||||
NA --> AC["凭据提供者"]
|
||||
UI["Radix UI + Lucide React"] --> TW["Tailwind CSS"]
|
||||
TW --> CN["class-variance-authority"]
|
||||
CN --> CL["clsx"]
|
||||
CL --> TM["tailwind-merge"]
|
||||
VIZ["Recharts"] --> UI
|
||||
TS["TypeScript"] --> N
|
||||
ESL["ESLint"] --> N
|
||||
PC["PostCSS"] --> TW
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
|
||||
**章节来源**
|
||||
- [package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
|
||||
## 性能考虑
|
||||
- App Router 与服务器组件:利用服务器端渲染与路由组隔离,减少不必要的客户端渲染与网络请求。
|
||||
- 客户端水合:仅在必要页面标记为客户端组件,避免过度水合。
|
||||
- 缓存与重试:可在 API 客户端增加缓存策略与重试逻辑(建议项)。
|
||||
- 图表渲染:对大数据集使用虚拟化或采样策略(建议项)。
|
||||
- 资源优化:开启图片与静态资源优化(Next.js 默认支持),按需加载第三方库。
|
||||
- 构建优化:使用生产构建与代码分割,避免打包体积过大。
|
||||
|
||||
## 故障排除指南
|
||||
- 登录失败:检查凭据是否正确,确认后端认证接口可用;查看登录页错误提示与 NextAuth 回调日志。
|
||||
- 会话丢失:确认 Cookie 设置、SameSite 与跨域配置;检查 NextAuth 回调是否正确写入 token。
|
||||
- 仪表盘空白:确认服务器端 getServerSession 返回有效会话;检查客户端 useSession 是否拿到访问令牌。
|
||||
- API 请求失败:查看 fetchWithAuth 抛出的错误信息,确认后端接口路径与鉴权头是否正确。
|
||||
- 样式异常:检查 Tailwind 配置 content 路径与 CSS 变量是否生效;确认暗色模式 class 是否正确切换。
|
||||
|
||||
**章节来源**
|
||||
- [app/(auth)/login/page.tsx](file://frontend/app/(auth)/login/page.tsx#L1-L93)
|
||||
- [app/(dashboard)/layout.tsx](file://frontend/app/(dashboard)/layout.tsx#L1-L27)
|
||||
- [lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
|
||||
## 结论
|
||||
本架构以 Next.js 14 App Router 为核心,结合服务器组件与客户端组件的混合模式,实现了清晰的页面组织与路由保护;通过 NextAuth.js 的凭据提供者与 JWT 会话策略,完成了前后端认证协作;UI 组件库以 Radix UI 与 Tailwind 为基础,具备良好的可维护性与一致性;API 客户端统一处理鉴权与错误,配合页面层的状态与错误处理,形成完整的前端数据流。建议在后续迭代中进一步完善缓存与重试、图表渲染优化与构建体积治理,持续提升用户体验与可维护性。
|
||||
|
||||
## 附录
|
||||
- 最佳实践清单
|
||||
- 使用路由组隔离功能域,保持页面组织清晰。
|
||||
- 将路由保护放在服务器端,优先保障安全性。
|
||||
- 仅在需要时标记客户端组件,减少水合成本。
|
||||
- 统一错误处理与用户反馈,提供明确的重试与刷新能力。
|
||||
- 严格类型约束,结合 TypeScript 与自定义类型扩展,降低运行时风险。
|
||||
- 持续优化构建产物与运行时性能,关注首屏与交互延迟。
|
||||
|
|
@ -0,0 +1,323 @@
|
|||
# 数据可视化
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [frontend/components/charts/trend-chart.tsx](file://frontend/components/charts/trend-chart.tsx)
|
||||
- [frontend/components/charts/platform-chart.tsx](file://frontend/components/charts/platform-chart.tsx)
|
||||
- [frontend/lib/platforms.ts](file://frontend/lib/platforms.ts)
|
||||
- [frontend/lib/api.ts](file://frontend/lib/api.ts)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx](file://frontend/app/(dashboard)/dashboard/page.tsx)
|
||||
- [frontend/app/(dashboard)/dashboard/citations/page.tsx](file://frontend/app/(dashboard)/dashboard/citations/page.tsx)
|
||||
- [frontend/app/(dashboard)/dashboard/queries/page.tsx](file://frontend/app/(dashboard)/dashboard/queries/page.tsx)
|
||||
- [frontend/app/(dashboard)/dashboard/reports/page.tsx](file://frontend/app/(dashboard)/dashboard/reports/page.tsx)
|
||||
- [frontend/tailwind.config.ts](file://frontend/tailwind.config.ts)
|
||||
- [frontend/components/providers.tsx](file://frontend/components/providers.tsx)
|
||||
- [frontend/app/layout.tsx](file://frontend/app/layout.tsx)
|
||||
- [frontend/package.json](file://frontend/package.json)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [引言](#引言)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 引言
|
||||
本文件系统性阐述基于 Recharts 的数据可视化实现,聚焦两类图表组件:趋势图表与平台对比图表。内容涵盖数据绑定机制、数据格式与配置选项、交互与动画、响应式适配、主题定制与样式、可访问性支持、使用示例、性能优化与调试方法。读者无需深入前端技术背景即可理解图表的设计思路与使用方式。
|
||||
|
||||
## 项目结构
|
||||
前端采用 Next.js 应用程序目录结构,图表组件位于 components/charts 下,分别负责趋势与平台对比展示;统计数据由页面通过 API 获取并传递给图表组件;Tailwind CSS 提供主题变量与样式扩展;Radix UI 组件与 Lucide 图标增强交互体验。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "应用层"
|
||||
Dashboard["仪表板页面<br/>dashboard/page.tsx"]
|
||||
Citations["引用记录页面<br/>citations/page.tsx"]
|
||||
Queries["查询词页面<br/>queries/page.tsx"]
|
||||
Reports["报告导出页面<br/>reports/page.tsx"]
|
||||
end
|
||||
subgraph "图表组件"
|
||||
Trend["趋势图表<br/>trend-chart.tsx"]
|
||||
Platform["平台对比图表<br/>platform-chart.tsx"]
|
||||
end
|
||||
subgraph "数据与工具"
|
||||
API["API 客户端<br/>lib/api.ts"]
|
||||
Platforms["平台映射<br/>lib/platforms.ts"]
|
||||
end
|
||||
subgraph "样式与主题"
|
||||
Tailwind["Tailwind 配置<br/>tailwind.config.ts"]
|
||||
Providers["会话提供者<br/>components/providers.tsx"]
|
||||
Layout["根布局<br/>app/layout.tsx"]
|
||||
end
|
||||
Dashboard --> Trend
|
||||
Dashboard --> Platform
|
||||
Trend --> API
|
||||
Platform --> API
|
||||
Platform --> Platforms
|
||||
API --> Layout
|
||||
Providers --> Layout
|
||||
Tailwind --> Layout
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx](file://frontend/app/(dashboard)/dashboard/page.tsx)
|
||||
- [frontend/components/charts/trend-chart.tsx](file://frontend/components/charts/trend-chart.tsx)
|
||||
- [frontend/components/charts/platform-chart.tsx](file://frontend/components/charts/platform-chart.tsx)
|
||||
- [frontend/lib/api.ts](file://frontend/lib/api.ts)
|
||||
- [frontend/lib/platforms.ts](file://frontend/lib/platforms.ts)
|
||||
- [frontend/tailwind.config.ts](file://frontend/tailwind.config.ts)
|
||||
- [frontend/components/providers.tsx](file://frontend/components/providers.tsx)
|
||||
- [frontend/app/layout.tsx](file://frontend/app/layout.tsx)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/package.json:25](file://frontend/package.json#L25)
|
||||
- [frontend/tailwind.config.ts:10-52](file://frontend/tailwind.config.ts#L10-L52)
|
||||
- [frontend/components/providers.tsx:6-8](file://frontend/components/providers.tsx#L6-L8)
|
||||
- [frontend/app/layout.tsx:22-36](file://frontend/app/layout.tsx#L22-L36)
|
||||
|
||||
## 核心组件
|
||||
- 趋势图表(TrendChart)
|
||||
- 功能:以折线图展示随时间变化的引用次数趋势。
|
||||
- 数据格式:数组对象,包含日期与引用次数字段。
|
||||
- 关键特性:网格、坐标轴、提示框、响应式容器、单调曲线、点标记与高亮点。
|
||||
- 平台对比图表(PlatformChart)
|
||||
- 功能:以柱状图展示不同平台的引用率(百分比)对比。
|
||||
- 数据格式:对象映射,键为平台标识,值包含查询数、引用数、引用率等统计。
|
||||
- 关键特性:网格、坐标轴、提示框、响应式容器、颜色映射、柱状圆角、单元格填充。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/components/charts/trend-chart.tsx:13-20](file://frontend/components/charts/trend-chart.tsx#L13-L20)
|
||||
- [frontend/components/charts/trend-chart.tsx:22-59](file://frontend/components/charts/trend-chart.tsx#L22-L59)
|
||||
- [frontend/components/charts/platform-chart.tsx:15-23](file://frontend/components/charts/platform-chart.tsx#L15-L23)
|
||||
- [frontend/components/charts/platform-chart.tsx:34-67](file://frontend/components/charts/platform-chart.tsx#L34-L67)
|
||||
|
||||
## 架构总览
|
||||
图表组件通过页面从 API 获取统计数据,再以 props 形式注入图表。图表内部使用 Recharts 组合子组件完成渲染与交互。主题与样式通过 Tailwind CSS 的 CSS 变量与类名实现深浅色模式适配。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Page as "仪表板页面"
|
||||
participant API as "API 客户端"
|
||||
participant Chart as "图表组件"
|
||||
participant Recharts as "Recharts 组件"
|
||||
Page->>API : 请求统计数据
|
||||
API-->>Page : 返回统计数据
|
||||
Page->>Chart : 传入数据 props
|
||||
Chart->>Recharts : 渲染图表组合子
|
||||
Recharts-->>Chart : 呈现图形与交互
|
||||
Chart-->>Page : 展示结果
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:128](file://frontend/app/(dashboard)/dashboard/page.tsx#L128)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:143](file://frontend/app/(dashboard)/dashboard/page.tsx#L143)
|
||||
- [frontend/lib/api.ts:47-49](file://frontend/lib/api.ts#L47-L49)
|
||||
- [frontend/components/charts/trend-chart.tsx:22-59](file://frontend/components/charts/trend-chart.tsx#L22-L59)
|
||||
- [frontend/components/charts/platform-chart.tsx:34-67](file://frontend/components/charts/platform-chart.tsx#L34-L67)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 趋势图表(TrendChart)
|
||||
- 数据绑定
|
||||
- 接收数据数组,每个元素包含日期与引用次数字段。
|
||||
- X 轴使用日期字符串作为数据键,并在渲染时格式化为“月/日”。
|
||||
- Y 轴禁止小数,确保引用次数为整数。
|
||||
- 交互与提示
|
||||
- 提示框使用卡片主题色与边框,标签格式化为“年-月-日”,数值格式化为“引用次数: 数值”。
|
||||
- 动画与视觉
|
||||
- 折线类型为单调曲线,线条宽度与点标记半径较小,高亮点更大以便聚焦。
|
||||
- 网格为虚线,颜色来自语义化类名。
|
||||
- 响应式适配
|
||||
- 使用响应式容器:宽度为 100% 并随父容器自适应,高度为固定像素值。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["组件接收数据"]) --> Render["渲染 LineChart"]
|
||||
Render --> Grid["绘制网格"]
|
||||
Render --> XAxis["配置 X 轴日期"]
|
||||
Render --> YAxis["配置 Y 轴整数"]
|
||||
Render --> Tooltip["配置提示框卡片样式"]
|
||||
Render --> Line["绘制折线单调曲线"]
|
||||
Line --> End(["完成渲染"])
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [frontend/components/charts/trend-chart.tsx:22-59](file://frontend/components/charts/trend-chart.tsx#L22-L59)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/components/charts/trend-chart.tsx:13-20](file://frontend/components/charts/trend-chart.tsx#L13-L20)
|
||||
- [frontend/components/charts/trend-chart.tsx:22-59](file://frontend/components/charts/trend-chart.tsx#L22-L59)
|
||||
|
||||
### 平台对比图表(PlatformChart)
|
||||
- 数据绑定
|
||||
- 输入为平台统计对象映射,组件内部将其转换为包含平台标签与引用率的数组。
|
||||
- 平台标签通过平台映射表进行本地化。
|
||||
- 交互与提示
|
||||
- 提示框格式化为“引用率: 数值%”。
|
||||
- 视觉与样式
|
||||
- Y 轴范围固定为 0–100,刻度格式化为百分比。
|
||||
- 柱状图圆角,颜色按预设调色板循环填充。
|
||||
- 响应式适配
|
||||
- 同样使用响应式容器,保证在不同屏幕尺寸下保持一致的视觉比例。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["接收平台统计对象"]) --> Map["遍历对象生成数组"]
|
||||
Map --> Localize["平台标签本地化"]
|
||||
Localize --> Render["渲染 BarChart"]
|
||||
Render --> Grid["绘制网格"]
|
||||
Render --> XAxis["配置 X 轴平台标签"]
|
||||
Render --> YAxis["配置 Y 轴0-100%"]
|
||||
Render --> Tooltip["配置提示框百分比"]
|
||||
Render --> Bars["绘制柱状条带颜色"]
|
||||
Bars --> End(["完成渲染"])
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [frontend/components/charts/platform-chart.tsx:34-67](file://frontend/components/charts/platform-chart.tsx#L34-L67)
|
||||
- [frontend/lib/platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/components/charts/platform-chart.tsx:15-23](file://frontend/components/charts/platform-chart.tsx#L15-L23)
|
||||
- [frontend/components/charts/platform-chart.tsx:34-67](file://frontend/components/charts/platform-chart.tsx#L34-L67)
|
||||
- [frontend/lib/platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
|
||||
### 数据预处理与格式转换
|
||||
- 平台对比图表的数据转换
|
||||
- 将输入对象映射转换为数组,计算引用率并四舍五入为整数百分比。
|
||||
- 使用平台映射表将平台键转换为中文标签。
|
||||
- 日期格式化
|
||||
- 趋势图表在 X 轴渲染时将日期字符串格式化为“月/日”,在提示框标签中格式化为“年-月-日”。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/components/charts/platform-chart.tsx:35-39](file://frontend/components/charts/platform-chart.tsx#L35-L39)
|
||||
- [frontend/lib/platforms.ts:1-8](file://frontend/lib/platforms.ts#L1-L8)
|
||||
- [frontend/components/charts/trend-chart.tsx:30-46](file://frontend/components/charts/trend-chart.tsx#L30-L46)
|
||||
|
||||
### 实时更新机制
|
||||
- 页面通过 API 客户端拉取最新统计数据,图表组件接收新数据后自动重新渲染。
|
||||
- 仪表板页面在挂载时发起数据请求,图表组件在接收到 props 更新后即时反映。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:128](file://frontend/app/(dashboard)/dashboard/page.tsx#L128)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:143](file://frontend/app/(dashboard)/dashboard/page.tsx#L143)
|
||||
- [frontend/lib/api.ts:47-49](file://frontend/lib/api.ts#L47-L49)
|
||||
|
||||
### 主题定制与样式配置
|
||||
- Tailwind CSS 扩展
|
||||
- 定义了语义化颜色变量与圆角变量,图表中的网格与提示框颜色直接引用这些变量,实现深浅色模式一致性。
|
||||
- 类名与内联样式结合
|
||||
- 网格与提示框使用类名与内联样式混合,确保在不同主题下具备合适的对比度与可读性。
|
||||
- 响应式容器
|
||||
- 图表统一使用响应式容器,保证在移动端与桌面端均能良好显示。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/tailwind.config.ts:10-52](file://frontend/tailwind.config.ts#L10-L52)
|
||||
- [frontend/components/charts/trend-chart.tsx:26-41](file://frontend/components/charts/trend-chart.tsx#L26-L41)
|
||||
- [frontend/components/charts/platform-chart.tsx:44-56](file://frontend/components/charts/platform-chart.tsx#L44-L56)
|
||||
|
||||
### 可访问性支持
|
||||
- 文本标签与格式化
|
||||
- 提示框标签与坐标轴刻度均采用人类可读的日期与百分比格式,提升信息可理解性。
|
||||
- 颜色与对比度
|
||||
- 使用 Tailwind 语义化颜色变量,确保在深浅主题下具备足够对比度。
|
||||
- 交互反馈
|
||||
- 图表点标记与高亮点提供视觉反馈,便于用户定位数据点。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/components/charts/trend-chart.tsx:30-46](file://frontend/components/charts/trend-chart.tsx#L30-L46)
|
||||
- [frontend/components/charts/platform-chart.tsx:48-57](file://frontend/components/charts/platform-chart.tsx#L48-L57)
|
||||
- [frontend/tailwind.config.ts:12-46](file://frontend/tailwind.config.ts#L12-L46)
|
||||
|
||||
### 使用示例
|
||||
- 在仪表板页面中引入并使用两个图表组件,分别传入趋势数据与平台统计数据。
|
||||
- 页面通过 API 客户端获取数据,然后将数据作为 props 传递给图表组件。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:5](file://frontend/app/(dashboard)/dashboard/page.tsx#L5)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:128](file://frontend/app/(dashboard)/dashboard/page.tsx#L128)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:143](file://frontend/app/(dashboard)/dashboard/page.tsx#L143)
|
||||
|
||||
## 依赖分析
|
||||
- Recharts 版本
|
||||
- 项目使用较新版本的 Recharts,具备良好的性能与丰富的交互能力。
|
||||
- 样式与主题
|
||||
- Tailwind CSS 提供主题变量,图表组件通过类名与 CSS 变量实现主题一致性。
|
||||
- 会话与认证
|
||||
- 通过会话提供者与 API 客户端配合,确保图表数据请求携带认证头。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
Package["package.json 依赖"] --> Recharts["recharts"]
|
||||
Package --> Tailwind["tailwindcss"]
|
||||
Tailwind --> Charts["图表组件样式"]
|
||||
Providers["providers.tsx"] --> API["api.ts"]
|
||||
API --> Charts
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [frontend/package.json:25](file://frontend/package.json#L25)
|
||||
- [frontend/tailwind.config.ts:10-52](file://frontend/tailwind.config.ts#L10-L52)
|
||||
- [frontend/components/providers.tsx:6-8](file://frontend/components/providers.tsx#L6-L8)
|
||||
- [frontend/lib/api.ts:1-21](file://frontend/lib/api.ts#L1-L21)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/package.json:11-27](file://frontend/package.json#L11-L27)
|
||||
- [frontend/tailwind.config.ts:10-52](file://frontend/tailwind.config.ts#L10-L52)
|
||||
- [frontend/components/providers.tsx:6-8](file://frontend/components/providers.tsx#L6-L8)
|
||||
- [frontend/lib/api.ts:1-21](file://frontend/lib/api.ts#L1-L21)
|
||||
|
||||
## 性能考虑
|
||||
- 数据规模控制
|
||||
- 趋势图表建议限制历史天数,避免过多点导致渲染卡顿。
|
||||
- 渲染优化
|
||||
- 使用响应式容器与固定高度,减少重排与重绘。
|
||||
- 折线与点标记半径较小,降低 SVG 渲染负担。
|
||||
- 网络与缓存
|
||||
- API 请求应合理设置缓存策略,避免频繁重复请求。
|
||||
- 主题切换
|
||||
- Tailwind CSS 变量在主题切换时无需重绘图表,提升切换流畅度。
|
||||
|
||||
[本节为通用指导,不直接分析具体文件]
|
||||
|
||||
## 故障排查指南
|
||||
- 图表不显示或空白
|
||||
- 检查传入数据是否为空或格式不符;确认页面已正确获取并传递数据。
|
||||
- 日期显示异常
|
||||
- 确认日期字符串格式与解析逻辑一致;检查提示框标签格式化函数。
|
||||
- 百分比显示异常
|
||||
- 确认数据转换逻辑是否将引用率乘以 100 并四舍五入为整数。
|
||||
- 主题颜色不生效
|
||||
- 检查 Tailwind CSS 变量定义与类名引用是否正确。
|
||||
- 认证失败导致数据为空
|
||||
- 确认会话提供者与 API 客户端的认证头设置。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/components/charts/trend-chart.tsx:30-46](file://frontend/components/charts/trend-chart.tsx#L30-L46)
|
||||
- [frontend/components/charts/platform-chart.tsx:37-39](file://frontend/components/charts/platform-chart.tsx#L37-L39)
|
||||
- [frontend/tailwind.config.ts:12-46](file://frontend/tailwind.config.ts#L12-L46)
|
||||
- [frontend/lib/api.ts:12-14](file://frontend/lib/api.ts#L12-L14)
|
||||
|
||||
## 结论
|
||||
本项目通过简洁的图表组件与清晰的数据绑定机制,实现了趋势与平台对比两类核心可视化需求。借助 Recharts 的强大能力与 Tailwind CSS 的主题体系,图表在交互、动画、响应式与可访问性方面均达到良好水平。后续可在数据规模控制、缓存策略与主题扩展方面进一步优化。
|
||||
|
||||
[本节为总结性内容,不直接分析具体文件]
|
||||
|
||||
## 附录
|
||||
- API 客户端方法
|
||||
- 查询词列表、引用记录列表、引用统计、报告导出等接口。
|
||||
- 页面职责
|
||||
- 仪表板页面负责聚合趋势与平台图表;引用记录与查询词页面负责数据筛选与管理;报告导出页面负责报表下载。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/lib/api.ts:23-57](file://frontend/lib/api.ts#L23-L57)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:128-143](file://frontend/app/(dashboard)/dashboard/page.tsx#L128-L143)
|
||||
- [frontend/app/(dashboard)/dashboard/citations/page.tsx:73-94](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L73-L94)
|
||||
- [frontend/app/(dashboard)/dashboard/queries/page.tsx:102-113](file://frontend/app/(dashboard)/dashboard/queries/page.tsx#L102-L113)
|
||||
- [frontend/app/(dashboard)/dashboard/reports/page.tsx:49-93](file://frontend/app/(dashboard)/dashboard/reports/page.tsx#L49-L93)
|
||||
|
|
@ -0,0 +1,409 @@
|
|||
# 认证系统前端实现
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [frontend/app/api/auth/[...nextauth]/route.ts](file://frontend/app/api/auth/[...nextauth]/route.ts)
|
||||
- [frontend/lib/auth.ts](file://frontend/lib/auth.ts)
|
||||
- [frontend/types/next-auth.d.ts](file://frontend/types/next-auth.d.ts)
|
||||
- [frontend/components/providers.tsx](file://frontend/components/providers.tsx)
|
||||
- [frontend/lib/api.ts](file://frontend/lib/api.ts)
|
||||
- [frontend/app/(auth)/layout.tsx](file://frontend/app/(auth)/layout.tsx)
|
||||
- [frontend/app/(auth)/login/page.tsx](file://frontend/app/(auth)/login/page.tsx)
|
||||
- [frontend/app/(dashboard)/layout.tsx](file://frontend/app/(dashboard)/layout.tsx)
|
||||
- [frontend/app/layout.tsx](file://frontend/app/layout.tsx)
|
||||
- [frontend/components/layout/header.tsx](file://frontend/components/layout/header.tsx)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx](file://frontend/app/(dashboard)/dashboard/page.tsx)
|
||||
- [frontend/package.json](file://frontend/package.json)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构概览](#架构概览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖关系分析](#依赖关系分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排除指南](#故障排除指南)
|
||||
9. [结论](#结论)
|
||||
|
||||
## 简介
|
||||
|
||||
本文档详细说明了基于 NextAuth.js 的前端认证系统实现,包括 NextAuth.js 集成配置、会话管理、用户状态同步机制、认证提供者配置、OAuth 流程和 JWT 令牌处理。文档还涵盖了路由保护机制、权限验证、用户状态持久化、API 客户端认证头设置、请求拦截和错误处理,以及登录状态管理、会话过期处理和安全最佳实践。
|
||||
|
||||
## 项目结构
|
||||
|
||||
前端认证系统主要分布在以下目录和文件中:
|
||||
|
||||
- NextAuth 路由处理器:`frontend/app/api/auth/[...nextauth]/route.ts`
|
||||
- NextAuth 配置:`frontend/lib/auth.ts`
|
||||
- 类型声明扩展:`frontend/types/next-auth.d.ts`
|
||||
- 全局 Provider 包装:`frontend/components/providers.tsx`
|
||||
- API 客户端封装:`frontend/lib/api.ts`
|
||||
- 登录页面:`frontend/app/(auth)/login/page.tsx`
|
||||
- 仪表盘布局保护:`frontend/app/(dashboard)/layout.tsx`
|
||||
- 根布局与 Provider 注入:`frontend/app/layout.tsx`
|
||||
- 头部组件与登出:`frontend/components/layout/header.tsx`
|
||||
- 仪表盘首页与会话使用:`frontend/app/(dashboard)/dashboard/page.tsx`
|
||||
- 依赖包配置:`frontend/package.json`
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "前端应用"
|
||||
A["根布局<br/>app/layout.tsx"]
|
||||
B["全局Provider<br/>components/providers.tsx"]
|
||||
C["NextAuth路由<br/>app/api/auth/[...nextauth]/route.ts"]
|
||||
D["NextAuth配置<br/>lib/auth.ts"]
|
||||
E["类型声明<br/>types/next-auth.d.ts"]
|
||||
F["API客户端<br/>lib/api.ts"]
|
||||
G["登录页面<br/>app/(auth)/login/page.tsx"]
|
||||
H["仪表盘布局<br/>app/(dashboard)/layout.tsx"]
|
||||
I["头部组件<br/>components/layout/header.tsx"]
|
||||
J["仪表盘首页<br/>app/(dashboard)/dashboard/page.tsx"]
|
||||
end
|
||||
A --> B
|
||||
B --> C
|
||||
C --> D
|
||||
D --> E
|
||||
D --> F
|
||||
G --> D
|
||||
H --> D
|
||||
I --> D
|
||||
J --> F
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [frontend/app/layout.tsx:22-36](file://frontend/app/layout.tsx#L22-L36)
|
||||
- [frontend/components/providers.tsx:6-8](file://frontend/components/providers.tsx#L6-L8)
|
||||
- [frontend/app/api/auth/[...nextauth]/route.ts:1-7](file://frontend/app/api/auth/[...nextauth]/route.ts#L1-L7)
|
||||
- [frontend/lib/auth.ts:5-55](file://frontend/lib/auth.ts#L5-L55)
|
||||
- [frontend/types/next-auth.d.ts:1-26](file://frontend/types/next-auth.d.ts#L1-L26)
|
||||
- [frontend/lib/api.ts:23-57](file://frontend/lib/api.ts#L23-L57)
|
||||
- [frontend/app/(auth)/login/page.tsx:19-42](file://frontend/app/(auth)/login/page.tsx#L19-L42)
|
||||
- [frontend/app/(dashboard)/layout.tsx:7-26](file://frontend/app/(dashboard)/layout.tsx#L7-L26)
|
||||
- [frontend/components/layout/header.tsx:7-29](file://frontend/components/layout/header.tsx#L7-L29)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:20-44](file://frontend/app/(dashboard)/dashboard/page.tsx#L20-L44)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/layout.tsx:22-36](file://frontend/app/layout.tsx#L22-L36)
|
||||
- [frontend/components/providers.tsx:6-8](file://frontend/components/providers.tsx#L6-L8)
|
||||
- [frontend/app/api/auth/[...nextauth]/route.ts:1-7](file://frontend/app/api/auth/[...nextauth]/route.ts#L1-L7)
|
||||
- [frontend/lib/auth.ts:5-55](file://frontend/lib/auth.ts#L5-L55)
|
||||
- [frontend/types/next-auth.d.ts:1-26](file://frontend/types/next-auth.d.ts#L1-L26)
|
||||
- [frontend/lib/api.ts:23-57](file://frontend/lib/api.ts#L23-L57)
|
||||
- [frontend/app/(auth)/login/page.tsx:19-42](file://frontend/app/(auth)/login/page.tsx#L19-L42)
|
||||
- [frontend/app/(dashboard)/layout.tsx:7-26](file://frontend/app/(dashboard)/layout.tsx#L7-L26)
|
||||
- [frontend/components/layout/header.tsx:7-29](file://frontend/components/layout/header.tsx#L7-L29)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:20-44](file://frontend/app/(dashboard)/dashboard/page.tsx#L20-L44)
|
||||
|
||||
## 核心组件
|
||||
|
||||
### NextAuth.js 集成配置
|
||||
|
||||
NextAuth.js 在本项目中的核心配置位于 `lib/auth.ts`,采用凭据式认证提供者,并通过回调函数将后端返回的访问令牌和用户标识写入 JWT 和会话对象中。配置要点如下:
|
||||
|
||||
- 凭据式提供者:接收邮箱和密码,调用后端 `/api/v1/auth/login` 接口进行认证。
|
||||
- 会话策略:使用 JWT 策略,便于在客户端存储和传递访问令牌。
|
||||
- 回调函数:
|
||||
- `jwt`:当用户存在时,将 `accessToken` 和 `id` 写入 JWT。
|
||||
- `session`:将 `accessToken` 和 `id` 同步到会话对象,供前端组件使用。
|
||||
- 登录页重定向:通过 `pages` 配置将登录页指定为 `/login`,需要登录时跳转至该页面;登录失败时的错误提示由登录页自行展示。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["开始"]) --> Validate["校验凭据<br/>邮箱/密码"]
|
||||
Validate --> CallAPI["调用后端登录接口<br/>/api/v1/auth/login"]
|
||||
CallAPI --> HasToken{"返回包含访问令牌?"}
|
||||
HasToken --> |是| BuildUser["构建用户对象<br/>包含id/name/email/accessToken"]
|
||||
HasToken --> |否| ReturnNull["返回空"]
|
||||
BuildUser --> JWT["写入JWT回调<br/>保存accessToken/id"]
|
||||
JWT --> Session["写入会话回调<br/>同步accessToken/id"]
|
||||
Session --> Done(["完成"])
|
||||
ReturnNull --> Done
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [frontend/lib/auth.ts:13-32](file://frontend/lib/auth.ts#L13-L32)
|
||||
- [frontend/lib/auth.ts:39-50](file://frontend/lib/auth.ts#L39-L50)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/lib/auth.ts:5-55](file://frontend/lib/auth.ts#L5-L55)
|
||||
|
||||
### NextAuth 路由处理器
|
||||
|
||||
NextAuth 路由处理器位于 `app/api/auth/[...nextauth]/route.ts`,负责将 NextAuth 实例导出为 GET 和 POST 处理器,以支持 NextAuth 的所有认证流程。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/api/auth/[...nextauth]/route.ts:1-7](file://frontend/app/api/auth/[...nextauth]/route.ts#L1-L7)
|
||||
|
||||
### 类型声明扩展
|
||||
|
||||
为了在 TypeScript 中正确识别自定义的 `accessToken` 和 `id` 字段,需要扩展 NextAuth 的类型声明。类型声明位于 `types/next-auth.d.ts`,确保在使用 `useSession` 时能获得完整的类型提示。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/types/next-auth.d.ts:1-26](file://frontend/types/next-auth.d.ts#L1-L26)
|
||||
|
||||
### 全局 Provider 包装
|
||||
|
||||
根布局通过 `components/providers.tsx` 将 `SessionProvider` 注入到整个应用中,使所有子组件能够使用 `useSession`、`signIn`、`signOut` 等 NextAuth 提供的 Hook 和方法。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/layout.tsx:22-36](file://frontend/app/layout.tsx#L22-L36)
|
||||
- [frontend/components/providers.tsx:6-8](file://frontend/components/providers.tsx#L6-L8)
|
||||
|
||||
### API 客户端封装
|
||||
|
||||
API 客户端位于 `lib/api.ts`,统一处理认证头设置、请求拦截和错误处理。其特点包括:
|
||||
|
||||
- 基础 URL:从环境变量 `NEXT_PUBLIC_API_URL` 获取后端地址。
|
||||
- 认证头设置:当传入 token 时,在请求头添加 `Authorization: Bearer ${token}`。
|
||||
- 错误处理:对非 2xx 响应抛出错误,错误消息来自后端 JSON 或默认文本。
|
||||
- 功能模块:
|
||||
- 认证:注册、登录、获取当前用户信息。
|
||||
- 查询:列表、创建、更新、删除。
|
||||
- 引用:列表、统计。
|
||||
- 报告:导出 CSV。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant UI as "UI组件"
|
||||
participant API as "API客户端"
|
||||
participant BE as "后端服务"
|
||||
UI->>API : 调用带token的方法
|
||||
API->>API : 设置Authorization头
|
||||
API->>BE : 发送HTTP请求
|
||||
BE-->>API : 返回JSON响应
|
||||
API->>API : 检查响应状态
|
||||
API-->>UI : 成功返回数据或抛出错误
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [frontend/lib/api.ts:3-21](file://frontend/lib/api.ts#L3-L21)
|
||||
- [frontend/lib/api.ts:23-57](file://frontend/lib/api.ts#L23-L57)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
### 登录页面与认证流程
|
||||
|
||||
登录页面位于 `app/(auth)/login/page.tsx`,使用 `signIn('credentials', {...})` 触发 NextAuth 的凭据式认证流程。表单提交后,若认证成功则跳转到 `/dashboard` 并刷新页面;若失败则显示错误信息。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/(auth)/login/page.tsx:19-42](file://frontend/app/(auth)/login/page.tsx#L19-L42)
|
||||
|
||||
### 仪表盘布局保护
|
||||
|
||||
仪表盘布局 `app/(dashboard)/layout.tsx` 使用 `getServerSession(authOptions)` 在服务端检查会话有效性,未登录则重定向到 `/login`。这确保了前端路由的服务器端保护。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/(dashboard)/layout.tsx:7-26](file://frontend/app/(dashboard)/layout.tsx#L7-L26)
|
||||
|
||||
### 头部组件与登出
|
||||
|
||||
头部组件 `components/layout/header.tsx` 使用 `useSession` 获取当前会话信息,并通过 `signOut({ callbackUrl: '/login' })` 实现登出功能,登出后自动跳转到登录页。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/components/layout/header.tsx:7-29](file://frontend/components/layout/header.tsx#L7-L29)
|
||||
|
||||
### 仪表盘首页与会话使用
|
||||
|
||||
仪表盘首页 `app/(dashboard)/dashboard/page.tsx` 使用 `useSession` 获取 `accessToken`,并在依赖变化时调用 API 客户端获取统计数据。该组件展示了如何在客户端读取 NextAuth 会话并将其用于 API 请求。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:20-44](file://frontend/app/(dashboard)/dashboard/page.tsx#L20-L44)
|
||||
|
||||
## 架构概览
|
||||
|
||||
整体认证架构由前端 NextAuth.js、后端认证接口和 API 客户端组成。前端通过 NextAuth 管理会话和令牌,API 客户端负责携带令牌与后端交互。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "前端"
|
||||
U["用户界面"]
|
||||
NA["NextAuth.js<br/>凭据式认证"]
|
||||
SP["SessionProvider<br/>全局会话注入"]
|
||||
AC["API客户端<br/>fetchWithAuth"]
|
||||
end
|
||||
subgraph "后端"
|
||||
AUTH["认证接口<br/>/api/v1/auth/*"]
|
||||
DATA["业务接口<br/>/api/v1/*"]
|
||||
end
|
||||
U --> SP
|
||||
SP --> NA
|
||||
NA --> AC
|
||||
AC --> AUTH
|
||||
AC --> DATA
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [frontend/lib/auth.ts:5-55](file://frontend/lib/auth.ts#L5-L55)
|
||||
- [frontend/lib/api.ts:23-57](file://frontend/lib/api.ts#L23-L57)
|
||||
- [frontend/app/api/auth/[...nextauth]/route.ts:1-7](file://frontend/app/api/auth/[...nextauth]/route.ts#L1-L7)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### NextAuth.js 配置类图
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class AuthOptions {
|
||||
+providers
|
||||
+session
|
||||
+callbacks
|
||||
+pages
|
||||
}
|
||||
class CredentialsProvider {
|
||||
+name
|
||||
+credentials
|
||||
+authorize(credentials)
|
||||
}
|
||||
class JWTCallbacks {
|
||||
+jwt({token, user})
|
||||
}
|
||||
class SessionCallbacks {
|
||||
+session({session, token})
|
||||
}
|
||||
AuthOptions --> CredentialsProvider : "包含"
|
||||
AuthOptions --> JWTCallbacks : "回调"
|
||||
AuthOptions --> SessionCallbacks : "回调"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [frontend/lib/auth.ts:5-55](file://frontend/lib/auth.ts#L5-L55)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/lib/auth.ts:5-55](file://frontend/lib/auth.ts#L5-L55)
|
||||
|
||||
### 登录流程序列图
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant 用户 as "用户"
|
||||
participant 登录页 as "登录页面"
|
||||
participant NextAuth as "NextAuth"
|
||||
participant 后端 as "后端认证接口"
|
||||
用户->>登录页 : 输入邮箱/密码并提交
|
||||
登录页->>NextAuth : signIn('credentials', {email,password,redirect : false})
|
||||
NextAuth->>后端 : POST /api/v1/auth/login
|
||||
后端-->>NextAuth : 返回access_token和用户信息
|
||||
NextAuth-->>登录页 : 返回认证结果
|
||||
登录页->>登录页 : 成功则跳转并刷新页面
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [frontend/app/(auth)/login/page.tsx:26-42](file://frontend/app/(auth)/login/page.tsx#L26-L42)
|
||||
- [frontend/lib/auth.ts:13-32](file://frontend/lib/auth.ts#L13-L32)
|
||||
- [frontend/lib/api.ts:30-35](file://frontend/lib/api.ts#L30-L35)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/(auth)/login/page.tsx:19-42](file://frontend/app/(auth)/login/page.tsx#L19-L42)
|
||||
- [frontend/lib/auth.ts:13-32](file://frontend/lib/auth.ts#L13-L32)
|
||||
- [frontend/lib/api.ts:23-35](file://frontend/lib/api.ts#L23-L35)
|
||||
|
||||
### 会话管理与状态同步
|
||||
|
||||
会话管理通过 NextAuth 的回调函数实现从 JWT 到会话对象的同步:`jwt` 回调先将 `accessToken` 和用户标识写入 JWT,`session` 回调再将其同步到会话对象,确保前端组件可以稳定地读取 `accessToken` 和用户标识。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A["用户登录成功"] --> B["JWT回调<br/>写入accessToken/id"]
|
||||
B --> C["会话回调<br/>同步accessToken/id"]
|
||||
C --> D["useSession读取会话"]
|
||||
D --> E["API客户端携带令牌请求后端"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [frontend/lib/auth.ts:39-50](file://frontend/lib/auth.ts#L39-L50)
|
||||
- [frontend/lib/api.ts:12-14](file://frontend/lib/api.ts#L12-L14)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/lib/auth.ts:39-50](file://frontend/lib/auth.ts#L39-L50)
|
||||
- [frontend/lib/api.ts:12-14](file://frontend/lib/api.ts#L12-L14)
|
||||
|
||||
### 路由保护机制
|
||||
|
||||
- 服务器端保护:仪表盘布局在服务端检查会话,未登录直接重定向到登录页。
|
||||
- 客户端保护:通过 `useSession` 在客户端读取会话状态,控制页面渲染和导航。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/(dashboard)/layout.tsx:7-26](file://frontend/app/(dashboard)/layout.tsx#L7-L26)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:20-44](file://frontend/app/(dashboard)/dashboard/page.tsx#L20-L44)
|
||||
|
||||
### 权限验证与用户状态持久化
|
||||
|
||||
- 权限验证:通过 `useSession` 获取的 `accessToken` 控制 API 请求权限。
|
||||
- 用户状态持久化:NextAuth 的 JWT 策略将用户信息持久化在客户端,避免每次刷新丢失。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/lib/auth.ts:35-50](file://frontend/lib/auth.ts#L35-L50)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:20-44](file://frontend/app/(dashboard)/dashboard/page.tsx#L20-L44)
|
||||
|
||||
### API 客户端认证头设置与错误处理
|
||||
|
||||
- 认证头设置:在 `fetchWithAuth` 中根据是否传入 token 自动添加 `Authorization` 头。
|
||||
- 请求拦截:统一处理 Content-Type 和自定义请求头。
|
||||
- 错误处理:对非 2xx 响应抛出错误,错误消息来自后端 JSON 或默认文本。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/lib/api.ts:3-21](file://frontend/lib/api.ts#L3-L21)
|
||||
|
||||
## 依赖关系分析
|
||||
|
||||
前端认证系统的关键依赖关系如下:
|
||||
|
||||
- NextAuth.js 版本:`package.json` 中声明为 `^4.24.14`。
|
||||
- 全局 Provider:根布局通过 `components/providers.tsx` 注入 `SessionProvider`。
|
||||
- NextAuth 路由:`app/api/auth/[...nextauth]/route.ts` 导出 NextAuth 实例。
|
||||
- 类型扩展:`types/next-auth.d.ts` 扩展 NextAuth 类型,确保类型安全。
|
||||
- API 客户端:`lib/api.ts` 统一处理认证头和错误处理。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
P["package.json<br/>依赖声明"] --> NA["next-auth@^4.24.14"]
|
||||
L["lib/auth.ts<br/>NextAuth配置"] --> NA
|
||||
R["app/api/auth/[...nextauth]/route.ts<br/>路由处理器"] --> L
|
||||
T["types/next-auth.d.ts<br/>类型扩展"] --> NA
|
||||
A["lib/api.ts<br/>API客户端"] --> L
|
||||
S["components/providers.tsx<br/>SessionProvider"] --> NA
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [frontend/package.json:21-22](file://frontend/package.json#L21-L22)
|
||||
- [frontend/lib/auth.ts:1-3](file://frontend/lib/auth.ts#L1-L3)
|
||||
- [frontend/app/api/auth/[...nextauth]/route.ts:1-2](file://frontend/app/api/auth/[...nextauth]/route.ts#L1-L2)
|
||||
- [frontend/types/next-auth.d.ts:1](file://frontend/types/next-auth.d.ts#L1)
|
||||
- [frontend/lib/api.ts:3](file://frontend/lib/api.ts#L3)
|
||||
- [frontend/components/providers.tsx:3](file://frontend/components/providers.tsx#L3)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/package.json:11-27](file://frontend/package.json#L11-L27)
|
||||
- [frontend/lib/auth.ts:1-3](file://frontend/lib/auth.ts#L1-L3)
|
||||
- [frontend/app/api/auth/[...nextauth]/route.ts:1-2](file://frontend/app/api/auth/[...nextauth]/route.ts#L1-L2)
|
||||
- [frontend/types/next-auth.d.ts:1](file://frontend/types/next-auth.d.ts#L1)
|
||||
- [frontend/lib/api.ts:3](file://frontend/lib/api.ts#L3)
|
||||
- [frontend/components/providers.tsx:3](file://frontend/components/providers.tsx#L3)
|
||||
|
||||
## 性能考虑
|
||||
|
||||
- 会话策略:使用 JWT 策略减少频繁的服务端会话查询,提升客户端响应速度。
|
||||
- 请求复用:API 客户端统一处理认证头和错误,避免重复代码和网络开销。
|
||||
- 依赖更新:在 `useEffect` 中仅在 `accessToken` 变化时触发数据加载,避免不必要的请求。
|
||||
|
||||
[本节为通用性能建议,不涉及具体文件分析]
|
||||
|
||||
## 故障排除指南
|
||||
|
||||
- 登录失败:检查登录页面的错误提示逻辑,确认 `signIn('credentials')` 返回的错误信息。
|
||||
- 会话缺失:确认根布局已注入 `SessionProvider`,且 `useSession` 在受保护组件中正确使用。
|
||||
- API 请求失败:检查 `fetchWithAuth` 的错误处理逻辑,确认后端返回的错误消息格式。
|
||||
- 服务器端重定向:确认仪表盘布局的 `getServerSession` 检查逻辑,确保未登录时正确重定向。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/(auth)/login/page.tsx:36-42](file://frontend/app/(auth)/login/page.tsx#L36-L42)
|
||||
- [frontend/app/layout.tsx:32](file://frontend/app/layout.tsx#L32)
|
||||
- [frontend/lib/api.ts:16-21](file://frontend/lib/api.ts#L16-L21)
|
||||
- [frontend/app/(dashboard)/layout.tsx:12-15](file://frontend/app/(dashboard)/layout.tsx#L12-L15)
|
||||
|
||||
## 结论
|
||||
|
||||
本认证系统通过 NextAuth.js 的凭据式认证、JWT 会话策略和统一的 API 客户端封装,实现了完整的前端认证流程。系统在服务器端和客户端分别提供了路由保护,结合类型扩展确保了开发体验和运行时的类型安全。通过合理的错误处理和会话管理,系统具备良好的可维护性和扩展性。
|
||||
|
|
@ -0,0 +1,436 @@
|
|||
# 页面组件设计
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [frontend/app/layout.tsx](file://frontend/app/layout.tsx)
|
||||
- [frontend/app/(dashboard)/layout.tsx](file://frontend/app/(dashboard)/layout.tsx)
|
||||
- [frontend/components/layout/header.tsx](file://frontend/components/layout/header.tsx)
|
||||
- [frontend/components/layout/sidebar.tsx](file://frontend/components/layout/sidebar.tsx)
|
||||
- [frontend/lib/api.ts](file://frontend/lib/api.ts)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx](file://frontend/app/(dashboard)/dashboard/page.tsx)
|
||||
- [frontend/app/(dashboard)/dashboard/queries/page.tsx](file://frontend/app/(dashboard)/dashboard/queries/page.tsx)
|
||||
- [frontend/app/(dashboard)/dashboard/citations/page.tsx](file://frontend/app/(dashboard)/dashboard/citations/page.tsx)
|
||||
- [frontend/app/(dashboard)/dashboard/reports/page.tsx](file://frontend/app/(dashboard)/dashboard/reports/page.tsx)
|
||||
- [frontend/app/(dashboard)/dashboard/settings/page.tsx](file://frontend/app/(dashboard)/dashboard/settings/page.tsx)
|
||||
- [frontend/components/ui/table.tsx](file://frontend/components/ui/table.tsx)
|
||||
- [frontend/components/ui/dialog.tsx](file://frontend/components/ui/dialog.tsx)
|
||||
- [frontend/lib/platforms.ts](file://frontend/lib/platforms.ts)
|
||||
- [frontend/lib/utils.ts](file://frontend/lib/utils.ts)
|
||||
- [frontend/components/charts/trend-chart.tsx](file://frontend/components/charts/trend-chart.tsx)
|
||||
- [frontend/components/charts/platform-chart.tsx](file://frontend/components/charts/platform-chart.tsx)
|
||||
</cite>
|
||||
|
||||
## 更新摘要
|
||||
**变更内容**
|
||||
- 新增完整的仪表板页面组件系统实现
|
||||
- 更新数据总览页、查询管理页、引用记录页和报告导出页的具体实现
|
||||
- 完善图表组件的详细分析和使用说明
|
||||
- 增强API客户端的接口文档和错误处理机制
|
||||
- 优化页面级数据获取策略和状态管理
|
||||
|
||||
## 目录
|
||||
1. [引言](#引言)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构概览](#架构概览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖关系分析](#依赖关系分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排除指南](#故障排除指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 引言
|
||||
本文件系统性梳理 GEO 平台前端页面组件的设计与实现,覆盖仪表板、查询管理、引用数据、报告导出与设置页面。内容包括页面布局与导航结构、用户体验流程、页面级数据获取策略、状态管理与错误边界处理、页面间导航逻辑与路由参数传递、页面生命周期管理、性能优化与懒加载策略、SEO 配置以及开发规范与最佳实践。
|
||||
|
||||
**更新** 本次更新反映了应用的完整实现,所有页面组件均已开发完成并集成到Next.js应用架构中,包括数据总览、查询管理、引用记录、报告导出和设置页面的完整功能实现。
|
||||
|
||||
## 项目结构
|
||||
前端采用 Next.js App Router 的分组路由模式,将认证相关页面置于 `(auth)` 分组,仪表板相关页面置于 `(dashboard)` 分组。根布局负责全局样式与 Provider 包装;仪表板布局负责权限校验、侧边栏与头部导航的统一渲染。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
root_layout["根布局<br/>frontend/app/layout.tsx"] --> providers["Provider 包装<br/>frontend/components/providers.tsx"]
|
||||
dashboard_layout["仪表板布局<br/>frontend/app/(dashboard)/layout.tsx"] --> sidebar["侧边栏<br/>frontend/components/layout/sidebar.tsx"]
|
||||
dashboard_layout --> header["头部导航<br/>frontend/components/layout/header.tsx"]
|
||||
dashboard_layout --> pages["仪表板页面集合<br/>frontend/app/(dashboard)/dashboard/*"]
|
||||
pages --> dashboard_page["数据总览<br/>page.tsx"]
|
||||
pages --> queries_page["查询管理<br/>queries/page.tsx"]
|
||||
pages --> citations_page["引用记录<br/>citations/page.tsx"]
|
||||
pages --> reports_page["报告导出<br/>reports/page.tsx"]
|
||||
pages --> settings_page["设置<br/>settings/page.tsx"]
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [frontend/app/(dashboard)/layout.tsx:1-27](file://frontend/app/(dashboard)/layout.tsx#L1-L27)
|
||||
- [frontend/components/layout/sidebar.tsx:1-54](file://frontend/components/layout/sidebar.tsx#L1-L54)
|
||||
- [frontend/components/layout/header.tsx:1-30](file://frontend/components/layout/header.tsx#L1-L30)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [frontend/app/(dashboard)/layout.tsx:1-27](file://frontend/app/(dashboard)/layout.tsx#L1-L27)
|
||||
|
||||
## 核心组件
|
||||
- 布局与导航
|
||||
- 根布局:定义站点元数据、字体变量与全局样式,注入 Provider。
|
||||
- 仪表板布局:服务端会话校验,未登录重定向至登录页;渲染侧边栏与头部,承载主内容区域。
|
||||
- 头部组件:展示用户名与登出按钮,使用客户端会话状态。
|
||||
- 侧边栏组件:基于路径高亮当前菜单项,提供统一导航入口。
|
||||
- UI 组件库
|
||||
- 表格组件:封装响应式表格容器与表头/体/行/单元格等基础结构。
|
||||
- 对话框组件:基于 Radix UI 实现模态对话框,支持触发器、内容、标题与描述。
|
||||
- 图表组件
|
||||
- 趋势图组件:基于 Recharts 实现折线图,展示过去30天引用趋势。
|
||||
- 平台对比图:基于 Recharts 实现柱状图,展示各平台引用率对比。
|
||||
- 工具与常量
|
||||
- 平台映射:提供平台键值到中文名称的映射与平台选项列表。
|
||||
- 工具函数:类名合并工具,用于组合 Tailwind 类。
|
||||
- API 客户端:统一封装鉴权头、错误处理与各模块接口(认证、查询、引用、报告)。
|
||||
|
||||
**更新** 新增了图表组件的详细实现分析,包括数据结构定义、响应式容器配置和交互功能。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/components/layout/header.tsx:1-30](file://frontend/components/layout/header.tsx#L1-L30)
|
||||
- [frontend/components/layout/sidebar.tsx:1-54](file://frontend/components/layout/sidebar.tsx#L1-L54)
|
||||
- [frontend/components/ui/table.tsx:1-118](file://frontend/components/ui/table.tsx#L1-L118)
|
||||
- [frontend/components/ui/dialog.tsx:1-123](file://frontend/components/ui/dialog.tsx#L1-L123)
|
||||
- [frontend/components/charts/trend-chart.tsx:1-60](file://frontend/components/charts/trend-chart.tsx#L1-L60)
|
||||
- [frontend/components/charts/platform-chart.tsx:1-68](file://frontend/components/charts/platform-chart.tsx#L1-L68)
|
||||
- [frontend/lib/platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
- [frontend/lib/utils.ts:1-7](file://frontend/lib/utils.ts#L1-L7)
|
||||
- [frontend/lib/api.ts:1-79](file://frontend/lib/api.ts#L1-L79)
|
||||
|
||||
## 架构概览
|
||||
整体采用"布局层 + 页面层 + 组件层 + 工具层"的分层设计。页面层通过客户端会话获取令牌,调用 API 客户端进行数据拉取与写入;UI 组件层提供可复用的基础控件;工具层提供通用能力(类名合并、平台映射、API 封装);图表组件独立封装,按需渲染。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "布局层"
|
||||
root_layout["根布局"]
|
||||
dashboard_layout["仪表板布局"]
|
||||
header["头部"]
|
||||
sidebar["侧边栏"]
|
||||
end
|
||||
subgraph "页面层"
|
||||
dashboard_page["数据总览页"]
|
||||
queries_page["查询管理页"]
|
||||
citations_page["引用记录页"]
|
||||
reports_page["报告导出页"]
|
||||
settings_page["设置页"]
|
||||
end
|
||||
subgraph "组件层"
|
||||
table["表格组件"]
|
||||
dialog["对话框组件"]
|
||||
trend_chart["趋势图组件"]
|
||||
platform_chart["平台对比图组件"]
|
||||
end
|
||||
subgraph "工具层"
|
||||
utils["工具函数"]
|
||||
platforms["平台映射"]
|
||||
api_client["API 客户端"]
|
||||
end
|
||||
root_layout --> dashboard_layout
|
||||
dashboard_layout --> header
|
||||
dashboard_layout --> sidebar
|
||||
dashboard_layout --> dashboard_page
|
||||
dashboard_layout --> queries_page
|
||||
dashboard_layout --> citations_page
|
||||
dashboard_layout --> reports_page
|
||||
dashboard_layout --> settings_page
|
||||
dashboard_page --> trend_chart
|
||||
dashboard_page --> platform_chart
|
||||
queries_page --> table
|
||||
queries_page --> dialog
|
||||
citations_page --> table
|
||||
reports_page --> api_client
|
||||
settings_page --> utils
|
||||
settings_page --> platforms
|
||||
api_client --> utils
|
||||
table --> utils
|
||||
dialog --> utils
|
||||
trend_chart --> utils
|
||||
platform_chart --> utils
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [frontend/app/(dashboard)/layout.tsx:1-27](file://frontend/app/(dashboard)/layout.tsx#L1-L27)
|
||||
- [frontend/components/layout/header.tsx:1-30](file://frontend/components/layout/header.tsx#L1-L30)
|
||||
- [frontend/components/layout/sidebar.tsx:1-54](file://frontend/components/layout/sidebar.tsx#L1-L54)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:1-227](file://frontend/app/(dashboard)/dashboard/page.tsx#L1-L227)
|
||||
- [frontend/app/(dashboard)/dashboard/queries/page.tsx:1-526](file://frontend/app/(dashboard)/dashboard/queries/page.tsx#L1-L526)
|
||||
- [frontend/app/(dashboard)/dashboard/citations/page.tsx:1-294](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L1-L294)
|
||||
- [frontend/app/(dashboard)/dashboard/reports/page.tsx:1-200](file://frontend/app/(dashboard)/dashboard/reports/page.tsx#L1-L200)
|
||||
- [frontend/app/(dashboard)/dashboard/settings/page.tsx:1-172](file://frontend/app/(dashboard)/dashboard/settings/page.tsx#L1-L172)
|
||||
- [frontend/components/ui/table.tsx:1-118](file://frontend/components/ui/table.tsx#L1-L118)
|
||||
- [frontend/components/ui/dialog.tsx:1-123](file://frontend/components/ui/dialog.tsx#L1-L123)
|
||||
- [frontend/components/charts/trend-chart.tsx:1-60](file://frontend/components/charts/trend-chart.tsx#L1-L60)
|
||||
- [frontend/components/charts/platform-chart.tsx:1-68](file://frontend/components/charts/platform-chart.tsx#L1-L68)
|
||||
- [frontend/lib/utils.ts:1-7](file://frontend/lib/utils.ts#L1-L7)
|
||||
- [frontend/lib/platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
- [frontend/lib/api.ts:1-79](file://frontend/lib/api.ts#L1-L79)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 仪表板页面(数据总览)
|
||||
- 页面职责
|
||||
- 展示用户关键指标卡片(总查询次数、总引用次数、引用率、平均引用位置)。
|
||||
- 展示过去30天引用趋势折线图与平台引用率对比柱状图。
|
||||
- 数据流
|
||||
- 客户端从会话中获取访问令牌,调用引用统计接口获取聚合数据。
|
||||
- 加载状态、错误状态与空数据提示分别处理。
|
||||
- 用户体验
|
||||
- 卡片式布局,响应式网格适配不同屏幕尺寸。
|
||||
- 图表组件使用响应式容器,确保在小屏设备上正常显示。
|
||||
- 错误边界
|
||||
- 请求失败时显示错误提示与刷新按钮;加载中显示旋转指示器。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as "用户"
|
||||
participant P as "仪表板页面"
|
||||
participant S as "会话状态"
|
||||
participant A as "API 客户端"
|
||||
participant B as "后端"
|
||||
U->>P : 进入页面
|
||||
P->>S : 读取 accessToken
|
||||
alt 有令牌
|
||||
P->>A : 调用引用统计接口
|
||||
A->>B : 发起带授权头的请求
|
||||
B-->>A : 返回统计数据
|
||||
A-->>P : 解析并返回数据
|
||||
P-->>U : 渲染指标卡片与图表
|
||||
else 无令牌
|
||||
P-->>U : 显示加载占位
|
||||
end
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:29-47](file://frontend/app/(dashboard)/dashboard/page.tsx#L29-L47)
|
||||
- [frontend/lib/api.ts:67-70](file://frontend/lib/api.ts#L67-L70)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:1-227](file://frontend/app/(dashboard)/dashboard/page.tsx#L1-L227)
|
||||
- [frontend/components/charts/trend-chart.tsx:1-60](file://frontend/components/charts/trend-chart.tsx#L1-L60)
|
||||
- [frontend/components/charts/platform-chart.tsx:1-68](file://frontend/components/charts/platform-chart.tsx#L1-L68)
|
||||
|
||||
### 查询管理页面
|
||||
- 页面职责
|
||||
- 列表展示查询词,支持新增、编辑、删除与"立即查询"操作。
|
||||
- 提供平台多选、频率选择、品牌别名输入等配置项。
|
||||
- 数据流
|
||||
- 客户端加载查询词列表;新增通过 POST、编辑通过 PUT 写入;删除通过 DELETE。
|
||||
- 表单校验:关键词、目标品牌必填,至少选择一个平台。
|
||||
- 用户体验
|
||||
- 使用对话框承载表单,支持取消与保存;删除操作二次确认。
|
||||
- 列表支持平台徽标展示与状态徽章。
|
||||
- 错误边界
|
||||
- 表单校验失败与网络错误分别提示;保存与删除过程禁用按钮并显示加载状态。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["进入查询管理页"]) --> Load["加载查询词列表"]
|
||||
Load --> View{"列表为空?"}
|
||||
View --> |是| Empty["显示空状态与添加按钮"]
|
||||
View --> |否| List["渲染查询词表格"]
|
||||
AddEdit["打开新增/编辑对话框"] --> Form["填写表单(关键词/品牌/平台/频率)"]
|
||||
Form --> Validate{"校验通过?"}
|
||||
Validate --> |否| ShowError["显示错误提示"]
|
||||
Validate --> |是| Save["调用保存接口"]
|
||||
Save --> Refresh["刷新列表并关闭对话框"]
|
||||
Delete["打开删除确认"] --> Confirm{"确认删除?"}
|
||||
Confirm --> |否| Cancel["取消"]
|
||||
Confirm --> |是| Remove["调用删除接口"] --> Refresh
|
||||
Empty --> AddEdit
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [frontend/app/(dashboard)/dashboard/queries/page.tsx:143-156](file://frontend/app/(dashboard)/dashboard/queries/page.tsx#L143-L156)
|
||||
- [frontend/lib/api.ts:56-66](file://frontend/lib/api.ts#L56-L66)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/(dashboard)/dashboard/queries/page.tsx:1-526](file://frontend/app/(dashboard)/dashboard/queries/page.tsx#L1-L526)
|
||||
- [frontend/components/ui/dialog.tsx:1-123](file://frontend/components/ui/dialog.tsx#L1-L123)
|
||||
- [frontend/lib/platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
|
||||
### 引用记录页面
|
||||
- 页面职责
|
||||
- 展示跨平台的引用检测结果,支持按查询词、平台、日期范围筛选。
|
||||
- 数据流
|
||||
- 客户端加载查询词下拉选项与引用记录列表;筛选条件通过查询字符串拼接。
|
||||
- 支持重置筛选条件并重新加载。
|
||||
- 用户体验
|
||||
- 筛选区采用栅格布局,适配移动端;表格支持横向滚动。
|
||||
- 引用状态以图标与颜色区分,引用位置与文本片段清晰展示。
|
||||
- 错误边界
|
||||
- 加载失败时显示错误提示;空列表时提供引导文案。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as "用户"
|
||||
participant P as "引用记录页"
|
||||
participant A as "API 客户端"
|
||||
participant B as "后端"
|
||||
U->>P : 打开页面
|
||||
P->>A : 获取查询词列表
|
||||
A->>B : 发起请求
|
||||
B-->>A : 返回查询词数组
|
||||
A-->>P : 设置查询词选项
|
||||
U->>P : 设置筛选条件
|
||||
P->>A : 拼接查询参数并请求引用记录
|
||||
A->>B : 发起带参数的请求
|
||||
B-->>A : 返回引用记录数组
|
||||
A-->>P : 渲染表格
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [frontend/app/(dashboard)/dashboard/citations/page.tsx:75-105](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L75-L105)
|
||||
- [frontend/lib/api.ts:67-70](file://frontend/lib/api.ts#L67-L70)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/(dashboard)/dashboard/citations/page.tsx:1-294](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L1-L294)
|
||||
- [frontend/components/ui/table.tsx:1-118](file://frontend/components/ui/table.tsx#L1-L118)
|
||||
|
||||
### 报告导出页面
|
||||
- 页面职责
|
||||
- 选择查询词后导出 CSV 报告,文件包含字段:关键词、平台、是否引用、引用位置、引用文本、竞争品牌、查询时间。
|
||||
- 数据流
|
||||
- 客户端加载查询词列表;点击导出时使用原生 fetch 下载文件并触发浏览器下载。
|
||||
- 成功后显示成功提示,失败时显示错误信息。
|
||||
- 用户体验
|
||||
- 导出设置卡片与使用说明卡片并列展示,信息清晰。
|
||||
- 自动命名文件,便于识别与归档。
|
||||
- 错误边界
|
||||
- 未选择查询词时阻止导出并提示;网络异常或后端错误统一捕获并展示。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as "用户"
|
||||
participant P as "报告导出页"
|
||||
participant A as "API 客户端"
|
||||
participant B as "后端"
|
||||
U->>P : 选择查询词并点击导出
|
||||
alt 未选择查询词
|
||||
P-->>U : 显示错误提示
|
||||
else 已选择
|
||||
P->>A : 调用导出接口(携带令牌)
|
||||
A->>B : 发起带授权头的请求
|
||||
B-->>A : 返回二进制文件
|
||||
A-->>P : 触发浏览器下载
|
||||
P-->>U : 显示成功提示
|
||||
end
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [frontend/app/(dashboard)/dashboard/reports/page.tsx:50-94](file://frontend/app/(dashboard)/dashboard/reports/page.tsx#L50-L94)
|
||||
- [frontend/lib/api.ts:72-77](file://frontend/lib/api.ts#L72-L77)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/(dashboard)/dashboard/reports/page.tsx:1-200](file://frontend/app/(dashboard)/dashboard/reports/page.tsx#L1-L200)
|
||||
|
||||
### 设置页面
|
||||
- 页面职责
|
||||
- 展示用户信息与当前套餐状态;对比不同套餐的功能差异。
|
||||
- 数据流
|
||||
- 读取会话中的用户信息;根据当前计划键渲染对应套餐卡片。
|
||||
- 用户体验
|
||||
- 套餐卡片突出当前版本(MVP 阶段免费开放),提供功能清单与价格信息。
|
||||
- 提供 MVP 阶段说明与后续升级提示。
|
||||
- 错误边界
|
||||
- 无网络或会话异常时,显示默认占位信息。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/(dashboard)/dashboard/settings/page.tsx:1-172](file://frontend/app/(dashboard)/dashboard/settings/page.tsx#L1-L172)
|
||||
|
||||
## 依赖关系分析
|
||||
- 组件耦合
|
||||
- 页面组件依赖 UI 组件库与工具函数,保持低耦合与高内聚。
|
||||
- 仪表板、查询管理、引用记录与报告导出页面均依赖 API 客户端。
|
||||
- 外部依赖
|
||||
- 图表组件依赖 Recharts;对话框组件依赖 Radix UI;平台映射来自独立模块。
|
||||
- 潜在循环依赖
|
||||
- 未发现直接循环导入;布局与页面通过路由分组避免循环引用。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
api["API 客户端"] --> dashboard_page["仪表板页面"]
|
||||
api --> queries_page["查询管理页面"]
|
||||
api --> citations_page["引用记录页面"]
|
||||
api --> reports_page["报告导出页面"]
|
||||
table["表格组件"] --> queries_page
|
||||
table --> citations_page
|
||||
dialog["对话框组件"] --> queries_page
|
||||
utils["工具函数"] --> api
|
||||
utils --> table
|
||||
utils --> dialog
|
||||
platforms["平台映射"] --> queries_page
|
||||
platforms --> citations_page
|
||||
trend_chart["趋势图组件"] --> dashboard_page
|
||||
platform_chart["平台对比图组件"] --> dashboard_page
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [frontend/lib/api.ts:1-79](file://frontend/lib/api.ts#L1-L79)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:1-227](file://frontend/app/(dashboard)/dashboard/page.tsx#L1-L227)
|
||||
- [frontend/app/(dashboard)/dashboard/queries/page.tsx:1-526](file://frontend/app/(dashboard)/dashboard/queries/page.tsx#L1-L526)
|
||||
- [frontend/app/(dashboard)/dashboard/citations/page.tsx:1-294](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L1-L294)
|
||||
- [frontend/app/(dashboard)/dashboard/reports/page.tsx:1-200](file://frontend/app/(dashboard)/dashboard/reports/page.tsx#L1-L200)
|
||||
- [frontend/components/ui/table.tsx:1-118](file://frontend/components/ui/table.tsx#L1-L118)
|
||||
- [frontend/components/ui/dialog.tsx:1-123](file://frontend/components/ui/dialog.tsx#L1-L123)
|
||||
- [frontend/lib/utils.ts:1-7](file://frontend/lib/utils.ts#L1-L7)
|
||||
- [frontend/lib/platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
- [frontend/components/charts/trend-chart.tsx:1-60](file://frontend/components/charts/trend-chart.tsx#L1-L60)
|
||||
- [frontend/components/charts/platform-chart.tsx:1-68](file://frontend/components/charts/platform-chart.tsx#L1-L68)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/lib/api.ts:1-79](file://frontend/lib/api.ts#L1-L79)
|
||||
- [frontend/lib/utils.ts:1-7](file://frontend/lib/utils.ts#L1-L7)
|
||||
- [frontend/lib/platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
|
||||
## 性能考虑
|
||||
- 懒加载策略
|
||||
- 图表组件按需引入,避免首屏额外负担;页面组件在客户端首次渲染时才初始化。
|
||||
- 数据获取策略
|
||||
- 页面在会话令牌可用时再发起请求,减少无效网络调用;查询管理与引用记录页面在筛选条件变化时才重新请求。
|
||||
- 状态管理
|
||||
- 使用 React 本地状态管理加载、错误与交互状态,避免引入重型状态库。
|
||||
- UI 渲染优化
|
||||
- 表格组件提供横向滚动容器,避免大表格导致布局抖动;卡片与网格布局自适应不同屏幕尺寸。
|
||||
- SEO 配置
|
||||
- 根布局提供站点元数据(标题与描述),可在后续扩展 Open Graph、结构化数据等。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/layout.tsx:17-20](file://frontend/app/layout.tsx#L17-L20)
|
||||
- [frontend/app/(dashboard)/dashboard/queries/page.tsx:104-121](file://frontend/app/(dashboard)/dashboard/queries/page.tsx#L104-L121)
|
||||
- [frontend/app/(dashboard)/dashboard/citations/page.tsx:65-73](file://frontend/app/(dashboard)/dashboard/citations/page.tsx#L65-L73)
|
||||
|
||||
## 故障排除指南
|
||||
- 登录态缺失
|
||||
- 仪表板布局在服务端检查会话,未登录将重定向至登录页;客户端组件应始终检查会话令牌后再发起请求。
|
||||
- 请求失败
|
||||
- API 客户端对非 2xx 响应抛出错误,页面层捕获并显示错误提示;建议增加重试机制与更详细的错误码解析。
|
||||
- 表单校验失败
|
||||
- 查询管理页面对必填字段与平台选择进行校验,失败时显示错误提示;建议在提交前进行前端即时校验并聚焦首个错误项。
|
||||
- 导出失败
|
||||
- 报告导出页面对未选择查询词进行拦截;网络异常或后端错误统一捕获;建议在下载前预检接口可用性。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/(dashboard)/layout.tsx:12-15](file://frontend/app/(dashboard)/layout.tsx#L12-L15)
|
||||
- [frontend/lib/api.ts:3-40](file://frontend/lib/api.ts#L3-L40)
|
||||
- [frontend/app/(dashboard)/dashboard/queries/page.tsx:143-156](file://frontend/app/(dashboard)/dashboard/queries/page.tsx#L143-L156)
|
||||
- [frontend/app/(dashboard)/dashboard/reports/page.tsx:50-94](file://frontend/app/(dashboard)/dashboard/reports/page.tsx#L50-L94)
|
||||
|
||||
## 结论
|
||||
本设计以清晰的分层与职责划分实现了仪表板、查询管理、引用记录、报告导出与设置页面的组件化构建。通过统一的 API 客户端与 UI 组件库,提升了可维护性与一致性;结合会话驱动的数据获取与完善的错误边界处理,保障了用户体验与稳定性。所有页面组件均已实现并集成到Next.js应用架构中,包括数据总览、查询管理、引用记录、报告导出和设置页面的完整功能。后续可在 SEO、国际化、缓存策略与状态持久化方面进一步完善。
|
||||
|
||||
## 附录
|
||||
- 开发规范与最佳实践
|
||||
- 组件命名:使用语义化文件名与导出名称,如 `queries/page.tsx`。
|
||||
- 状态管理:优先使用 React 本地状态;复杂场景再考虑集中式状态库。
|
||||
- 错误处理:统一在页面层捕获并展示错误,避免错误冒泡到全局。
|
||||
- 表单处理:在提交前进行前端校验,必要时提供实时校验反馈。
|
||||
- 导航与路由:利用 Next.js 路由与分组,保持路径与页面职责一致。
|
||||
- 性能:按需引入重型组件(如图表),避免首屏阻塞;合理使用加载状态与骨架屏。
|
||||
- 可访问性:为按钮、对话框、表格等提供适当的 ARIA 属性与键盘导航支持。
|
||||
- 样式:统一使用工具函数合并类名,遵循 Tailwind 命名约定。
|
||||
|
|
@ -0,0 +1,455 @@
|
|||
# API接口设计
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/api/reports.py](file://backend/app/api/reports.py)
|
||||
- [backend/app/api/deps.py](file://backend/app/api/deps.py)
|
||||
- [backend/app/schemas/auth.py](file://backend/app/schemas/auth.py)
|
||||
- [backend/app/schemas/query.py](file://backend/app/schemas/query.py)
|
||||
- [backend/app/schemas/citation.py](file://backend/app/schemas/citation.py)
|
||||
- [backend/app/services/auth.py](file://backend/app/services/auth.py)
|
||||
- [backend/app/services/query.py](file://backend/app/services/query.py)
|
||||
- [backend/app/services/citation.py](file://backend/app/services/citation.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件为 GEO 平台的 API 接口设计规范文档,面向后端与前端开发者,系统性阐述 RESTful API 设计原则、路由组织结构、版本控制策略、请求/响应数据模型(Pydantic)、错误处理机制、认证与授权、API 文档与测试策略,以及使用示例与最佳实践。
|
||||
|
||||
## 项目结构
|
||||
后端采用 FastAPI 框架,按功能模块划分 API 路由与服务层,数据库使用 SQLAlchemy 异步引擎,配置通过 Pydantic Settings 管理。主要模块如下:
|
||||
- 应用入口与路由注册:backend/app/main.py
|
||||
- 认证与用户:backend/app/api/auth.py、backend/app/services/auth.py、backend/app/schemas/auth.py
|
||||
- 查询词管理:backend/app/api/queries.py、backend/app/services/query.py、backend/app/schemas/query.py
|
||||
- 引用数据与统计:backend/app/api/citations.py、backend/app/services/citation.py、backend/app/schemas/citation.py
|
||||
- 报告导出:backend/app/api/reports.py
|
||||
- 依赖注入与鉴权:backend/app/api/deps.py
|
||||
- 数据库与模型:backend/app/database.py、backend/app/models/*.py
|
||||
- 配置:backend/app/config.py
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
A["应用入口<br/>backend/app/main.py"] --> B["认证路由<br/>backend/app/api/auth.py"]
|
||||
A --> C["查询路由<br/>backend/app/api/queries.py"]
|
||||
A --> D["引用路由<br/>backend/app/api/citations.py"]
|
||||
A --> E["报告路由<br/>backend/app/api/reports.py"]
|
||||
B --> F["认证服务<br/>backend/app/services/auth.py"]
|
||||
C --> G["查询服务<br/>backend/app/services/query.py"]
|
||||
D --> H["引用服务<br/>backend/app/services/citation.py"]
|
||||
F --> I["数据库会话<br/>backend/app/database.py"]
|
||||
G --> I
|
||||
H --> I
|
||||
I --> J["配置<br/>backend/app/config.py"]
|
||||
I --> K["用户模型<br/>backend/app/models/user.py"]
|
||||
I --> L["引用记录模型<br/>backend/app/models/citation_record.py"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/api/reports.py:1-47](file://backend/app/api/reports.py#L1-L47)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/services/query.py:1-130](file://backend/app/services/query.py#L1-L130)
|
||||
- [backend/app/services/citation.py:1-269](file://backend/app/services/citation.py#L1-L269)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
|
||||
## 核心组件
|
||||
- 版本控制与路由前缀
|
||||
- 应用统一使用 /api/v1 前缀,并按资源划分子路由:
|
||||
- /api/v1/auth:认证相关
|
||||
- /api/v1/queries:查询词 CRUD
|
||||
- /api/v1/citations:引用数据列表、统计(“立即执行”接口定义在同一路由模块中,但路径为 /api/v1/queries/{query_id}/run-now)
|
||||
- /api/v1/reports:报告导出
|
||||
- CORS 配置
|
||||
- 允许本地前端 localhost:3000 进行跨域访问,允许所有 HTTP 方法与请求头
|
||||
- 健康检查
|
||||
- GET /health 返回 {"status":"ok"}
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:24-48](file://backend/app/main.py#L24-L48)
|
||||
|
||||
## 架构总览
|
||||
下图展示从客户端到 API、服务层与数据库的调用链路,以及鉴权依赖(get_current_user)如何通过依赖注入为路由提供当前用户上下文。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant API as "FastAPI 应用"
|
||||
participant Router as "API 路由"
|
||||
participant Deps as "依赖注入/鉴权"
|
||||
participant Service as "业务服务"
|
||||
participant DB as "数据库"
|
||||
Client->>API : "HTTP 请求"
|
||||
API->>Router : "路由分发"
|
||||
Router->>Deps : "获取当前用户(鉴权)"
|
||||
Deps-->>Router : "User 对象"
|
||||
Router->>Service : "调用业务逻辑"
|
||||
Service->>DB : "SQLAlchemy 异步查询/写入"
|
||||
DB-->>Service : "结果集"
|
||||
Service-->>Router : "领域对象/聚合"
|
||||
Router-->>Client : "JSON 响应/状态码"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:38-42](file://backend/app/main.py#L38-L42)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/api/auth.py:13-42](file://backend/app/api/auth.py#L13-L42)
|
||||
- [backend/app/api/queries.py:15-85](file://backend/app/api/queries.py#L15-L85)
|
||||
- [backend/app/api/citations.py:25-77](file://backend/app/api/citations.py#L25-L77)
|
||||
- [backend/app/api/reports.py:16-46](file://backend/app/api/reports.py#L16-L46)
|
||||
- [backend/app/services/auth.py:37-69](file://backend/app/services/auth.py#L37-L69)
|
||||
- [backend/app/services/query.py:12-130](file://backend/app/services/query.py#L12-L130)
|
||||
- [backend/app/services/citation.py:24-269](file://backend/app/services/citation.py#L24-L269)
|
||||
- [backend/app/database.py:23-29](file://backend/app/database.py#L23-L29)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 认证与用户管理
|
||||
- 路由与方法
|
||||
- POST /api/v1/auth/register:注册,返回用户信息,状态码 201
|
||||
- POST /api/v1/auth/login:登录,返回访问令牌与用户信息,状态码 200;失败 401
|
||||
- GET /api/v1/auth/me:获取当前用户信息,需携带 Bearer Token
|
||||
- 鉴权流程
|
||||
- 使用 OAuth2PasswordBearer 指向 /api/v1/auth/login
|
||||
- 依赖 get_current_user 校验 JWT 并加载用户
|
||||
- 数据模型
|
||||
- 注册输入:邮箱、密码、姓名(最小长度约束)
|
||||
- 登录输入:邮箱、密码
|
||||
- 用户输出:id、email、name、plan、max_queries、is_active、created_at
|
||||
- 令牌输出:access_token、token_type、user
|
||||
- 错误处理
|
||||
- 注册重复邮箱:400
|
||||
- 登录失败:401(带 WWW-Authenticate 头)
|
||||
- 鉴权失败:401(凭据无效)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant Auth as "认证路由"
|
||||
participant Deps as "get_current_user"
|
||||
participant Svc as "认证服务"
|
||||
participant DB as "数据库"
|
||||
Client->>Auth : "POST /api/v1/auth/login"
|
||||
Auth->>Svc : "authenticate_user(email,password)"
|
||||
Svc->>DB : "查询用户并校验密码"
|
||||
DB-->>Svc : "用户对象"
|
||||
Svc-->>Auth : "用户或None"
|
||||
Auth-->>Client : "200 OK + {access_token,user}"
|
||||
Note over Client,Auth : "后续请求携带 Authorization : Bearer <token>"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/auth.py:13-42](file://backend/app/api/auth.py#L13-L42)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/services/auth.py:55-69](file://backend/app/services/auth.py#L55-L69)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:13-42](file://backend/app/api/auth.py#L13-L42)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/schemas/auth.py:7-34](file://backend/app/schemas/auth.py#L7-L34)
|
||||
- [backend/app/services/auth.py:37-69](file://backend/app/services/auth.py#L37-L69)
|
||||
|
||||
### 查询词管理
|
||||
- 路由与方法
|
||||
- GET /api/v1/queries:分页列出查询词,支持 skip/limit
|
||||
- POST /api/v1/queries:创建查询词,状态码 201
|
||||
- GET /api/v1/queries/{query_id}:获取单个查询词
|
||||
- PUT /api/v1/queries/{query_id}:更新查询词
|
||||
- DELETE /api/v1/queries/{query_id}:删除查询词,状态码 204
|
||||
- 输入/输出模型
|
||||
- 创建输入:keyword、target_brand、brand_aliases、platforms、frequency(默认 weekly)
|
||||
- 更新输入:可选字段 keyword、target_brand、brand_aliases、platforms、frequency、status
|
||||
- 输出:完整查询词详情(含频率、状态、时间戳等)
|
||||
- 权限与限制
|
||||
- 用户最大查询数受用户计划限制,超过抛出 403
|
||||
- 所有操作均进行所有权校验(user_id 匹配)
|
||||
- 错误处理
|
||||
- 未找到:404
|
||||
- 超出配额:403
|
||||
- 参数校验失败:422(Pydantic 自动)
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["创建查询"]) --> CheckCount["检查用户当前查询数"]
|
||||
CheckCount --> CountOK{"是否小于 max_queries?"}
|
||||
CountOK -- 否 --> Forbidden["返回 403: 超出配额"]
|
||||
CountOK -- 是 --> CalcNext["根据频率计算 next_query_at"]
|
||||
CalcNext --> Create["持久化 Query 记录"]
|
||||
Create --> Return201["返回 201 + QueryResponse"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/queries.py:26-39](file://backend/app/api/queries.py#L26-L39)
|
||||
- [backend/app/services/query.py:45-81](file://backend/app/services/query.py#L45-L81)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/queries.py:15-85](file://backend/app/api/queries.py#L15-L85)
|
||||
- [backend/app/schemas/query.py:11-94](file://backend/app/schemas/query.py#L11-L94)
|
||||
- [backend/app/services/query.py:12-130](file://backend/app/services/query.py#L12-L130)
|
||||
|
||||
### 引用数据与统计
|
||||
- 路由与方法
|
||||
- GET /api/v1/citations:分页列出引用记录,支持 query_id、platform、日期范围过滤
|
||||
- GET /api/v1/citations/stats:统计摘要(总查询、总引用、引用率、平台分布、趋势)
|
||||
- POST /api/v1/queries/{query_id}/run-now:立即触发查询任务,状态码 202
|
||||
- 数据模型
|
||||
- 引用记录输出:id、query_id、platform、cited、citation_position、citation_text、competitor_brands、queried_at
|
||||
- 统计输出:总览指标、平台维度统计、周粒度趋势
|
||||
- 立即执行输出:task_id、status、message
|
||||
- 安全与权限
|
||||
- 列表与统计均进行所有权校验;当提供 query_id 时额外校验查询归属
|
||||
- 导出 CSV 亦进行所有权校验
|
||||
- 错误处理
|
||||
- 未找到查询:404
|
||||
- 查询非激活:404;无平台配置:400
|
||||
- 未授权/无效凭据:401
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant Cit as "引用路由"
|
||||
participant Svc as "引用服务"
|
||||
participant DB as "数据库"
|
||||
Client->>Cit : "GET /api/v1/citations/stats?query_id={id}"
|
||||
Cit->>Svc : "get_citation_stats(user_id, query_id?)"
|
||||
Svc->>DB : "聚合统计(条件含 user_id 与可选 query_id)"
|
||||
DB-->>Svc : "统计结果"
|
||||
Svc-->>Cit : "CitationStatsResponse"
|
||||
Cit-->>Client : "200 OK"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/citations.py:49-56](file://backend/app/api/citations.py#L49-L56)
|
||||
- [backend/app/services/citation.py:76-201](file://backend/app/services/citation.py#L76-L201)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/citations.py:25-77](file://backend/app/api/citations.py#L25-L77)
|
||||
- [backend/app/schemas/citation.py:7-50](file://backend/app/schemas/citation.py#L7-L50)
|
||||
- [backend/app/services/citation.py:24-269](file://backend/app/services/citation.py#L24-L269)
|
||||
|
||||
### 报告导出
|
||||
- 路由与方法
|
||||
- GET /api/v1/reports/export/csv:导出 CSV 文件流,支持文件名与 Content-Disposition 头
|
||||
- 错误处理
|
||||
- 不支持的格式:400
|
||||
- 查询不存在:404
|
||||
- 流式响应:StreamingResponse,媒体类型 text/csv
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/reports.py:16-46](file://backend/app/api/reports.py#L16-L46)
|
||||
- [backend/app/services/citation.py:237-269](file://backend/app/services/citation.py#L237-L269)
|
||||
|
||||
### 立即执行查询(独立路由)
|
||||
- 路由与方法
|
||||
- POST /api/v1/queries/{query_id}/run-now:加入查询任务队列,状态码 202
|
||||
- 行为
|
||||
- 校验查询归属与状态,为每个平台创建一个 QueryTask
|
||||
- 错误处理
|
||||
- 查询不存在或非激活:404
|
||||
- 无平台配置:400
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/citations.py:59-77](file://backend/app/api/citations.py#L59-L77)
|
||||
- [backend/app/services/citation.py:204-234](file://backend/app/services/citation.py#L204-L234)
|
||||
|
||||
## 依赖分析
|
||||
- 组件耦合
|
||||
- API 层仅依赖依赖注入与服务层,不直接操作数据库
|
||||
- 服务层通过异步 Session 访问数据库,避免阻塞
|
||||
- 配置集中于 Settings,便于环境隔离
|
||||
- 关键依赖关系
|
||||
- OAuth2PasswordBearer 指向登录端点,确保令牌一致
|
||||
- 所有受保护路由均通过 get_current_user 注入 User 上下文
|
||||
- 业务服务内部进行所有权校验,保证数据隔离
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
API_Auth["API 认证"] --> Deps["依赖注入"]
|
||||
API_Queries["API 查询"] --> Deps
|
||||
API_Citations["API 引用"] --> Deps
|
||||
API_Reports["API 报告"] --> Deps
|
||||
Deps --> Svc_Auth["认证服务"]
|
||||
Deps --> Svc_Query["查询服务"]
|
||||
Deps --> Svc_Citation["引用服务"]
|
||||
Svc_Auth --> DB["数据库"]
|
||||
Svc_Query --> DB
|
||||
Svc_Citation --> DB
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/deps.py:13-43](file://backend/app/api/deps.py#L13-L43)
|
||||
- [backend/app/services/auth.py:37-69](file://backend/app/services/auth.py#L37-L69)
|
||||
- [backend/app/services/query.py:12-130](file://backend/app/services/query.py#L12-L130)
|
||||
- [backend/app/services/citation.py:24-269](file://backend/app/services/citation.py#L24-L269)
|
||||
- [backend/app/database.py:23-29](file://backend/app/database.py#L23-L29)
|
||||
|
||||
## 性能考虑
|
||||
- 分页与过滤
|
||||
- 列表接口支持 skip/limit,建议前端按需请求,避免一次性拉取大量数据
|
||||
- 引用列表支持多维过滤(query_id、platform、日期),建议在高频查询场景下配合索引
|
||||
- 数据库索引
|
||||
- 引用记录表对 query_id、queried_at、platform 建有索引,有利于统计与分页
|
||||
- 异步 I/O
|
||||
- 使用 SQLAlchemy 异步引擎与异步 Session,减少阻塞
|
||||
- 缓存与队列
|
||||
- 立即执行查询通过任务队列异步处理,避免长耗时请求阻塞 API
|
||||
|
||||
## 故障排查指南
|
||||
- 401 未授权
|
||||
- 检查 Authorization 头是否为 Bearer Token
|
||||
- 检查 token 是否过期或签名错误
|
||||
- 403 禁止访问
|
||||
- 用户配额不足或权限不足(如创建查询超限)
|
||||
- 404 未找到
|
||||
- 资源 ID 不存在或不属于当前用户
|
||||
- 400 参数错误
|
||||
- 平台列表为空或包含非法值;导出格式不支持
|
||||
- 500 服务器错误
|
||||
- 检查数据库连接与服务日志
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:26-30](file://backend/app/api/auth.py#L26-L30)
|
||||
- [backend/app/api/queries.py:34-38](file://backend/app/api/queries.py#L34-L38)
|
||||
- [backend/app/api/citations.py:67-71](file://backend/app/api/citations.py#L67-L71)
|
||||
- [backend/app/api/reports.py:23-27](file://backend/app/api/reports.py#L23-L27)
|
||||
- [backend/app/api/deps.py:20-41](file://backend/app/api/deps.py#L20-L41)
|
||||
|
||||
## 结论
|
||||
本规范以 RESTful 设计为核心,结合 FastAPI 的类型安全与自动文档能力,构建了清晰的路由分层、严格的认证授权与完善的错误处理机制。通过 Pydantic 模型与 SQLAlchemy 异步 ORM,实现了高一致性与高性能的数据访问。建议在生产环境中进一步完善速率限制、审计日志与监控告警体系。
|
||||
|
||||
## 附录
|
||||
|
||||
### RESTful 设计原则与路由组织
|
||||
- URL 命名规范
|
||||
- 使用名词复数形式表示资源集合,如 /queries、/citations
|
||||
- 资源标识符使用路径参数,如 /queries/{query_id}
|
||||
- HTTP 方法使用
|
||||
- GET:读取资源列表或单个资源
|
||||
- POST:创建资源
|
||||
- PUT:更新资源
|
||||
- DELETE:删除资源
|
||||
- 状态码标准
|
||||
- 200:成功获取或更新
|
||||
- 201:创建成功
|
||||
- 202:异步任务已接受
|
||||
- 204:删除成功且无内容
|
||||
- 400:参数或业务错误
|
||||
- 401:未授权
|
||||
- 403:禁止访问
|
||||
- 404:资源不存在
|
||||
- 422:数据校验失败(Pydantic)
|
||||
- 500:服务器内部错误
|
||||
|
||||
### API 版本控制与路由前缀
|
||||
- 版本控制策略
|
||||
- 采用 URL 前缀 /api/v1 进行版本隔离,便于未来演进
|
||||
- 路由前缀管理
|
||||
- 认证:/api/v1/auth
|
||||
- 查询词:/api/v1/queries
|
||||
- 引用数据:/api/v1/citations
|
||||
- 报告:/api/v1/reports
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:38-42](file://backend/app/main.py#L38-L42)
|
||||
|
||||
### 请求与响应数据模型(Pydantic)
|
||||
- 认证
|
||||
- 注册输入:邮箱、密码、姓名(最小长度约束)
|
||||
- 登录输入:邮箱、密码
|
||||
- 用户输出:id、email、name、plan、max_queries、is_active、created_at
|
||||
- 令牌输出:access_token、token_type、user
|
||||
- 查询词
|
||||
- 创建输入:keyword、target_brand、brand_aliases、platforms、frequency(默认 weekly)
|
||||
- 更新输入:可选字段 keyword、target_brand、brand_aliases、platforms、frequency、status
|
||||
- 输出:完整查询词详情(含频率、状态、时间戳等)
|
||||
- 引用数据
|
||||
- 列表项:id、query_id、platform、cited、citation_position、citation_text、competitor_brands、queried_at
|
||||
- 统计:总览指标、平台维度统计、周粒度趋势
|
||||
- 立即执行:task_id、status、message
|
||||
|
||||
章节来源
|
||||
- [backend/app/schemas/auth.py:7-34](file://backend/app/schemas/auth.py#L7-L34)
|
||||
- [backend/app/schemas/query.py:11-94](file://backend/app/schemas/query.py#L11-L94)
|
||||
- [backend/app/schemas/citation.py:7-50](file://backend/app/schemas/citation.py#L7-L50)
|
||||
|
||||
### 错误处理机制
|
||||
- 异常类型分类
|
||||
- 业务异常:如配额不足、查询不存在、平台非法
|
||||
- 凭据异常:JWT 解析失败、用户不存在
|
||||
- 错误响应格式
|
||||
- 统一为 JSON:{ "detail": "错误描述" }
|
||||
- 未授权携带 WWW-Authenticate 头
|
||||
- HTTP 状态码映射
|
||||
- 400:参数/业务错误
|
||||
- 401:凭据无效
|
||||
- 403:权限不足
|
||||
- 404:资源不存在
|
||||
- 422:数据校验失败
|
||||
- 500:服务器错误
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:17-18](file://backend/app/api/auth.py#L17-L18)
|
||||
- [backend/app/api/queries.py:34-38](file://backend/app/api/queries.py#L34-L38)
|
||||
- [backend/app/api/citations.py:67-71](file://backend/app/api/citations.py#L67-L71)
|
||||
- [backend/app/api/reports.py:23-27](file://backend/app/api/reports.py#L23-L27)
|
||||
- [backend/app/api/deps.py:20-41](file://backend/app/api/deps.py#L20-L41)
|
||||
|
||||
### 认证与授权实现
|
||||
- 认证方式
|
||||
- JWT Bearer Token,密钥与过期时间由配置管理
|
||||
- 授权策略
|
||||
- 所有受保护路由通过 get_current_user 注入当前用户
|
||||
- 业务层进行所有权校验(user_id 匹配),防止越权访问
|
||||
- 最佳实践
|
||||
- 生产环境更换默认密钥与设置合理过期时间
|
||||
- 前端统一在请求头携带 `Authorization: Bearer <token>`
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/deps.py:13-43](file://backend/app/api/deps.py#L13-L43)
|
||||
- [backend/app/services/auth.py:24-34](file://backend/app/services/auth.py#L24-L34)
|
||||
- [backend/app/config.py:9-10](file://backend/app/config.py#L9-L10)
|
||||
|
||||
### API 文档生成与测试策略
|
||||
- 文档生成
|
||||
- FastAPI 自动生成 OpenAPI 规范与交互式文档,默认启用
|
||||
- 测试策略
|
||||
- 单元测试:针对服务层函数(如 get_citations、create_query)进行异步测试
|
||||
- 集成测试:通过 API 路由层发起请求,验证鉴权、权限与错误码
|
||||
- 建议使用 pytest 异步运行器与测试数据库
|
||||
|
||||
### API 使用示例与最佳实践
|
||||
- 示例
|
||||
- 登录获取令牌后,在后续请求头中添加 `Authorization: Bearer <token>`
|
||||
- 创建查询时指定 platforms 与 frequency,注意平台集合的有效性
|
||||
- 导出报告时仅支持 csv 格式
|
||||
- 最佳实践
|
||||
- 前端分页请求 skip/limit,避免一次性加载过多数据
|
||||
- 在高频查询场景下利用过滤参数缩小数据集
|
||||
- 对外暴露的路由保持幂等性与明确的状态码语义
|
||||
|
|
@ -0,0 +1,431 @@
|
|||
# 后端系统架构
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/api/deps.py](file://backend/app/api/deps.py)
|
||||
- [backend/app/schemas/auth.py](file://backend/app/schemas/auth.py)
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [backend/app/services/auth.py](file://backend/app/services/auth.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [引言](#引言)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖关系分析](#依赖关系分析)
|
||||
7. [性能考量](#性能考量)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 引言
|
||||
本文件为 GEO 平台后端系统的架构文档,基于 FastAPI 构建,采用异步 SQLAlchemy ORM、APScheduler 定时任务与多平台适配器模式,实现查询词管理、引用检测与报告统计等功能。文档覆盖应用配置、中间件、路由组织、生命周期管理、数据库连接与 ORM、异步处理、认证与权限控制、API 设计与错误处理、系统监控与日志、性能优化策略,并给出架构决策的技术背景与权衡。
|
||||
|
||||
## 项目结构
|
||||
后端采用分层与功能域结合的组织方式:
|
||||
- 应用入口与生命周期:app/main.py
|
||||
- 配置中心:app/config.py
|
||||
- 数据库与依赖注入:app/database.py
|
||||
- API 层:app/api/ 下按功能模块划分(auth、queries、citations、deps)
|
||||
- 模型层:app/models/(SQLAlchemy ORM 映射)
|
||||
- 服务层:app/services/(业务逻辑封装)
|
||||
- 工作器与调度:app/workers/(APScheduler 调度器、引用检测引擎、平台适配器)
|
||||
- 测试:tests/(pytest)
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "应用入口"
|
||||
MAIN["app/main.py"]
|
||||
end
|
||||
subgraph "配置与数据库"
|
||||
CFG["app/config.py"]
|
||||
DB["app/database.py"]
|
||||
end
|
||||
subgraph "API 层"
|
||||
AUTH["app/api/auth.py"]
|
||||
QUERIES["app/api/queries.py"]
|
||||
CITATIONS["app/api/citations.py"]
|
||||
DEPS["app/api/deps.py"]
|
||||
end
|
||||
subgraph "模型与服务"
|
||||
MODEL_USER["app/models/user.py"]
|
||||
MODEL_QUERY["app/models/query.py"]
|
||||
MODEL_CIT["app/models/citation_record.py"]
|
||||
SVC_AUTH["app/services/auth.py"]
|
||||
end
|
||||
subgraph "工作器与调度"
|
||||
SCHED["app/workers/scheduler.py"]
|
||||
ENGINE["app/workers/citation_engine.py"]
|
||||
end
|
||||
MAIN --> AUTH
|
||||
MAIN --> QUERIES
|
||||
MAIN --> CITATIONS
|
||||
MAIN --> SCHED
|
||||
AUTH --> SVC_AUTH
|
||||
AUTH --> DB
|
||||
QUERIES --> DB
|
||||
CITATIONS --> DB
|
||||
DEPS --> DB
|
||||
SVC_AUTH --> CFG
|
||||
SCHED --> DB
|
||||
SCHED --> ENGINE
|
||||
ENGINE --> MODEL_QUERY
|
||||
ENGINE --> MODEL_CIT
|
||||
DB --> CFG
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/api/deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
|
||||
## 核心组件
|
||||
- 应用入口与生命周期:通过 lifespan 钩子在启动时初始化模型与调度器,在关闭时优雅停止。
|
||||
- 中间件:启用 CORS,允许前端 localhost:3000 访问。
|
||||
- 路由组织:按模块拆分,统一前缀与标签,便于 API 文档生成与维护。
|
||||
- 数据库:异步 SQLAlchemy 引擎与会话工厂,依赖注入式获取会话。
|
||||
- 认证与权限:OAuth2 密码流 + JWT,依赖注入解析当前用户,未授权时抛出 401。
|
||||
- 引擎与调度:APScheduler 定时扫描到期查询,调用 CitationEngine 执行并持久化结果。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:13-48](file://backend/app/main.py#L13-L48)
|
||||
- [backend/app/database.py:6-29](file://backend/app/database.py#L6-L29)
|
||||
- [backend/app/api/deps.py:13-43](file://backend/app/api/deps.py#L13-L43)
|
||||
|
||||
## 架构总览
|
||||
系统采用“API 层-服务层-模型层-基础设施”的分层架构,配合异步 I/O 与定时任务,实现高并发与可扩展的查询与检测能力。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
CLIENT["客户端/前端"]
|
||||
FASTAPI["FastAPI 应用<br/>lifespan/CORS"]
|
||||
ROUTER_AUTH["认证路由"]
|
||||
ROUTER_QUERIES["查询路由"]
|
||||
ROUTER_CIT["引用路由"]
|
||||
DEPS["依赖注入<br/>OAuth2/JWT 解析"]
|
||||
SVC_AUTH["认证服务<br/>密码哈希/JWT"]
|
||||
DB["异步数据库<br/>Session 工厂"]
|
||||
SCHED["查询调度器<br/>APScheduler"]
|
||||
ENGINE["引用检测引擎<br/>平台适配器"]
|
||||
CLIENT --> FASTAPI
|
||||
FASTAPI --> ROUTER_AUTH
|
||||
FASTAPI --> ROUTER_QUERIES
|
||||
FASTAPI --> ROUTER_CIT
|
||||
ROUTER_AUTH --> DEPS
|
||||
ROUTER_QUERIES --> DEPS
|
||||
ROUTER_CIT --> DEPS
|
||||
ROUTER_AUTH --> SVC_AUTH
|
||||
ROUTER_QUERIES --> DB
|
||||
ROUTER_CIT --> DB
|
||||
DEPS --> DB
|
||||
SVC_AUTH --> DB
|
||||
SCHED --> DB
|
||||
SCHED --> ENGINE
|
||||
ENGINE --> DB
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:24-48](file://backend/app/main.py#L24-L48)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/services/auth.py:16-69](file://backend/app/services/auth.py#L16-L69)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 应用入口与生命周期
|
||||
- 使用 lifespan 钩子在启动时导入模型并启动查询调度器;在关闭时优雅停止调度器与引擎资源。
|
||||
- 注册 CORS 中间件,允许前端跨域访问。
|
||||
- 统一注册认证、查询、引用、报告路由,并为“立即执行”路由复用同一前缀。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant App as "FastAPI 应用"
|
||||
participant Life as "lifespan"
|
||||
participant Sched as "查询调度器"
|
||||
Client->>App : 启动请求
|
||||
App->>Life : 进入 lifespan
|
||||
Life->>Sched : start()
|
||||
App-->>Client : 200 OK
|
||||
Note over App,Sched : 应用运行中
|
||||
Client->>App : 关闭请求
|
||||
App->>Life : 退出 lifespan
|
||||
Life->>Sched : shutdown()
|
||||
App-->>Client : 200 OK
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/workers/scheduler.py:86-90](file://backend/app/workers/scheduler.py#L86-L90)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:13-48](file://backend/app/main.py#L13-L48)
|
||||
|
||||
### 配置与数据库
|
||||
- 配置项:数据库连接、Redis、JWT 密钥与过期时间、浏览器路径、平台 API Key 等。
|
||||
- 数据库:异步引擎、会话工厂、基础模型类;提供依赖注入函数以获取会话。
|
||||
- 会话行为:非自动提交/刷新/回滚,显式管理事务边界。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["应用启动"]) --> LoadCfg["加载配置"]
|
||||
LoadCfg --> InitEngine["创建异步引擎"]
|
||||
InitEngine --> InitSession["创建会话工厂"]
|
||||
InitSession --> RegisterDep["注册 get_db 依赖"]
|
||||
RegisterDep --> Ready(["就绪"])
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/config.py:4-17](file://backend/app/config.py#L4-L17)
|
||||
- [backend/app/database.py:6-29](file://backend/app/database.py#L6-L29)
|
||||
|
||||
章节来源
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
|
||||
### 认证系统
|
||||
- 登录:校验邮箱与密码,成功则签发 JWT。
|
||||
- 注册:检查邮箱唯一性,哈希密码后创建用户。
|
||||
- 当前用户:通过 OAuth2 密码流获取令牌,解码 JWT 提取用户 ID,查询数据库返回当前用户。
|
||||
- 错误处理:未通过凭据验证时返回 401。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant Auth as "认证路由"
|
||||
participant Svc as "认证服务"
|
||||
participant DB as "数据库"
|
||||
Client->>Auth : POST /api/v1/auth/login
|
||||
Auth->>Svc : authenticate_user(email, password)
|
||||
Svc->>DB : 查询用户
|
||||
DB-->>Svc : 用户对象
|
||||
Svc-->>Auth : 用户或空
|
||||
Auth-->>Client : {access_token, user} 或 401
|
||||
Client->>Auth : GET /api/v1/auth/me (携带 Bearer Token)
|
||||
Auth->>Svc : verify_token(token)
|
||||
Svc-->>Auth : 载荷
|
||||
Auth->>DB : 查询用户 by id
|
||||
DB-->>Auth : 用户对象
|
||||
Auth-->>Client : 用户信息
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/auth.py:13-43](file://backend/app/api/auth.py#L13-L43)
|
||||
- [backend/app/services/auth.py:37-69](file://backend/app/services/auth.py#L37-L69)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/schemas/auth.py:1-34](file://backend/app/schemas/auth.py#L1-L34)
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/api/deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
|
||||
### 查询与引用 API
|
||||
- 查询 API:支持分页、创建、读取、更新、删除,均需当前用户权限。
|
||||
- 引用 API:支持分页查询、统计、立即执行查询任务(返回任务状态)。
|
||||
- 权限控制:所有路由依赖 get_current_user,未通过验证返回 401。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant Q as "查询路由"
|
||||
participant D as "依赖注入"
|
||||
participant S as "服务层"
|
||||
participant DB as "数据库"
|
||||
Client->>Q : POST /api/v1/queries/
|
||||
Q->>D : get_current_user()
|
||||
D-->>Q : 当前用户
|
||||
Q->>S : create_query(...)
|
||||
S->>DB : 写入
|
||||
DB-->>S : 成功
|
||||
S-->>Q : 查询对象
|
||||
Q-->>Client : 201 + 查询对象
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/queries.py:26-39](file://backend/app/api/queries.py#L26-L39)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
|
||||
### 引擎与调度
|
||||
- 调度器:每小时扫描 queries 表中 status='active' 且 next_query_at <= now() 的记录,逐条执行。
|
||||
- 引擎:对每个平台执行查询,进行品牌匹配与竞争品牌检测,写入 citation_records,并更新查询时间字段。
|
||||
- 平台适配器:抽象不同平台的查询接口,统一返回原始响应供匹配器处理。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Tick["定时触发(每小时)"] --> Scan["扫描到期查询"]
|
||||
Scan --> Found{"找到待执行查询?"}
|
||||
Found -- 否 --> Wait["等待下一轮"]
|
||||
Found -- 是 --> Exec["遍历查询平台执行"]
|
||||
Exec --> Record["写入引用记录"]
|
||||
Record --> Update["更新查询时间字段"]
|
||||
Update --> Done["完成"]
|
||||
Wait --> Tick
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/scheduler.py:51-85](file://backend/app/workers/scheduler.py#L51-L85)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
|
||||
### 数据模型与关系
|
||||
- 用户:主键 UUID、邮箱唯一、密码哈希、计划与配额、活跃状态、时间戳。
|
||||
- 查询:外键用户、关键词、目标品牌、别名、平台集合、频率、状态与时间字段。
|
||||
- 引用记录:外键查询、平台、是否引用、位置、文本、竞争品牌、原始响应、时间戳。
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
USERS {
|
||||
uuid id PK
|
||||
string email UK
|
||||
string password_hash
|
||||
string name
|
||||
string plan
|
||||
int max_queries
|
||||
bool is_active
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamp last_queried_at
|
||||
timestamp next_query_at
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
CITATION_RECORDS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
bool cited
|
||||
int citation_position
|
||||
text citation_text
|
||||
jsonb competitor_brands
|
||||
text raw_response
|
||||
timestamp queried_at
|
||||
}
|
||||
USERS ||--o{ QUERIES : "拥有"
|
||||
QUERIES ||--o{ CITATION_RECORDS : "产生"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
章节来源
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
|
||||
## 依赖关系分析
|
||||
- 组件内聚:API 路由与服务层职责清晰,模型仅负责映射。
|
||||
- 组件耦合:API 依赖服务,服务依赖数据库与配置;调度器依赖引擎与数据库;引擎依赖平台适配器。
|
||||
- 依赖注入:通过 FastAPI 依赖系统注入数据库会话与当前用户。
|
||||
- 循环依赖:未见明显循环依赖。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
API_AUTH["api/auth.py"] --> SVC_AUTH["services/auth.py"]
|
||||
API_AUTH --> DEPS["api/deps.py"]
|
||||
API_QUERIES["api/queries.py"] --> DEPS
|
||||
API_CIT["api/citations.py"] --> DEPS
|
||||
SVC_AUTH --> DB["database.py"]
|
||||
DEPS --> DB
|
||||
SCHED["workers/scheduler.py"] --> DB
|
||||
SCHED --> ENGINE["workers/citation_engine.py"]
|
||||
ENGINE --> MODELS["models/*.py"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/api/deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/api/deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
|
||||
## 性能考量
|
||||
- 异步 I/O:数据库与平台查询均采用异步,提升并发吞吐。
|
||||
- 会话管理:显式事务边界,避免长事务占用连接池。
|
||||
- 定时任务:APScheduler 异步调度,事件循环兼容处理,降低阻塞风险。
|
||||
- 索引优化:查询与引用表建立复合索引,加速过滤与排序。
|
||||
- 缓存建议:可引入 Redis 缓存热点查询结果与用户会话信息(配置中已预留 Redis 连接项,但尚未用于缓存)。
|
||||
- 日志采样:生产环境建议开启采样与结构化日志,避免高频日志影响性能。
|
||||
|
||||
## 故障排查指南
|
||||
- 认证失败:检查 JWT 密钥、过期时间与前端令牌传递;确认 OAuth2 tokenUrl 与 Bearer 头正确。
|
||||
- 数据库连接:核对 DATABASE_URL;确认容器网络可达;查看连接池与超时配置。
|
||||
- 定时任务异常:关注调度器日志,检查查询状态与平台适配器可用性;确认 next_query_at 计算逻辑。
|
||||
- 引擎执行失败:查看平台适配器错误与原始响应;检查品牌匹配器与竞争品牌检测逻辑。
|
||||
- CORS 问题:确认前端域名与请求头是否在允许范围内。
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/config.py:7-13](file://backend/app/config.py#L7-L13)
|
||||
- [backend/app/workers/scheduler.py:42-90](file://backend/app/workers/scheduler.py#L42-L90)
|
||||
- [backend/app/workers/citation_engine.py:211-227](file://backend/app/workers/citation_engine.py#L211-L227)
|
||||
|
||||
## 结论
|
||||
该架构以 FastAPI 为核心,结合异步数据库、定时任务与多平台适配器,形成高可用、可扩展的查询与引用检测系统。通过明确的分层与依赖注入,系统具备良好的可测试性与可维护性。建议在生产环境中完善日志与监控、接入缓存与告警,并持续优化索引与查询计划。
|
||||
|
||||
## 附录
|
||||
- API 设计原则:统一前缀与标签、明确响应模型、一致的状态码与错误消息。
|
||||
- 错误处理:在路由层捕获业务异常并转换为标准 HTTP 状态码;在依赖层统一 401 未授权。
|
||||
- 响应格式:遵循 Pydantic 模型序列化,确保前后端契约一致。
|
||||
- 架构决策背景:选择异步栈以提升 I/O 密集场景性能;APScheduler 简化定时任务编排;JWT 适合无状态认证场景。
|
||||
|
|
@ -0,0 +1,460 @@
|
|||
# 工作器系统
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [backend/app/workers/platforms/kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [backend/app/workers/platforms/wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [backend/app/workers/__init__.py](file://backend/app/workers/__init__.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/query_task.py](file://backend/app/models/query_task.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [引言](#引言)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 引言
|
||||
本文件为 GEO 工作器系统的全面架构文档,聚焦以下主题:
|
||||
- APScheduler 任务调度器的配置与使用:调度策略、并发控制与错误恢复
|
||||
- 工作器抽象基类(BaseWorker)设计:通用接口、生命周期与状态跟踪
|
||||
- 平台适配器架构:BasePlatformAdapter 抽象、接口规范与扩展机制
|
||||
- Kimi 平台适配器实现:Playwright 自动化、页面交互与重试策略
|
||||
- 文心平台适配器设计:Playwright 浏览器自动化、响应稳定检测、重试策略与配置管理
|
||||
- 工作器注册、启动与停止流程
|
||||
- 性能监控、资源管理与故障诊断
|
||||
|
||||
## 项目结构
|
||||
后端采用 FastAPI + SQLAlchemy Async + APScheduler 架构,工作器模块位于 backend/app/workers,包含调度器、引用检测引擎与平台适配器;数据库模型位于 backend/app/models,API 路由位于 backend/app/api。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "应用层"
|
||||
API["FastAPI 应用<br/>lifespan 启停"]
|
||||
QUERIES["查询 API 路由"]
|
||||
CITATIONS["引用 API 路由"]
|
||||
end
|
||||
subgraph "工作器层"
|
||||
SCHED["调度器 QueryScheduler"]
|
||||
ENGINE["引用检测引擎 CitationEngine"]
|
||||
KIMI["Kimi 适配器"]
|
||||
WENXIN["文心适配器"]
|
||||
end
|
||||
subgraph "数据层"
|
||||
MODELS["SQLAlchemy 模型<br/>Query/QueryTask/CitationRecord"]
|
||||
DB["AsyncSessionLocal"]
|
||||
end
|
||||
API --> SCHED
|
||||
API --> QUERIES
|
||||
API --> CITATIONS
|
||||
SCHED --> ENGINE
|
||||
ENGINE --> KIMI
|
||||
ENGINE --> WENXIN
|
||||
ENGINE --> MODELS
|
||||
SCHED --> MODELS
|
||||
MODELS --> DB
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/workers/scheduler.py:25-40](file://backend/app/workers/scheduler.py#L25-L40)
|
||||
- [backend/app/workers/citation_engine.py:148-158](file://backend/app/workers/citation_engine.py#L148-L158)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/database.py:12-18](file://backend/app/database.py#L12-L18)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/workers/__init__.py:1-13](file://backend/app/workers/__init__.py#L1-L13)
|
||||
|
||||
## 核心组件
|
||||
- 调度器 QueryScheduler:基于 APScheduler AsyncIOScheduler,每小时扫描并执行到期的查询任务,负责事件循环与异步任务派发。
|
||||
- 引用检测引擎 CitationEngine:编排平台适配器、品牌匹配、竞争品牌检测与结果持久化,维护 QueryTask 状态与 Query 下次执行时间。
|
||||
- 平台适配器:BasePlatformAdapter 定义统一接口,KimiAdapter/WenxinAdapter 实现浏览器自动化与响应稳定检测。
|
||||
- 数据模型:Query(查询)、QueryTask(任务)、CitationRecord(引用记录)三者通过外键关联,配合索引优化查询性能。
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/platforms/base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
## 架构总览
|
||||
系统通过 FastAPI 应用在 lifespan 中启动调度器,调度器周期性扫描数据库中到期的查询,调用 CitationEngine 执行跨平台检索与分析,并将结果写入数据库。平台适配器通过 Playwright 控制 Chromium 浏览器进行页面交互,具备指数退避重试与响应稳定性检测。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant App as "FastAPI 应用"
|
||||
participant Sched as "QueryScheduler"
|
||||
participant Engine as "CitationEngine"
|
||||
participant DB as "数据库"
|
||||
participant Plat as "平台适配器"
|
||||
App->>Sched : "启动调度器"
|
||||
loop 每小时
|
||||
Sched->>DB : "查询 active 且 next_query_at <= now()"
|
||||
DB-->>Sched : "查询列表"
|
||||
loop 遍历查询
|
||||
Sched->>Engine : "execute_query(query, db)"
|
||||
Engine->>DB : "创建/更新 QueryTask"
|
||||
Engine->>Plat : "adapter.query(keyword)"
|
||||
Plat-->>Engine : "原始响应文本"
|
||||
Engine->>Engine : "品牌匹配/竞争品牌检测"
|
||||
Engine->>DB : "写入 CitationRecord"
|
||||
Engine->>DB : "更新 Query.next_query_at"
|
||||
end
|
||||
end
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:17-21](file://backend/app/main.py#L17-L21)
|
||||
- [backend/app/workers/scheduler.py:30-90](file://backend/app/workers/scheduler.py#L30-L90)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [backend/app/models/query.py:27-31](file://backend/app/models/query.py#L27-L31)
|
||||
- [backend/app/models/query_task.py:24-32](file://backend/app/models/query_task.py#L24-L32)
|
||||
- [backend/app/models/citation_record.py:24-29](file://backend/app/models/citation_record.py#L24-L29)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 调度器(APScheduler)配置与使用
|
||||
- 调度策略
|
||||
- 使用 AsyncIOScheduler,作业类型为间隔触发(每小时一次),作业 ID 为“check_queries”,名称为“检查并执行到期的查询任务”。
|
||||
- 通过 replace_existing=True 确保重复启动时替换旧作业。
|
||||
- 并发控制
|
||||
- 同步包装函数 _run_check 在没有运行中事件循环时使用新事件循环执行;否则在当前事件循环创建任务,避免阻塞。
|
||||
- 每次检查独立创建任务,避免阻塞后续调度。
|
||||
- 错误恢复
|
||||
- 单个查询执行失败会记录错误并继续处理下一个查询,不影响整体调度。
|
||||
- 关闭时优雅停止调度器并关闭引擎资源。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["启动调度器"]) --> AddJob["添加间隔作业<br/>每小时触发"]
|
||||
AddJob --> StartOK["启动成功"]
|
||||
StartOK --> Loop["每小时执行一次"]
|
||||
Loop --> CheckDB["查询到期的 Query"]
|
||||
CheckDB --> ForEach{"是否有待执行查询?"}
|
||||
ForEach --> |是| ExecOne["_execute_single_query()"]
|
||||
ForEach --> |否| Sleep["等待下一小时"]
|
||||
ExecOne --> TryExec["捕获异常并记录"]
|
||||
TryExec --> Next["继续下一个查询"]
|
||||
Sleep --> Loop
|
||||
Next --> Loop
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/scheduler.py:30-90](file://backend/app/workers/scheduler.py#L30-L90)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
|
||||
### 引擎(CitationEngine)设计与流程
|
||||
- 组件职责
|
||||
- 维护平台适配器映射(wenxin/kimi)
|
||||
- 品牌匹配器(精确/别名/模糊)与竞争品牌检测
|
||||
- 任务状态管理(QueryTask)与下次查询时间计算
|
||||
- 生命周期与状态
|
||||
- 每个查询对应多个平台任务,逐个执行并更新 QueryTask 状态(pending → running → success/failed)
|
||||
- 成功时写入 CitationRecord,失败时仍写入一条 cited=False 的记录用于占位
|
||||
- 数据持久化
|
||||
- 使用 AsyncSessionLocal 进行事务性读写,提交后刷新对象状态
|
||||
- 平台扩展
|
||||
- 新增平台只需在 platforms 字典中注册适配器实例,无需修改引擎主流程
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class CitationEngine {
|
||||
+platforms : dict
|
||||
+matcher : BrandMatcher
|
||||
+competitor_detector : CompetitorDetector
|
||||
+execute_query(query, db) CitationRecord[]
|
||||
+execute_single_platform(keyword, platform, target_brand, aliases) dict
|
||||
+close() void
|
||||
}
|
||||
class BrandMatcher {
|
||||
+target_brand : str
|
||||
+brand_aliases : str[]
|
||||
+match(text) dict
|
||||
}
|
||||
class CompetitorDetector {
|
||||
+KNOWN_BRANDS : dict
|
||||
+detect(text, target_brand) str[]
|
||||
}
|
||||
class KimiAdapter {
|
||||
+platform_name : str
|
||||
+platform_url : str
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class WenxinAdapter {
|
||||
+platform_name : str
|
||||
+platform_url : str
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
CitationEngine --> KimiAdapter : "使用"
|
||||
CitationEngine --> WenxinAdapter : "使用"
|
||||
CitationEngine --> BrandMatcher : "使用"
|
||||
CitationEngine --> CompetitorDetector : "使用"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/platforms/kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
|
||||
### 平台适配器架构(BasePlatformAdapter)
|
||||
- 接口规范
|
||||
- 必须实现异步 query(keyword) -> str,返回平台原始响应文本
|
||||
- 可选实现 close() 清理资源
|
||||
- 扩展机制
|
||||
- 新平台继承 BasePlatformAdapter,设置 platform_name 与 platform_url,实现 query 与可选 close
|
||||
- 在 CitationEngine.platforms 中注册实例即可启用
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BasePlatformAdapter {
|
||||
<<abstract>>
|
||||
+platform_name : str
|
||||
+platform_url : str
|
||||
+query(keyword) str*
|
||||
+close() void*
|
||||
}
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/platforms/base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/platforms/base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
|
||||
### Kimi 平台适配器实现
|
||||
- 浏览器自动化
|
||||
- 使用 Playwright 启动 headless Chromium,设置视口与 UA
|
||||
- 导航至平台首页,动态查找输入框与发送按钮,支持多种选择器与回退策略
|
||||
- 页面交互逻辑
|
||||
- 输入关键词后尝试点击发送按钮或按下 Enter 键
|
||||
- 等待回复稳定:连续多次检测文本不再变化,超时则返回当前文本
|
||||
- 错误重试策略
|
||||
- 最多重试 3 次,指数退避(2^attempt 秒延迟)
|
||||
- 超时与异常转换为 RuntimeError 并记录日志
|
||||
- 资源管理
|
||||
- 每次查询结束后关闭 page/context,保证资源释放
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Adapter as "KimiAdapter"
|
||||
participant PW as "Playwright"
|
||||
participant Page as "Page"
|
||||
Adapter->>Adapter : "_ensure_browser()"
|
||||
Adapter->>PW : "启动 Chromium"
|
||||
Adapter->>Page : "new_context() + new_page()"
|
||||
Adapter->>Page : "goto(platform_url)"
|
||||
Adapter->>Page : "查找输入框/发送按钮"
|
||||
Adapter->>Page : "fill(keyword) / press Enter / click Send"
|
||||
Adapter->>Adapter : "_wait_for_response_stable()"
|
||||
Adapter-->>Adapter : "返回稳定文本"
|
||||
Adapter->>Page : "close()"
|
||||
Adapter->>PW : "stop()"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/platforms/kimi.py:21-125](file://backend/app/workers/platforms/kimi.py#L21-L125)
|
||||
- [backend/app/workers/platforms/kimi.py:126-197](file://backend/app/workers/platforms/kimi.py#L126-L197)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/platforms/kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
|
||||
### 文心平台适配器设计
|
||||
- 设计要点
|
||||
- 结构与 Kimi 类似,使用 Playwright 控制浏览器,支持多选择器定位输入框与发送按钮
|
||||
- 等待回复稳定算法一致,超时返回当前文本
|
||||
- 指数退避重试,异常转为 RuntimeError
|
||||
- 配置管理
|
||||
- 通过 Config 设置 Playwright 浏览器路径等参数,适配容器环境
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/platforms/wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
- [backend/app/config.py:11-13](file://backend/app/config.py#L11-L13)
|
||||
|
||||
### 工作器注册、启动与停止流程
|
||||
- 注册
|
||||
- 通过 workers/__init__.py 暴露 CitationEngine、KimiAdapter、WenxinAdapter、QueryScheduler、query_scheduler
|
||||
- 启动
|
||||
- FastAPI lifespan 中调用 query_scheduler.start(),内部注册 APScheduler 作业并启动
|
||||
- 停止
|
||||
- 应用退出时调用 query_scheduler.shutdown(),优雅关闭调度器与引擎资源
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Main as "main.py"
|
||||
participant Lifespan as "lifespan"
|
||||
participant Sched as "QueryScheduler"
|
||||
participant Engine as "CitationEngine"
|
||||
Main->>Lifespan : "应用启动"
|
||||
Lifespan->>Sched : "start()"
|
||||
Sched->>Sched : "add_job(check_queries)"
|
||||
Sched->>Engine : "初始化引擎"
|
||||
Main-->>Lifespan : "yield"
|
||||
Lifespan->>Sched : "shutdown()"
|
||||
Sched->>Engine : "close()"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/workers/scheduler.py:30-90](file://backend/app/workers/scheduler.py#L30-L90)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/workers/__init__.py:1-13](file://backend/app/workers/__init__.py#L1-L13)
|
||||
|
||||
## 依赖分析
|
||||
- 外部依赖
|
||||
- Web/ASGI:FastAPI、Uvicorn
|
||||
- ORM/数据库:SQLAlchemy 2.x、asyncpg、Alembic
|
||||
- 任务调度:APScheduler
|
||||
- 浏览器自动化:Playwright
|
||||
- 配置:Pydantic Settings、python-dotenv
|
||||
- 内部模块耦合
|
||||
- scheduler 依赖 database 与 models,调用 CitationEngine
|
||||
- citation_engine 依赖 platforms 子模块与 models
|
||||
- platforms 依赖 base 抽象类
|
||||
- main 依赖 scheduler 并在 lifespan 中启停
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
REQ["requirements.txt"] --> FAST["FastAPI"]
|
||||
REQ --> SQL["SQLAlchemy/asyncpg/Alembic"]
|
||||
REQ --> APS["APScheduler"]
|
||||
REQ --> PW["Playwright"]
|
||||
REQ --> PYD["Pydantic Settings"]
|
||||
MAIN["main.py"] --> SCHED["scheduler.py"]
|
||||
SCHED --> ENGINE["citation_engine.py"]
|
||||
ENGINE --> KIMI["platforms/kimi.py"]
|
||||
ENGINE --> WENXIN["platforms/wenxin.py"]
|
||||
ENGINE --> MODELS["models/*.py"]
|
||||
SCHED --> MODELS
|
||||
MODELS --> DB["database.py"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [backend/app/main.py:10-21](file://backend/app/main.py#L10-L21)
|
||||
- [backend/app/workers/scheduler.py:18-20](file://backend/app/workers/scheduler.py#L18-L20)
|
||||
- [backend/app/workers/citation_engine.py:13-14](file://backend/app/workers/citation_engine.py#L13-L14)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/database.py:6-18](file://backend/app/database.py#L6-L18)
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
|
||||
## 性能考虑
|
||||
- 调度频率与并发
|
||||
- 每小时一次的调度频率适合周期性任务;如需更频繁,可调整 APScheduler 触发器
|
||||
- 检查过程为异步批处理,单个查询失败不会阻塞其他查询
|
||||
- 数据库访问
|
||||
- 使用 AsyncSessionLocal,并为常用过滤字段(如 queries.status、queries.next_query_at)建立索引以提升查询效率
|
||||
- 引擎在事务内批量写入,减少往返开销
|
||||
- 浏览器资源
|
||||
- 每次查询后及时关闭 page/context,避免内存泄漏
|
||||
- 重试采用指数退避,降低平台限流风险
|
||||
- 平台扩展
|
||||
- 新增平台仅需实现 query/close,通过引擎映射注册,不影响现有流程
|
||||
|
||||
## 故障排查指南
|
||||
- 调度器未启动
|
||||
- 确认 lifespan 正常执行,检查日志中是否出现“查询调度器已启动”
|
||||
- 如无输出,检查 main.py 中 lifespan 注册与 FastAPI 版本兼容性
|
||||
- 查询未执行
|
||||
- 检查 Query.status 是否为 active,next_query_at 是否小于等于当前时间
|
||||
- 确认数据库连接字符串与表结构正确
|
||||
- 平台适配器异常
|
||||
- Playwright 未安装:根据日志提示运行安装命令
|
||||
- 页面选择器失效:平台 UI 变更导致,需更新选择器策略
|
||||
- 超时与不稳定:适当增加等待时间或放宽稳定性阈值
|
||||
- 引擎写入失败
|
||||
- 检查数据库事务提交与异常捕获,确认 CitationRecord 字段完整性
|
||||
- 资源泄露
|
||||
- 确认每次查询后 page/context 已关闭,必要时调用 adapter.close()
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:42-90](file://backend/app/workers/scheduler.py#L42-L90)
|
||||
- [backend/app/workers/platforms/kimi.py:21-32](file://backend/app/workers/platforms/kimi.py#L21-L32)
|
||||
- [backend/app/workers/platforms/wenxin.py:21-32](file://backend/app/workers/platforms/wenxin.py#L21-L32)
|
||||
- [backend/app/workers/citation_engine.py:209-228](file://backend/app/workers/citation_engine.py#L209-L228)
|
||||
|
||||
## 结论
|
||||
本系统以 APScheduler 为核心调度器,结合 CitationEngine 的平台编排能力与 Playwright 的浏览器自动化,实现了跨平台的引用检测与分析。通过清晰的抽象与模块化设计,系统具备良好的可扩展性与可维护性。建议在生产环境中进一步完善监控指标、日志分级与平台 UI 变更的自适应策略。
|
||||
|
||||
## 附录
|
||||
- API 路由概览
|
||||
- 查询管理:GET/POST/PUT/DELETE /api/v1/queries
|
||||
- 引用数据:GET /api/v1/citations 与 GET /api/v1/citations/stats
|
||||
- 立即执行:POST /api/v1/queries/{query_id}/run-now
|
||||
- 数据模型 ER 关系
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
QUERY {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
json brand_aliases
|
||||
json platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamp last_queried_at
|
||||
timestamp next_query_at
|
||||
}
|
||||
QUERY_TASK {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
string status
|
||||
text error_message
|
||||
timestamp scheduled_at
|
||||
timestamp started_at
|
||||
timestamp completed_at
|
||||
}
|
||||
CITATION_RECORD {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
boolean cited
|
||||
int citation_position
|
||||
text citation_text
|
||||
json competitor_brands
|
||||
text raw_response
|
||||
timestamp queried_at
|
||||
}
|
||||
QUERY ||--o{ QUERY_TASK : "包含"
|
||||
QUERY ||--o{ CITATION_RECORD : "包含"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
|
@ -0,0 +1,750 @@
|
|||
# 引用检测引擎
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [backend/app/workers/platforms/kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [backend/app/workers/platforms/wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [backend/app/workers/platforms/tongyi.py](file://backend/app/workers/platforms/tongyi.py)
|
||||
- [backend/app/workers/platforms/doubao.py](file://backend/app/workers/platforms/doubao.py)
|
||||
- [backend/app/workers/platforms/qingyan.py](file://backend/app/workers/platforms/qingyan.py)
|
||||
- [backend/app/workers/platforms/tiangong.py](file://backend/app/workers/platforms/tiangong.py)
|
||||
- [backend/app/workers/platforms/xinghuo.py](file://backend/app/workers/platforms/xinghuo.py)
|
||||
- [backend/app/workers/platforms/search_engine.py](file://backend/app/workers/platforms/search_engine.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/services/citation.py](file://backend/app/services/citation.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/query_task.py](file://backend/app/models/query_task.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [tests/test_citation_engine.py](file://tests/test_citation_engine.py)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
</cite>
|
||||
|
||||
## 更新摘要
|
||||
**变更内容**
|
||||
- 新增5个搜索引擎适配器:通义千问、豆包、智谱清言、天工、讯飞星火(连同 Kimi、文心一言,平台适配器共7个)
|
||||
- 改进错误处理机制,实现指数退避重试策略
|
||||
- 新增字符清理机制,防止PostgreSQL插入失败
|
||||
- 新增异步任务执行功能,支持立即查询执行
|
||||
- 优化搜索引擎模式,统一平台查询策略
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本技术文档面向"GEO引用检测引擎",系统性阐述引用检测算法的实现原理与工程实践,涵盖文本预处理、品牌识别与上下文分析;详解BrandMatcher类的设计与实现(精确匹配、别名匹配、模糊匹配策略与正则规则);解析CompetitorDetector的竞争品牌识别机制(竞争关系定义、相似度计算与过滤规则);说明置信度评分体系(评分算法、阈值与结果排序);梳理异步处理与并发控制策略;并提供错误处理、日志记录与性能监控建议,以及与AI平台的集成接口与数据流转过程。
|
||||
|
||||
**更新** 新增搜索引擎适配器支持,改进错误处理机制,新增异步任务执行功能。
|
||||
|
||||
## 项目结构
|
||||
后端采用FastAPI + SQLAlchemy异步ORM + APScheduler定时任务的分层架构:
|
||||
- API层:提供引用数据查询、统计与立即执行接口
|
||||
- 服务层:封装数据访问与聚合统计逻辑
|
||||
- 工作器层:引用检测引擎、平台适配器、调度器
|
||||
- 模型层:查询、任务、引用记录等持久化模型
|
||||
- 配置与入口:应用生命周期管理、CORS、路由注册
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "API层"
|
||||
API_C["API: 引用数据<br/>citations.py"]
|
||||
API_Q["API: 查询管理<br/>queries.py"]
|
||||
end
|
||||
subgraph "服务层"
|
||||
SVC["服务: 引用数据<br/>services/citation.py"]
|
||||
SVC_Q["服务: 查询管理<br/>services/query.py"]
|
||||
end
|
||||
subgraph "工作器层"
|
||||
ENG["引擎: 引用检测<br/>workers/citation_engine.py"]
|
||||
SCH["调度器<br/>workers/scheduler.py"]
|
||||
ADP_BASE["适配器基类<br/>workers/platforms/base.py"]
|
||||
ADP_KIMI["适配器: Kimi<br/>workers/platforms/kimi.py"]
|
||||
ADP_WENXIN["适配器: 文心一言<br/>workers/platforms/wenxin.py"]
|
||||
ADP_TONGYI["适配器: 通义千问<br/>workers/platforms/tongyi.py"]
|
||||
ADP_DOUBAO["适配器: 豆包<br/>workers/platforms/doubao.py"]
|
||||
ADP_QINGYAN["适配器: 智谱清言<br/>workers/platforms/qingyan.py"]
|
||||
ADP_TIANGONG["适配器: 天工<br/>workers/platforms/tiangong.py"]
|
||||
ADP_XINGHUO["适配器: 讯飞星火<br/>workers/platforms/xinghuo.py"]
|
||||
SEARCH_ENGINE["搜索引擎模块<br/>workers/platforms/search_engine.py"]
|
||||
end
|
||||
subgraph "模型层"
|
||||
M_QUERY["模型: 查询<br/>models/query.py"]
|
||||
M_TASK["模型: 任务<br/>models/query_task.py"]
|
||||
M_CIT["模型: 引用记录<br/>models/citation_record.py"]
|
||||
end
|
||||
subgraph "配置与入口"
|
||||
CFG["配置<br/>config.py"]
|
||||
MAIN["入口<br/>main.py"]
|
||||
end
|
||||
API_C --> SVC
|
||||
API_Q --> SVC_Q
|
||||
SVC --> ENG
|
||||
SVC_Q --> ENG
|
||||
ENG --> ADP_KIMI
|
||||
ENG --> ADP_WENXIN
|
||||
ENG --> ADP_TONGYI
|
||||
ENG --> ADP_DOUBAO
|
||||
ENG --> ADP_QINGYAN
|
||||
ENG --> ADP_TIANGONG
|
||||
ENG --> ADP_XINGHUO
|
||||
ENG --> SEARCH_ENGINE
|
||||
SCH --> ENG
|
||||
ENG --> M_CIT
|
||||
SVC --> M_CIT
|
||||
API_C --> M_CIT
|
||||
ENG --> M_QUERY
|
||||
ENG --> M_TASK
|
||||
MAIN --> SCH
|
||||
MAIN --> API_C
|
||||
MAIN --> API_Q
|
||||
CFG --> MAIN
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/api/queries.py:90-108](file://backend/app/api/queries.py#L90-L108)
|
||||
- [backend/app/services/citation.py:1-269](file://backend/app/services/citation.py#L1-L269)
|
||||
- [backend/app/services/query.py:1-123](file://backend/app/services/query.py#L1-L123)
|
||||
- [backend/app/workers/citation_engine.py:1-330](file://backend/app/workers/citation_engine.py#L1-L330)
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-37](file://backend/app/workers/platforms/kimi.py#L1-L37)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-37](file://backend/app/workers/platforms/wenxin.py#L1-L37)
|
||||
- [backend/app/workers/platforms/tongyi.py:1-38](file://backend/app/workers/platforms/tongyi.py#L1-L38)
|
||||
- [backend/app/workers/platforms/doubao.py:1-38](file://backend/app/workers/platforms/doubao.py#L1-L38)
|
||||
- [backend/app/workers/platforms/qingyan.py:1-38](file://backend/app/workers/platforms/qingyan.py#L1-L38)
|
||||
- [backend/app/workers/platforms/tiangong.py:1-38](file://backend/app/workers/platforms/tiangong.py#L1-L38)
|
||||
- [backend/app/workers/platforms/xinghuo.py:1-38](file://backend/app/workers/platforms/xinghuo.py#L1-L38)
|
||||
- [backend/app/workers/platforms/search_engine.py:1-174](file://backend/app/workers/platforms/search_engine.py#L1-L174)
|
||||
- [backend/app/workers/scheduler.py:1-121](file://backend/app/workers/scheduler.py#L1-L121)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/api/queries.py:90-108](file://backend/app/api/queries.py#L90-L108)
|
||||
- [backend/app/services/citation.py:1-269](file://backend/app/services/citation.py#L1-L269)
|
||||
- [backend/app/services/query.py:1-123](file://backend/app/services/query.py#L1-L123)
|
||||
- [backend/app/workers/citation_engine.py:1-330](file://backend/app/workers/citation_engine.py#L1-L330)
|
||||
- [backend/app/workers/scheduler.py:1-121](file://backend/app/workers/scheduler.py#L1-L121)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
## 核心组件
|
||||
- 引用检测引擎:负责跨平台查询、品牌匹配、竞争品牌识别与结果落库
|
||||
- 品牌匹配器BrandMatcher:精确匹配、别名匹配、模糊匹配三阶段策略
|
||||
- 竞争品牌检测器CompetitorDetector:基于预定义品牌类别集合进行竞争关系识别
|
||||
- 平台适配器:Kimi、文心一言、通义千问、豆包、智谱清言、天工、讯飞星火适配器,封装Playwright自动化查询流程
|
||||
- 搜索引擎模块:提供DuckDuckGo和Wikipedia搜索功能,作为备用查询源
|
||||
- 定时调度器:基于APScheduler的周期性任务调度
|
||||
- 服务与API:提供查询历史、统计与立即执行能力
|
||||
|
||||
**更新** 新增5个搜索引擎适配器(平台适配器共7个),改进错误处理机制,新增异步任务执行功能。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:161-330](file://backend/app/workers/citation_engine.py#L161-L330)
|
||||
- [backend/app/workers/platforms/tongyi.py:1-38](file://backend/app/workers/platforms/tongyi.py#L1-L38)
|
||||
- [backend/app/workers/platforms/doubao.py:1-38](file://backend/app/workers/platforms/doubao.py#L1-L38)
|
||||
- [backend/app/workers/platforms/qingyan.py:1-38](file://backend/app/workers/platforms/qingyan.py#L1-L38)
|
||||
- [backend/app/workers/platforms/tiangong.py:1-38](file://backend/app/workers/platforms/tiangong.py#L1-L38)
|
||||
- [backend/app/workers/platforms/xinghuo.py:1-38](file://backend/app/workers/platforms/xinghuo.py#L1-L38)
|
||||
- [backend/app/workers/platforms/search_engine.py:1-174](file://backend/app/workers/platforms/search_engine.py#L1-L174)
|
||||
- [backend/app/workers/scheduler.py:1-121](file://backend/app/workers/scheduler.py#L1-L121)
|
||||
- [backend/app/services/citation.py:219-295](file://backend/app/services/citation.py#L219-L295)
|
||||
- [backend/app/api/queries.py:90-108](file://backend/app/api/queries.py#L90-L108)
|
||||
|
||||
## 架构总览
|
||||
引擎通过API触发或定时调度器自动触发,调用平台适配器获取AI回复,随后由BrandMatcher进行品牌识别,CompetitorDetector识别竞争品牌,最终将结果写入数据库并返回。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant API as "API : 引用数据"
|
||||
participant Svc as "服务 : 引用数据"
|
||||
participant Eng as "引擎 : 引用检测"
|
||||
participant Plat as "平台适配器"
|
||||
participant Search as "搜索引擎模块"
|
||||
participant DB as "数据库"
|
||||
Client->>API : GET /api/v1/citations/stats
|
||||
API->>Svc : 统计查询
|
||||
Svc->>DB : 查询引用记录
|
||||
DB-->>Svc : 结果集
|
||||
Svc-->>API : 统计数据
|
||||
API-->>Client : 响应
|
||||
Client->>API : POST /api/v1/queries/{id}/run-now
|
||||
API->>Svc : 触发立即查询
|
||||
Svc-->>API : 任务ID
|
||||
API-->>Client : 202 Accepted
|
||||
Note over Eng,DB : 定时调度器每小时检查到期查询
|
||||
Eng->>Plat : query(keyword)
|
||||
Note over Plat,Search : 搜索引擎模式:通过fetch_search_content获取真实内容
|
||||
Plat->>Search : fetch_search_content(keyword)
|
||||
Search-->>Plat : 搜索结果摘要
|
||||
Plat-->>Eng : 原始回复文本
|
||||
Eng->>Eng : BrandMatcher.match(text)
|
||||
Eng->>Eng : CompetitorDetector.detect(text, target)
|
||||
Eng->>DB : 写入CitationRecord
|
||||
DB-->>Eng : 确认
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/api/queries.py:90-108](file://backend/app/api/queries.py#L90-L108)
|
||||
- [backend/app/services/citation.py:219-295](file://backend/app/services/citation.py#L219-L295)
|
||||
- [backend/app/workers/citation_engine.py:177-254](file://backend/app/workers/citation_engine.py#L177-L254)
|
||||
- [backend/app/workers/platforms/tongyi.py:16-33](file://backend/app/workers/platforms/tongyi.py#L16-L33)
|
||||
- [backend/app/workers/platforms/search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
- [backend/app/workers/platforms/kimi.py:16-33](file://backend/app/workers/platforms/kimi.py#L16-L33)
|
||||
- [backend/app/workers/platforms/wenxin.py:16-33](file://backend/app/workers/platforms/wenxin.py#L16-L33)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### BrandMatcher:品牌匹配器
|
||||
- 设计要点
|
||||
- 三阶段匹配策略:精确匹配 > 别名匹配 > 模糊匹配
|
||||
- 上下文提取:定位首次出现段落位置与截取片段
|
||||
- 置信度评分:精确=1.0,别名=0.9,模糊=相似度得分(四舍五入后的值)
|
||||
- 实现细节
|
||||
- 精确匹配:直接包含判断
|
||||
- 别名匹配:遍历别名列表
|
||||
- 模糊匹配:对候选词(按非文字字符拆分且长度≥2)使用序列相似度比较,阈值>0.4
|
||||
- 候选词提取:支持中文与英文混合场景
|
||||
- 上下文截取:按换行切分段落,截取首个命中段落的前200个字符
|
||||
- 复杂度分析
|
||||
- 候选词提取:O(n),n为文本长度
|
||||
- 模糊匹配:O(m*k),m为候选词数量,k为别名/目标词数量
|
||||
- 总体近似线性,受文本长度与候选词规模影响
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["进入 match(text)"]) --> Empty{"text为空?"}
|
||||
Empty --> |是| RetEmpty["返回未命中"]
|
||||
Empty --> |否| Exact["精确匹配 target_brand"]
|
||||
Exact --> |命中| PosCtx["提取段落位置与上下文"] --> RetExact["返回精确匹配结果"]
|
||||
Exact --> |未命中| Alias["遍历别名进行匹配"]
|
||||
Alias --> |命中| PosCtx2["提取段落位置与上下文"] --> RetAlias["返回别名匹配结果"]
|
||||
Alias --> |未命中| Fuzzy["提取候选词并计算相似度"]
|
||||
Fuzzy --> Best{"最佳相似度>0.4?"}
|
||||
Best --> |是| PosCtx3["提取段落位置与上下文"] --> RetFuzzy["返回模糊匹配结果"]
|
||||
Best --> |否| RetEmpty
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/citation_engine.py:39-113](file://backend/app/workers/citation_engine.py#L39-L113)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:32-133](file://backend/app/workers/citation_engine.py#L32-L133)
|
||||
- [tests/test_citation_engine.py:6-127](file://tests/test_citation_engine.py#L6-L127)
|
||||
|
||||
### CompetitorDetector:竞争品牌检测器
|
||||
- 设计要点
|
||||
- 基于预定义品牌类别集合(如保险、金融、科技)
|
||||
- 排除目标品牌,返回去重后的竞争品牌列表
|
||||
- 实现细节
|
||||
- 遍历类别与品牌,进行包含判断
|
||||
- 使用集合去重,最终排序输出
|
||||
- 复杂度分析
|
||||
- O(C*B),C为类别数,B为类别内品牌数
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start2(["进入 detect(text, target)"]) --> Empty2{"text为空?"}
|
||||
Empty2 --> |是| RetEmpty2["返回空列表"]
|
||||
Empty2 --> |否| LoopCat["遍历已知品牌类别"]
|
||||
LoopCat --> LoopBrand["遍历类别内品牌"]
|
||||
LoopBrand --> Filter{"品牌==target?"}
|
||||
Filter --> |是| NextBrand["跳过"]
|
||||
Filter --> |否| Contain{"品牌出现在text中?"}
|
||||
Contain --> |是| Add["加入集合"]
|
||||
Contain --> |否| NextBrand
|
||||
NextBrand --> LoopBrand
|
||||
LoopBrand --> Done["排序并返回列表"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/citation_engine.py:145-158](file://backend/app/workers/citation_engine.py#L145-L158)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:135-159](file://backend/app/workers/citation_engine.py#L135-L159)
|
||||
- [tests/test_citation_engine.py:39-109](file://tests/test_citation_engine.py#L39-L109)
|
||||
|
||||
### CitationEngine:引用检测引擎
|
||||
- 职责
|
||||
- 组织跨平台查询与检测流程
|
||||
- 维护查询任务状态与时间字段
|
||||
- 将结果持久化为引用记录
|
||||
- 实施字符清理机制,防止数据库插入失败
|
||||
- 关键流程
|
||||
- 初始化平台适配器与匹配器
|
||||
- 遍历平台列表,执行单平台查询与检测
|
||||
- 写入CitationRecord,更新Query时间字段
|
||||
- 异常处理:记录失败状态与错误信息
|
||||
- 并发与异步
|
||||
- 引擎方法为异步,平台适配器亦为异步
|
||||
- 平台间顺序执行(当前实现),可扩展为并行执行以提升吞吐
|
||||
- 字符清理
|
||||
- 新增`_sanitize_raw_response`函数,移除无效控制字符
|
||||
- 防止PostgreSQL UTF-8插入失败
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Q as "Query"
|
||||
participant E as "CitationEngine"
|
||||
participant P as "平台适配器"
|
||||
participant M as "BrandMatcher"
|
||||
participant C as "CompetitorDetector"
|
||||
participant R as "CitationRecord"
|
||||
E->>E : 创建BrandMatcher(target, aliases)
|
||||
loop 遍历平台
|
||||
E->>P : query(keyword)
|
||||
P-->>E : 原始回复文本
|
||||
E->>E : _sanitize_raw_response(raw_response)
|
||||
E->>M : match(sanitized_text)
|
||||
M-->>E : 匹配结果
|
||||
E->>C : detect(sanitized_text, target)
|
||||
C-->>E : 竞争品牌
|
||||
E->>R : 写入记录
|
||||
end
|
||||
E->>Q : 更新last_queried_at/next_query_at
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/citation_engine.py:177-254](file://backend/app/workers/citation_engine.py#L177-L254)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:161-330](file://backend/app/workers/citation_engine.py#L161-L330)
|
||||
|
||||
### 平台适配器:Kimi、文心一言与新增搜索引擎适配器
|
||||
- 抽象基类
|
||||
- BasePlatformAdapter定义平台名称、URL与抽象query/close方法
|
||||
- 具体实现
|
||||
- **传统适配器**(Kimi、文心一言):启动Playwright与无头浏览器,导航至平台URL
|
||||
- **搜索引擎适配器**(通义千问、豆包、智谱清言、天工、讯飞星火):通过fetch_search_content获取真实内容
|
||||
- 自动查找输入框、填充关键词、提交查询(回车或点击发送)
|
||||
- 等待回复稳定(连续多次检测文本不变),返回最新回复
|
||||
- 失败重试与指数退避,超时警告
|
||||
- 资源清理:关闭page/context/browser与playwright实例
|
||||
- 错误处理
|
||||
- 无法找到输入框、页面超时、最终失败抛出运行时异常
|
||||
- 日志记录每次重试与最终失败原因
|
||||
- 指数退避重试机制,最多3次尝试
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BasePlatformAdapter {
|
||||
+string platform_name
|
||||
+string platform_url
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class KimiAdapter {
|
||||
+platform_name = "kimi"
|
||||
+platform_url = "https : //kimi.moonshot.cn"
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class WenxinAdapter {
|
||||
+platform_name = "wenxin"
|
||||
+platform_url = "https : //yiyan.baidu.com"
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class TongyiAdapter {
|
||||
+platform_name = "tongyi"
|
||||
+platform_url = "https : //tongyi.aliyun.com/qianwen"
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class DoubaoAdapter {
|
||||
+platform_name = "doubao"
|
||||
+platform_url = "https : //www.doubao.com/"
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class QingyanAdapter {
|
||||
+platform_name = "qingyan"
|
||||
+platform_url = "https : //chatglm.cn/"
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class TiangongAdapter {
|
||||
+platform_name = "tiangong"
|
||||
+platform_url = "https : //www.tiangong.cn/"
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class XinghuoAdapter {
|
||||
+platform_name = "xinghuo"
|
||||
+platform_url = "https : //xinghuo.xfyun.cn/"
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
KimiAdapter --|> BasePlatformAdapter
|
||||
WenxinAdapter --|> BasePlatformAdapter
|
||||
TongyiAdapter --|> BasePlatformAdapter
|
||||
DoubaoAdapter --|> BasePlatformAdapter
|
||||
QingyanAdapter --|> BasePlatformAdapter
|
||||
TiangongAdapter --|> BasePlatformAdapter
|
||||
XinghuoAdapter --|> BasePlatformAdapter
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-37](file://backend/app/workers/platforms/kimi.py#L1-L37)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-37](file://backend/app/workers/platforms/wenxin.py#L1-L37)
|
||||
- [backend/app/workers/platforms/tongyi.py:1-38](file://backend/app/workers/platforms/tongyi.py#L1-L38)
|
||||
- [backend/app/workers/platforms/doubao.py:1-38](file://backend/app/workers/platforms/doubao.py#L1-L38)
|
||||
- [backend/app/workers/platforms/qingyan.py:1-38](file://backend/app/workers/platforms/qingyan.py#L1-L38)
|
||||
- [backend/app/workers/platforms/tiangong.py:1-38](file://backend/app/workers/platforms/tiangong.py#L1-L38)
|
||||
- [backend/app/workers/platforms/xinghuo.py:1-38](file://backend/app/workers/platforms/xinghuo.py#L1-L38)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-37](file://backend/app/workers/platforms/kimi.py#L1-L37)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-37](file://backend/app/workers/platforms/wenxin.py#L1-L37)
|
||||
- [backend/app/workers/platforms/tongyi.py:1-38](file://backend/app/workers/platforms/tongyi.py#L1-L38)
|
||||
- [backend/app/workers/platforms/doubao.py:1-38](file://backend/app/workers/platforms/doubao.py#L1-L38)
|
||||
- [backend/app/workers/platforms/qingyan.py:1-38](file://backend/app/workers/platforms/qingyan.py#L1-L38)
|
||||
- [backend/app/workers/platforms/tiangong.py:1-38](file://backend/app/workers/platforms/tiangong.py#L1-L38)
|
||||
- [backend/app/workers/platforms/xinghuo.py:1-38](file://backend/app/workers/platforms/xinghuo.py#L1-L38)
|
||||
|
||||
### 搜索引擎模块:fetch_search_content
|
||||
- 功能
|
||||
- 提供统一的搜索引擎接口,支持DuckDuckGo和Wikipedia搜索
|
||||
- 作为备用查询源,当平台适配器无法正常工作时使用
|
||||
- 实现细节
|
||||
- DuckDuckGo HTML搜索:无需API Key,搜索失败时自动回退到Wikipedia
|
||||
- Wikipedia API搜索:稳定可靠,返回百科内容摘要
|
||||
- HTML内容清理:去除标签和实体,保留可读文本
|
||||
- 字符串清理:移除引用标记和多余空白
|
||||
- 错误处理
|
||||
- 搜索失败时自动回退到Wikipedia
|
||||
- 所有搜索源均失败时抛出运行时异常
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/search_engine.py:1-174](file://backend/app/workers/platforms/search_engine.py#L1-L174)
|
||||
|
||||
### 定时调度器:QueryScheduler
|
||||
- 职责
|
||||
- 每小时扫描到期查询(status=active且next_query_at<=now)
|
||||
- 逐条执行CitationEngine.execute_query
|
||||
- 应用生命周期内启动与关闭
|
||||
- 兜底检查:每分钟检查遗留的pending任务
|
||||
- 并发控制
|
||||
- 使用AsyncIOScheduler与事件循环
|
||||
- 检查与执行过程均为异步,避免阻塞
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
StartS(["启动调度器"]) --> AddJob["添加每小时任务"]
|
||||
AddJob --> AddJob2["添加每分钟任务"]
|
||||
AddJob2 --> Loop["每小时执行检查"]
|
||||
Loop --> Select["查询到期的Query"]
|
||||
Select --> Exec["逐条执行execute_query"]
|
||||
Exec --> Loop
|
||||
Loop --> PendingCheck["每分钟检查遗留任务"]
|
||||
PendingCheck --> ExecPending["执行遗留任务"]
|
||||
ExecPending --> Loop
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/scheduler.py:33-121](file://backend/app/workers/scheduler.py#L33-L121)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:1-121](file://backend/app/workers/scheduler.py#L1-L121)
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
|
||||
### 服务与API:引用数据与统计
|
||||
- API
|
||||
- 列表查询:支持按query_id、平台、时间范围筛选,分页
|
||||
- 统计接口:总查询次数、引用次数、引用率、平均位置、按平台统计、趋势
|
||||
- 立即执行:将查询任务加入队列,返回任务ID
|
||||
- 服务
|
||||
- 权限校验:仅允许用户访问自己的查询数据
|
||||
- 统计聚合:使用SQL聚合函数计算各项指标
|
||||
- CSV导出:按查询导出引用记录CSV
|
||||
- 异步任务执行:使用asyncio.create_task后台执行查询
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant API as "API : 引用数据"
|
||||
participant Svc as "服务 : 引用数据"
|
||||
participant DB as "数据库"
|
||||
Client->>API : GET /api/v1/citations/?query_id=...
|
||||
API->>Svc : get_citations(...)
|
||||
Svc->>DB : 查询引用记录(带权限校验)
|
||||
DB-->>Svc : 记录列表与总数
|
||||
Svc-->>API : 返回数据
|
||||
API-->>Client : 响应
|
||||
Client->>API : GET /api/v1/citations/stats?query_id=...
|
||||
API->>Svc : get_citation_stats(...)
|
||||
Svc->>DB : 聚合统计
|
||||
DB-->>Svc : 统计结果
|
||||
Svc-->>API : 返回统计
|
||||
API-->>Client : 响应
|
||||
Client->>API : POST /api/v1/queries/{id}/run-now
|
||||
API->>Svc : trigger_query_now
|
||||
Svc->>DB : 创建QueryTask
|
||||
Svc->>Svc : asyncio.create_task执行查询
|
||||
Svc-->>API : 202 Accepted + task_id
|
||||
API-->>Client : 响应
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/api/queries.py:90-108](file://backend/app/api/queries.py#L90-L108)
|
||||
- [backend/app/services/citation.py:219-295](file://backend/app/services/citation.py#L219-L295)
|
||||
- [backend/app/services/citation.py:264-295](file://backend/app/services/citation.py#L264-L295)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/api/queries.py:90-108](file://backend/app/api/queries.py#L90-L108)
|
||||
- [backend/app/services/citation.py:1-269](file://backend/app/services/citation.py#L1-L269)
|
||||
- [backend/app/services/query.py:1-123](file://backend/app/services/query.py#L1-L123)
|
||||
|
||||
### 数据模型:查询、任务与引用记录
|
||||
- Query:查询词、目标品牌、别名、平台、频率、状态与时间字段
|
||||
- QueryTask:查询任务,记录平台、状态、错误信息与时间戳
|
||||
- CitationRecord:引用记录,包含是否引用、引用位置、引用文本、竞争品牌、原始响应与查询时间
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
json brand_aliases
|
||||
json platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamp last_queried_at
|
||||
timestamp next_query_at
|
||||
}
|
||||
QUERY_TASKS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
string status
|
||||
text error_message
|
||||
timestamp scheduled_at
|
||||
timestamp started_at
|
||||
timestamp completed_at
|
||||
}
|
||||
CITATION_RECORDS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
boolean cited
|
||||
int citation_position
|
||||
text citation_text
|
||||
json competitor_brands
|
||||
text raw_response
|
||||
timestamp queried_at
|
||||
}
|
||||
QUERIES ||--o{ QUERY_TASKS : "包含"
|
||||
QUERIES ||--o{ CITATION_RECORDS : "包含"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
|
||||
## 依赖分析
|
||||
- 外部依赖
|
||||
- Web框架:FastAPI、Uvicorn
|
||||
- ORM与迁移:SQLAlchemy、Alembic
|
||||
- 任务调度:APScheduler
|
||||
- 浏览器自动化:Playwright
|
||||
- HTTP客户端:httpx
|
||||
- 缓存:Redis(配置项存在)
|
||||
- 配置:pydantic-settings
|
||||
- 内部模块耦合
|
||||
- API依赖服务层;服务层依赖模型与引擎;引擎依赖适配器与模型
|
||||
- 调度器独立于API,通过引擎执行查询
|
||||
- 适配器与引擎解耦,便于扩展新平台
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
FastAPI --> API
|
||||
API --> Service
|
||||
Service --> Engine
|
||||
Engine --> AdapterBase
|
||||
Engine --> Models
|
||||
Engine --> SearchEngine
|
||||
Scheduler --> Engine
|
||||
AdapterKimi --> AdapterBase
|
||||
AdapterWenxin --> AdapterBase
|
||||
AdapterTongyi --> AdapterBase
|
||||
AdapterDoubao --> AdapterBase
|
||||
AdapterQingyan --> AdapterBase
|
||||
AdapterTiangong --> AdapterBase
|
||||
AdapterXinghuo --> AdapterBase
|
||||
SearchEngine --> Engine
|
||||
Config --> Main
|
||||
Main --> Scheduler
|
||||
Main --> API
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/api/queries.py:90-108](file://backend/app/api/queries.py#L90-L108)
|
||||
- [backend/app/services/citation.py:1-269](file://backend/app/services/citation.py#L1-L269)
|
||||
- [backend/app/services/query.py:1-123](file://backend/app/services/query.py#L1-L123)
|
||||
- [backend/app/workers/citation_engine.py:1-330](file://backend/app/workers/citation_engine.py#L1-L330)
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-37](file://backend/app/workers/platforms/kimi.py#L1-L37)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-37](file://backend/app/workers/platforms/wenxin.py#L1-L37)
|
||||
- [backend/app/workers/platforms/tongyi.py:1-38](file://backend/app/workers/platforms/tongyi.py#L1-L38)
|
||||
- [backend/app/workers/platforms/doubao.py:1-38](file://backend/app/workers/platforms/doubao.py#L1-L38)
|
||||
- [backend/app/workers/platforms/qingyan.py:1-38](file://backend/app/workers/platforms/qingyan.py#L1-L38)
|
||||
- [backend/app/workers/platforms/tiangong.py:1-38](file://backend/app/workers/platforms/tiangong.py#L1-L38)
|
||||
- [backend/app/workers/platforms/xinghuo.py:1-38](file://backend/app/workers/platforms/xinghuo.py#L1-L38)
|
||||
- [backend/app/workers/platforms/search_engine.py:1-174](file://backend/app/workers/platforms/search_engine.py#L1-L174)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
**章节来源**
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
|
||||
## 性能考虑
|
||||
- 品牌匹配
|
||||
- 候选词提取与模糊匹配复杂度与文本长度、候选词数量相关,建议对长文本分段处理或限制最大长度
|
||||
- 模糊匹配阈值可按业务需求调整,平衡召回与精度
|
||||
- 平台适配器
|
||||
- Playwright初始化成本较高,适配器内部维护浏览器实例,避免重复启动
|
||||
- 等待回复稳定的轮询间隔与超时可按平台特性微调
|
||||
- **新增** 搜索引擎适配器避免了浏览器自动化成本,提升了整体性能
|
||||
- 引擎与调度
|
||||
- 当前平台遍历为串行,可扩展为并发执行以提升吞吐(注意平台限流与稳定性)
|
||||
- 定时任务每小时检查一次,可根据业务需要调整频率
|
||||
- **新增** 异步任务执行机制,使用asyncio.create_task提升响应速度
|
||||
- 数据库
|
||||
- 引用记录、查询与任务均建立索引,统计查询使用聚合函数,建议定期分析与更新统计信息
|
||||
- **新增** 字符清理机制,避免无效字符导致的数据库插入失败
|
||||
|
||||
## 故障排查指南
|
||||
- 浏览器与Playwright
|
||||
- 现象:启动浏览器失败或提示需安装浏览器
|
||||
- 处理:按日志提示运行Playwright浏览器安装命令
|
||||
- 相关路径:[backend/app/workers/platforms/kimi.py:21-32](file://backend/app/workers/platforms/kimi.py#L21-L32)、[backend/app/workers/platforms/wenxin.py:21-32](file://backend/app/workers/platforms/wenxin.py#L21-L32)
|
||||
- 页面交互失败
|
||||
- 现象:找不到输入框、页面超时
|
||||
- 处理:检查平台页面结构变化,适配器内置多选择器与超时重试,必要时调整选择器或等待策略
|
||||
- 相关路径:[backend/app/workers/platforms/kimi.py:67-88](file://backend/app/workers/platforms/kimi.py#L67-L88)、[backend/app/workers/platforms/wenxin.py:67-87](file://backend/app/workers/platforms/wenxin.py#L67-L87)
|
||||
- **新增** 搜索引擎适配器故障
|
||||
- 现象:DuckDuckGo搜索失败
|
||||
- 处理:自动回退到Wikipedia搜索,检查网络连接和API可用性
|
||||
- 相关路径:[backend/app/workers/platforms/search_engine.py:139-144](file://backend/app/workers/platforms/search_engine.py#L139-L144)
|
||||
- **新增** 指数退避重试机制
|
||||
- 现象:平台查询不稳定
|
||||
- 处理:检查日志中的重试记录,确认指数退避是否正常工作
|
||||
- 相关路径:[backend/app/workers/platforms/tongyi.py:18-29](file://backend/app/workers/platforms/tongyi.py#L18-L29)、[backend/app/workers/platforms/doubao.py:18-29](file://backend/app/workers/platforms/doubao.py#L18-L29)
|
||||
- **新增** 字符清理问题
|
||||
- 现象:数据库插入失败,提示UTF-8编码错误
|
||||
- 处理:检查`_sanitize_raw_response`函数是否正确清理无效字符
|
||||
- 相关路径:[backend/app/workers/citation_engine.py:11-16](file://backend/app/workers/citation_engine.py#L11-L16)
|
||||
- 引擎执行异常
|
||||
- 现象:平台查询失败,记录失败状态与错误信息
|
||||
- 处理:查看任务表中的错误信息,确认平台可用性与网络状况
|
||||
- 相关路径:[backend/app/workers/citation_engine.py:231-247](file://backend/app/workers/citation_engine.py#L231-L247)
|
||||
- API权限与参数
|
||||
- 现象:查询不到数据或返回空
|
||||
- 处理:确认查询ID归属当前用户,参数范围合理
|
||||
- 相关路径:[backend/app/services/citation.py:14-42](file://backend/app/services/citation.py#L14-L42)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/kimi.py:21-32](file://backend/app/workers/platforms/kimi.py#L21-L32)
|
||||
- [backend/app/workers/platforms/wenxin.py:21-32](file://backend/app/workers/platforms/wenxin.py#L21-L32)
|
||||
- [backend/app/workers/platforms/search_engine.py:139-144](file://backend/app/workers/platforms/search_engine.py#L139-L144)
|
||||
- [backend/app/workers/platforms/tongyi.py:18-29](file://backend/app/workers/platforms/tongyi.py#L18-L29)
|
||||
- [backend/app/workers/platforms/doubao.py:18-29](file://backend/app/workers/platforms/doubao.py#L18-L29)
|
||||
- [backend/app/workers/citation_engine.py:11-16](file://backend/app/workers/citation_engine.py#L11-L16)
|
||||
- [backend/app/workers/citation_engine.py:231-247](file://backend/app/workers/citation_engine.py#L231-L247)
|
||||
- [backend/app/services/citation.py:14-42](file://backend/app/services/citation.py#L14-L42)
|
||||
|
||||
## 结论
|
||||
该引擎以清晰的分层设计实现了从AI平台抓取、品牌识别、竞争品牌检测到结果落库与统计展示的完整链路。BrandMatcher与CompetitorDetector提供了稳健的文本处理与识别能力,平台适配器封装了复杂的浏览器自动化流程,搜索引擎适配器提供了更稳定的替代方案,调度器保障了周期性任务的可靠执行。**新增**的异步任务执行机制和字符清理机制进一步提升了系统的稳定性和性能。建议后续在并发执行、阈值调优与平台扩展方面持续优化。
|
||||
|
||||
## 附录
|
||||
|
||||
### 算法优化建议
|
||||
- 品牌匹配
|
||||
- 引入编辑距离或音近相似度算法,进一步提升模糊匹配鲁棒性
|
||||
- 对候选词进行词干化或拼音首字母预处理,减少误判
|
||||
- 并发与限流
|
||||
- 平台查询改为并发执行,并引入令牌桶限流防止被封禁
|
||||
- **新增** 搜索引擎适配器天然支持并发,可优先考虑使用
|
||||
- 缓存与降噪
|
||||
- 对热点关键词与平台响应进行缓存,降低重复请求
|
||||
- 上下文截取长度可动态调整,兼顾性能与准确性
|
||||
- **新增** 字符清理机制可进一步优化,减少无效字符传输
|
||||
|
||||
### 自定义扩展指南
|
||||
- 新增平台适配器
|
||||
- 继承BasePlatformAdapter,实现query与close方法
|
||||
- 在CitationEngine中注册平台映射
|
||||
- **新增** 可选择实现搜索引擎模式或传统浏览器自动化模式
|
||||
- 参考路径:[backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)、[backend/app/workers/citation_engine.py:165-173](file://backend/app/workers/citation_engine.py#L165-L173)
|
||||
- 调整匹配策略
|
||||
- 修改BrandMatcher的阈值、候选词提取规则或上下文截取长度
|
||||
- 参考路径:[backend/app/workers/citation_engine.py:115-132](file://backend/app/workers/citation_engine.py#L115-L132)、[backend/app/workers/citation_engine.py:120-132](file://backend/app/workers/citation_engine.py#L120-L132)
|
||||
- 竞争品牌库扩展
|
||||
- 在CompetitorDetector中增加新的行业类别与品牌集合
|
||||
- 参考路径:[backend/app/workers/citation_engine.py:139-143](file://backend/app/workers/citation_engine.py#L139-L143)
|
||||
- **新增** 搜索引擎模块扩展
|
||||
- 在search_engine.py中添加新的搜索源
|
||||
- 实现相应的清理和格式化函数
|
||||
- 参考路径:[backend/app/workers/platforms/search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)
|
||||
|
||||
### 与AI平台的集成接口与数据流转
|
||||
- 接口职责
|
||||
- 平台适配器:统一query接口,屏蔽平台差异
|
||||
- 引擎:组织流程、落库、异常处理
|
||||
- API/服务:对外提供查询与统计能力
|
||||
- **新增** 搜索引擎模块:提供备用查询源
|
||||
- 数据流
|
||||
- 用户发起查询 → API/服务 → 引擎 → 平台适配器 → AI平台/搜索引擎 → 引擎 → 数据库 → API/服务 → 前端展示
|
||||
- **新增** 搜索引擎模式:通过fetch_search_content获取真实内容
|
||||
- 监控与日志
|
||||
- 平台适配器与引擎均包含日志记录,便于追踪失败原因与性能瓶颈
|
||||
- **新增** 指数退避重试机制,支持自动故障恢复
|
||||
- **新增** 字符清理机制,确保数据质量
|
||||
- 参考路径:[backend/app/workers/platforms/tongyi.py:16-33](file://backend/app/workers/platforms/tongyi.py#L16-L33)、[backend/app/workers/platforms/search_engine.py:163-174](file://backend/app/workers/platforms/search_engine.py#L163-L174)、[backend/app/workers/citation_engine.py:11-16](file://backend/app/workers/citation_engine.py#L11-L16)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/citation_engine.py:161-330](file://backend/app/workers/citation_engine.py#L161-L330)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/api/queries.py:90-108](file://backend/app/api/queries.py#L90-L108)
|
||||
- [backend/app/services/citation.py:1-269](file://backend/app/services/citation.py#L1-L269)
|
||||
- [backend/app/services/query.py:1-123](file://backend/app/services/query.py#L1-L123)
|
||||
- [backend/app/workers/platforms/search_engine.py:1-174](file://backend/app/workers/platforms/search_engine.py#L1-L174)
|
||||
|
|
@ -0,0 +1,462 @@
|
|||
# 数据模型设计
|
||||
|
||||
<cite>
|
||||
**本文档中引用的文件**
|
||||
- [user.py](file://backend/app/models/user.py)
|
||||
- [query.py](file://backend/app/models/query.py)
|
||||
- [citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [query_task.py](file://backend/app/models/query_task.py)
|
||||
- [subscription.py](file://backend/app/models/subscription.py)
|
||||
- [database.py](file://backend/app/database.py)
|
||||
- [__init__.py](file://backend/app/models/__init__.py)
|
||||
- [488d0bd5ab01_initial_migration.py](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py)
|
||||
- [config.py](file://backend/app/config.py)
|
||||
- [query.py](file://backend/app/schemas/query.py)
|
||||
- [citation.py](file://backend/app/schemas/citation.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构概览](#架构概览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖关系分析](#依赖关系分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排除指南](#故障排除指南)
|
||||
9. [结论](#结论)
|
||||
|
||||
## 简介
|
||||
|
||||
GEO平台的数据模型基于SQLAlchemy ORM构建,采用异步PostgreSQL数据库设计。该系统围绕用户查询管理、引用记录跟踪和任务调度构建,支持多平台内容监控和分析功能。数据模型设计遵循以下核心原则:异步数据库访问、强类型字段定义、级联关系管理和性能优化索引策略。
|
||||
|
||||
## 项目结构
|
||||
|
||||
GEO项目的数据层采用模块化设计,所有ORM模型位于`backend/app/models/`目录下,通过统一的Base类继承实现。数据库连接配置在`database.py`中定义,使用SQLAlchemy的异步引擎和会话管理。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "数据模型层"
|
||||
User[User模型]
|
||||
Query[Query模型]
|
||||
CitationRecord[CitationRecord模型]
|
||||
QueryTask[QueryTask模型]
|
||||
Subscription[Subscription模型]
|
||||
end
|
||||
subgraph "基础设施层"
|
||||
Database[数据库引擎]
|
||||
Config[配置管理]
|
||||
Alembic[迁移管理]
|
||||
end
|
||||
subgraph "应用层"
|
||||
API[API服务]
|
||||
Schemas[Schemas验证]
|
||||
Workers[工作进程]
|
||||
end
|
||||
User --> Query
|
||||
Query --> CitationRecord
|
||||
Query --> QueryTask
|
||||
User --> Subscription
|
||||
Database --> User
|
||||
Database --> Query
|
||||
Database --> CitationRecord
|
||||
Database --> QueryTask
|
||||
Database --> Subscription
|
||||
Config --> Database
|
||||
Alembic --> Database
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [__init__.py:1-14](file://backend/app/models/__init__.py#L1-L14)
|
||||
|
||||
**章节来源**
|
||||
- [database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
## 核心组件
|
||||
|
||||
GEO平台包含五个核心数据模型,每个模型都经过精心设计以满足特定的业务需求:
|
||||
|
||||
### 用户模型(User)
|
||||
用户模型是整个系统的核心实体,负责管理平台用户的基本信息、订阅状态和查询配额。模型包含完整的身份认证信息和权限控制字段。
|
||||
|
||||
### 查询模型(Query)
|
||||
查询模型代表用户的搜索请求,包含关键词、目标品牌、平台配置和调度参数。该模型支持复杂的JSONB字段存储动态配置数据。
|
||||
|
||||
### 引用记录模型(CitationRecord)
|
||||
引用记录模型用于存储从各个平台抓取的内容引用信息,包括是否提及目标品牌、引用位置、引用文本、竞争品牌列表和原始响应内容。支持文本搜索和位置标记功能。
|
||||
|
||||
### 查询任务模型(QueryTask)
|
||||
查询任务模型管理异步查询任务的生命周期,包括调度状态、执行历史和错误处理。支持多平台并发执行和状态跟踪。
|
||||
|
||||
### 订阅模型(Subscription)
|
||||
订阅模型处理用户付费计划和账单管理,包含计划类型、状态、日期范围和支付信息。与用户模型建立多对一关系(一个用户可拥有多条订阅记录)。
|
||||
|
||||
**章节来源**
|
||||
- [user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [subscription.py:11-37](file://backend/app/models/subscription.py#L11-L37)
|
||||
|
||||
## 架构概览
|
||||
|
||||
GEO平台采用分层架构设计,数据模型层与业务逻辑层分离,确保了良好的可维护性和扩展性。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class User {
|
||||
+UUID id
|
||||
+String email
|
||||
+String password_hash
|
||||
+String name
|
||||
+String plan
|
||||
+Integer max_queries
|
||||
+Boolean is_active
|
||||
+DateTime created_at
|
||||
+DateTime updated_at
|
||||
+queries : List[Query]
|
||||
+subscriptions : List[Subscription]
|
||||
}
|
||||
class Query {
|
||||
+UUID id
|
||||
+UUID user_id
|
||||
+String keyword
|
||||
+String target_brand
|
||||
+List brand_aliases
|
||||
+List platforms
|
||||
+String frequency
|
||||
+String status
|
||||
+DateTime last_queried_at
|
||||
+DateTime next_query_at
|
||||
+DateTime created_at
|
||||
+DateTime updated_at
|
||||
+user : User
|
||||
+citation_records : List[CitationRecord]
|
||||
+query_tasks : List[QueryTask]
|
||||
}
|
||||
class CitationRecord {
|
||||
+UUID id
|
||||
+UUID query_id
|
||||
+String platform
|
||||
+Boolean cited
|
||||
+Integer citation_position
|
||||
+String citation_text
|
||||
+List competitor_brands
|
||||
+String raw_response
|
||||
+DateTime queried_at
|
||||
+query : Query
|
||||
}
|
||||
class QueryTask {
|
||||
+UUID id
|
||||
+UUID query_id
|
||||
+String platform
|
||||
+String status
|
||||
+String error_message
|
||||
+DateTime scheduled_at
|
||||
+DateTime started_at
|
||||
+DateTime completed_at
|
||||
+query : Query
|
||||
}
|
||||
class Subscription {
|
||||
+UUID id
|
||||
+UUID user_id
|
||||
+String plan
|
||||
+String status
|
||||
+Date start_date
|
||||
+Date end_date
|
||||
+Decimal amount
|
||||
+String payment_method
|
||||
+String payment_id
|
||||
+DateTime created_at
|
||||
+user : User
|
||||
}
|
||||
User "1" --> "many" Query : "has"
|
||||
User "1" --> "many" Subscription : "has"
|
||||
Query "1" --> "many" CitationRecord : "has"
|
||||
Query "1" --> "many" QueryTask : "has"
|
||||
Query "many" --> "1" User : "belongs to"
|
||||
Subscription "many" --> "1" User : "belongs to"
|
||||
CitationRecord "many" --> "1" Query : "belongs to"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [subscription.py:11-37](file://backend/app/models/subscription.py#L11-L37)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 用户模型(User)设计
|
||||
|
||||
用户模型是系统的基础实体,采用UUID作为主键,确保分布式环境下的唯一性。邮箱字段设置为唯一约束,作为登录标识,支持邮箱加密码的认证方式。
|
||||
|
||||
#### 字段设计分析
|
||||
|
||||
| 字段名 | 类型 | 约束 | 默认值 | 描述 |
|
||||
|--------|------|------|--------|------|
|
||||
| id | UUID | 主键, 唯一 | 自动生成 | 用户唯一标识符 |
|
||||
| email | String(255) | 唯一, 非空 | - | 用户登录邮箱 |
|
||||
| password_hash | String(255) | 非空 | - | 密码哈希值 |
|
||||
| name | String(100) | 可空 | - | 用户姓名 |
|
||||
| plan | String(20) | 非空 | "free" | 用户套餐类型 |
|
||||
| max_queries | Integer | 非空 | 5 | 每月最大查询次数 |
|
||||
| is_active | Boolean | 非空 | True | 账户激活状态 |
|
||||
|
||||
#### 关系映射
|
||||
|
||||
用户模型与查询模型和订阅模型建立一对多关系,使用级联删除确保数据一致性。
|
||||
|
||||
**章节来源**
|
||||
- [user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
|
||||
### 查询模型(Query)设计
|
||||
|
||||
查询模型代表用户的搜索请求,支持多平台内容监控和定期执行。
|
||||
|
||||
#### 字段设计分析
|
||||
|
||||
| 字段名 | 类型 | 约束 | 默认值 | 描述 |
|
||||
|--------|------|------|--------|------|
|
||||
| id | UUID | 主键 | 自动生成 | 查询唯一标识符 |
|
||||
| user_id | UUID | 外键, 非空 | - | 所属用户ID |
|
||||
| keyword | String(200) | 非空 | - | 搜索关键词 |
|
||||
| target_brand | String(100) | 非空 | - | 目标品牌名称 |
|
||||
| brand_aliases | JSONB | 默认空列表 | [] | 品牌别名列表 |
|
||||
| platforms | JSONB | 非空, 默认["wenxin","kimi"] | ["wenxin","kimi"] | 监控平台列表 |
|
||||
| frequency | String(20) | 非空 | "weekly" | 查询频率 |
|
||||
| status | String(20) | 非空 | "active" | 查询状态 |
|
||||
| last_queried_at | DateTime | 可空 | - | 最后查询时间 |
|
||||
| next_query_at | DateTime | 可空 | - | 下次查询时间 |
|
||||
|
||||
#### 关系映射
|
||||
|
||||
查询模型与用户、引用记录和查询任务建立复杂的关系:
|
||||
- 与用户:多对一,外键约束确保数据完整性
|
||||
- 与引用记录:一对多,级联删除孤儿记录
|
||||
- 与查询任务:一对多,支持并行任务执行
|
||||
|
||||
#### 索引策略
|
||||
|
||||
查询模型包含三个关键索引:
|
||||
- `idx_queries_user_id`: 加速用户查询
|
||||
- `idx_queries_status`: 支持状态过滤查询
|
||||
- `idx_queries_next_query_at`: 优化调度查询
|
||||
|
||||
**章节来源**
|
||||
- [query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
|
||||
### 引用记录模型(CitationRecord)设计
|
||||
|
||||
引用记录模型存储从各平台抓取的品牌提及信息,支持详细的上下文分析。
|
||||
|
||||
#### 字段设计分析
|
||||
|
||||
| 字段名 | 类型 | 约束 | 默认值 | 描述 |
|
||||
|--------|------|------|--------|------|
|
||||
| id | UUID | 主键 | 自动生成 | 记录唯一标识符 |
|
||||
| query_id | UUID | 外键, 非空 | - | 所属查询ID |
|
||||
| platform | String(50) | 非空 | - | 内容来源平台 |
|
||||
| cited | Boolean | 非空, 默认False | False | 是否提及目标品牌 |
|
||||
| citation_position | Integer | 可空 | - | 内容在结果中的位置 |
|
||||
| citation_text | Text | 可空 | - | 提及的具体文本内容 |
|
||||
| competitor_brands | JSONB | 默认空列表 | [] | 竞争对手品牌列表 |
|
||||
| raw_response | Text | 可空 | - | 原始响应内容 |
|
||||
| queried_at | DateTime | 非空 | 当前时间 | 记录创建时间 |
|
||||
|
||||
#### 关系映射
|
||||
|
||||
引用记录模型与查询模型建立多对一关系(一个查询对应多条引用记录),支持按查询分组统计分析。
|
||||
|
||||
#### 索引策略
|
||||
|
||||
引用记录模型包含三个关键索引:
|
||||
- `idx_citation_records_query_id`: 加速查询关联查询
|
||||
- `idx_citation_records_queried_at`: 支持时间序列分析
|
||||
- `idx_citation_records_platform`: 平台维度统计
|
||||
|
||||
**章节来源**
|
||||
- [citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
### 查询任务模型(QueryTask)设计
|
||||
|
||||
查询任务模型管理异步查询任务的完整生命周期,支持状态跟踪和错误处理。
|
||||
|
||||
#### 字段设计分析
|
||||
|
||||
| 字段名 | 类型 | 约束 | 默认值 | 描述 |
|
||||
|--------|------|------|--------|------|
|
||||
| id | UUID | 主键 | 自动生成 | 任务唯一标识符 |
|
||||
| query_id | UUID | 外键, 非空 | - | 所属查询ID |
|
||||
| platform | String(50) | 非空 | - | 执行平台 |
|
||||
| status | String(20) | 非空, 默认"pending" | pending | 任务执行状态 |
|
||||
| error_message | Text | 可空 | - | 错误信息 |
|
||||
| scheduled_at | DateTime | 非空 | 当前时间 | 任务调度时间 |
|
||||
| started_at | DateTime | 可空 | - | 任务开始时间 |
|
||||
| completed_at | DateTime | 可空 | - | 任务完成时间 |
|
||||
|
||||
#### 关系映射
|
||||
|
||||
查询任务模型与查询模型建立多对一关系(一个查询对应多个任务),支持任务状态监控和统计分析。
|
||||
|
||||
#### 索引策略
|
||||
|
||||
查询任务模型包含一个关键索引:
|
||||
- `idx_query_tasks_status`: 支持任务状态筛选和调度
|
||||
|
||||
**章节来源**
|
||||
- [query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
|
||||
### 订阅模型(Subscription)设计
|
||||
|
||||
订阅模型处理用户付费计划和账单管理,支持灵活的计费周期和状态管理。
|
||||
|
||||
#### 字段设计分析
|
||||
|
||||
| 字段名 | 类型 | 约束 | 默认值 | 描述 |
|
||||
|--------|------|------|--------|------|
|
||||
| id | UUID | 主键 | 自动生成 | 订阅唯一标识符 |
|
||||
| user_id | UUID | 外键, 非空 | - | 所属用户ID |
|
||||
| plan | String(20) | 非空 | - | 套餐类型 |
|
||||
| status | String(20) | 非空, 默认"active" | active | 订阅状态 |
|
||||
| start_date | Date | 非空 | - | 订阅开始日期 |
|
||||
| end_date | Date | 非空 | - | 订阅结束日期 |
|
||||
| amount | Numeric(10,2) | 可空 | - | 支付金额 |
|
||||
| payment_method | String(50) | 可空 | - | 支付方式 |
|
||||
| payment_id | String(255) | 可空 | - | 支付ID |
|
||||
| created_at | DateTime | 非空 | 当前时间 | 创建时间 |
|
||||
|
||||
#### 关系映射
|
||||
|
||||
订阅模型与用户模型建立多对一关系(一个用户对应多条订阅记录),支持用户订阅状态查询。
|
||||
|
||||
**章节来源**
|
||||
- [subscription.py:11-37](file://backend/app/models/subscription.py#L11-L37)
|
||||
|
||||
## 依赖关系分析
|
||||
|
||||
GEO平台的数据模型之间存在清晰的依赖关系,形成完整的业务数据流。
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
subgraph "用户层"
|
||||
Users[Users表]
|
||||
Subscriptions[Subscriptions表]
|
||||
end
|
||||
subgraph "查询层"
|
||||
Queries[Queries表]
|
||||
QueryTasks[QueryTasks表]
|
||||
end
|
||||
subgraph "内容层"
|
||||
CitationRecords[CitationRecords表]
|
||||
end
|
||||
Users --> Queries
|
||||
Users --> Subscriptions
|
||||
Queries --> CitationRecords
|
||||
Queries --> QueryTasks
|
||||
QueryTasks --> CitationRecords
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [488d0bd5ab01_initial_migration.py:23-111](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L23-L111)
|
||||
|
||||
### 外键约束分析
|
||||
|
||||
系统采用严格的外键约束确保数据完整性:
|
||||
- 查询记录删除时自动删除相关引用记录和任务记录
|
||||
- 用户删除时自动清理其所有查询和订阅信息
|
||||
- 外键约束支持级联删除,防止悬挂数据
|
||||
|
||||
### 级联操作策略
|
||||
|
||||
- **查询记录**: 删除查询时级联删除所有相关记录
|
||||
- **用户信息**: 删除用户时级联清理所有关联数据
|
||||
- **任务管理**: 支持独立的任务生命周期管理
|
||||
|
||||
**章节来源**
|
||||
- [488d0bd5ab01_initial_migration.py:55](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L55)
|
||||
- [488d0bd5ab01_initial_migration.py:74](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L74)
|
||||
- [488d0bd5ab01_initial_migration.py:92](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L92)
|
||||
|
||||
## 性能考虑
|
||||
|
||||
GEO平台的数据模型在设计时充分考虑了性能优化,采用多种策略提升查询效率和系统吞吐量。
|
||||
|
||||
### 索引优化策略
|
||||
|
||||
#### 查询模型索引
|
||||
- `idx_queries_user_id`: 支持按用户快速检索查询
|
||||
- `idx_queries_status`: 优化状态过滤查询
|
||||
- `idx_queries_next_query_at`: 加速调度任务查询
|
||||
|
||||
#### 引用记录模型索引
|
||||
- `idx_citation_records_query_id`: 支持按查询分组统计
|
||||
- `idx_citation_records_queried_at`: 时间序列分析优化
|
||||
- `idx_citation_records_platform`: 平台维度查询优化
|
||||
|
||||
#### 查询任务模型索引
|
||||
- `idx_query_tasks_status`: 任务状态筛选优化
|
||||
|
||||
### 查询优化建议
|
||||
|
||||
1. **批量操作**: 使用批量插入和更新减少数据库往返
|
||||
2. **延迟加载**: 对于大型JSONB字段采用延迟加载策略
|
||||
3. **分页查询**: 对大量数据采用分页机制避免内存溢出
|
||||
4. **缓存策略**: 结合Redis缓存热点数据
|
||||
|
||||
### 数据库连接管理
|
||||
|
||||
系统使用异步数据库连接池,配置如下:
|
||||
- 连接超时: 60秒
|
||||
- 连接池大小: 10-20个连接
|
||||
- 自动重连: 启用连接池重连机制
|
||||
|
||||
**章节来源**
|
||||
- [database.py:6-18](file://backend/app/database.py#L6-L18)
|
||||
|
||||
## 故障排除指南
|
||||
|
||||
### 常见问题诊断
|
||||
|
||||
#### 数据库连接问题
|
||||
- 检查DATABASE_URL配置是否正确
|
||||
- 验证PostgreSQL服务可用性
|
||||
- 确认网络连接和防火墙设置
|
||||
|
||||
#### 索引性能问题
|
||||
- 分析慢查询日志识别瓶颈
|
||||
- 检查索引使用情况
|
||||
- 考虑添加复合索引优化查询
|
||||
|
||||
#### 数据一致性问题
|
||||
- 验证外键约束是否正确设置
|
||||
- 检查级联删除行为
|
||||
- 确认事务边界设置
|
||||
|
||||
### 调试工具使用
|
||||
|
||||
1. **数据库监控**: 使用EXPLAIN ANALYZE分析查询计划
|
||||
2. **连接池监控**: 监控连接池使用率和等待时间
|
||||
3. **慢查询追踪**: 启用慢查询日志分析性能瓶颈
|
||||
|
||||
**章节来源**
|
||||
- [config.py:7](file://backend/app/config.py#L7)
|
||||
|
||||
## 结论
|
||||
|
||||
GEO平台的数据模型设计体现了现代Web应用的最佳实践,通过清晰的实体关系、完善的索引策略和异步数据库访问实现了高性能和高可用性。模型设计充分考虑了业务需求的复杂性,支持多平台内容监控、智能调度和详细统计分析。
|
||||
|
||||
系统的主要优势包括:
|
||||
- **强类型设计**: 所有字段都有明确的类型定义和约束
|
||||
- **关系完整性**: 通过外键约束和级联操作确保数据一致性
|
||||
- **性能优化**: 合理的索引策略和异步数据库访问
|
||||
- **扩展性**: 模块化的架构设计便于功能扩展
|
||||
|
||||
未来可以考虑的改进方向:
|
||||
- 添加审计日志支持
|
||||
- 实现数据分区策略
|
||||
- 增加数据备份和恢复机制
|
||||
- 优化大数据量场景下的查询性能
|
||||
|
|
@ -0,0 +1,388 @@
|
|||
# 核心框架配置
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [main.py](file://backend/app/main.py)
|
||||
- [config.py](file://backend/app/config.py)
|
||||
- [database.py](file://backend/app/database.py)
|
||||
- [deps.py](file://backend/app/api/deps.py)
|
||||
- [scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [auth.py](file://backend/app/api/auth.py)
|
||||
- [queries.py](file://backend/app/api/queries.py)
|
||||
- [query.py](file://backend/app/models/query.py)
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [requirements.txt](file://backend/requirements.txt)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排除指南](#故障排除指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件面向GEO后端核心框架的配置与运行机制,重点覆盖以下方面:
|
||||
- FastAPI应用实例的初始化、应用名称与版本管理、生命周期管理(lifespan)
|
||||
- CORS中间件配置策略
|
||||
- 路由注册机制与健康检查端点
|
||||
- 配置管理系统(环境变量读取、配置验证、默认值设置)
|
||||
- 数据库连接池配置、异步会话管理与连接超时设置
|
||||
- 应用启动与关闭流程及资源清理机制
|
||||
- 配置最佳实践与常见问题解决方案
|
||||
|
||||
## 项目结构
|
||||
后端采用分层组织方式,核心入口位于应用根目录,按功能划分为API、模型、服务、工具与工作器等模块。Docker Compose用于编排数据库、缓存与前后端服务。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "后端应用"
|
||||
MAIN["app/main.py<br/>应用入口与生命周期"]
|
||||
CONF["app/config.py<br/>配置定义与默认值"]
|
||||
DB["app/database.py<br/>异步引擎与会话工厂"]
|
||||
API_AUTH["app/api/auth.py<br/>认证路由"]
|
||||
API_QUERIES["app/api/queries.py<br/>查询路由"]
|
||||
DEPS["app/api/deps.py<br/>依赖注入与鉴权"]
|
||||
SCHED["app/workers/scheduler.py<br/>定时任务调度器"]
|
||||
MODEL_QUERY["app/models/query.py<br/>查询模型"]
|
||||
end
|
||||
subgraph "外部服务"
|
||||
DB_SRV["PostgreSQL"]
|
||||
REDIS_SRV["Redis"]
|
||||
end
|
||||
MAIN --> API_AUTH
|
||||
MAIN --> API_QUERIES
|
||||
MAIN --> SCHED
|
||||
API_AUTH --> DEPS
|
||||
API_QUERIES --> DEPS
|
||||
DEPS --> DB
|
||||
SCHED --> DB
|
||||
DB --> DB_SRV
|
||||
CONF --> DB
|
||||
CONF --> SCHED
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
- [scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
|
||||
章节来源
|
||||
- [main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
## 核心组件
|
||||
- 应用入口与生命周期:通过FastAPI实例与lifespan钩子实现启动与关闭阶段的资源管理。
|
||||
- 配置系统:基于Pydantic Settings的环境变量读取与默认值设置,支持.env文件加载。
|
||||
- 数据库层:SQLAlchemy 2.0异步引擎与会话工厂,提供异步依赖注入。
|
||||
- 路由与中间件:统一注册各业务路由,配置CORS以支持前端跨域访问。
|
||||
- 定时任务:APScheduler驱动的异步调度器,周期性检查并执行到期查询任务。
|
||||
|
||||
章节来源
|
||||
- [main.py:13-48](file://backend/app/main.py#L13-L48)
|
||||
- [config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
|
||||
## 架构总览
|
||||
下图展示从应用启动到请求处理的关键路径,包括配置加载、数据库会话、依赖注入与定时任务启动/关闭。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Uvicorn as "Uvicorn服务器"
|
||||
participant App as "FastAPI应用(main.py)"
|
||||
participant Lifespan as "生命周期(lifespan)"
|
||||
participant Scheduler as "查询调度器(scheduler.py)"
|
||||
participant Router as "路由(auth/queries)"
|
||||
participant Deps as "依赖注入(deps.py)"
|
||||
participant DB as "数据库(database.py)"
|
||||
Uvicorn->>App : 启动应用
|
||||
App->>Lifespan : 调用lifespan(启动)
|
||||
Lifespan->>Scheduler : start()
|
||||
Scheduler-->>Lifespan : 调度器就绪
|
||||
Lifespan-->>App : 启动完成
|
||||
Router->>Deps : 解析依赖(鉴权/数据库)
|
||||
Deps->>DB : 获取异步会话
|
||||
DB-->>Deps : 返回AsyncSession
|
||||
Deps-->>Router : 当前用户/数据库会话
|
||||
Uvicorn-->>App : 关闭信号
|
||||
App->>Lifespan : 调用lifespan(关闭)
|
||||
Lifespan->>Scheduler : shutdown()
|
||||
Scheduler-->>Lifespan : 调度器关闭
|
||||
Lifespan-->>App : 清理完成
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [main.py:13-48](file://backend/app/main.py#L13-L48)
|
||||
- [scheduler.py:30-90](file://backend/app/workers/scheduler.py#L30-L90)
|
||||
- [deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [database.py:23-29](file://backend/app/database.py#L23-L29)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### FastAPI应用初始化与生命周期管理
|
||||
- 应用名称与版本:在应用实例中设置标题与版本号,便于API文档与监控识别。
|
||||
- 生命周期钩子:通过lifespan上下文管理器在启动时导入ORM模型并启动调度器,在关闭时优雅停止调度器与资源清理。
|
||||
- 健康检查端点:提供简单健康检查接口,便于容器编排与负载均衡探测。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["应用启动"]) --> ImportModels["导入ORM模型"]
|
||||
ImportModels --> StartScheduler["启动查询调度器"]
|
||||
StartScheduler --> Ready(["应用就绪"])
|
||||
Shutdown(["应用关闭"]) --> StopScheduler["停止查询调度器"]
|
||||
StopScheduler --> Cleanup(["资源清理完成"])
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [main.py:13-28](file://backend/app/main.py#L13-L28)
|
||||
- [scheduler.py:30-90](file://backend/app/workers/scheduler.py#L30-L90)
|
||||
|
||||
章节来源
|
||||
- [main.py:13-48](file://backend/app/main.py#L13-L48)
|
||||
|
||||
### CORS中间件配置
|
||||
- 允许来源:配置允许的前端地址,支持凭证传递。
|
||||
- 方法与头:开放所有HTTP方法与请求头,简化跨域交互。
|
||||
- 生效范围:对整个应用生效,确保前端与后端API通信顺畅。
|
||||
|
||||
章节来源
|
||||
- [main.py:30-36](file://backend/app/main.py#L30-L36)
|
||||
|
||||
### 路由注册机制
|
||||
- 统一前缀与标签:各业务路由均带有统一的版本前缀与中文标签,便于API文档分类与维护。
|
||||
- 依赖注入:路由函数通过Depends获取数据库会话与当前用户信息,实现鉴权与数据访问分离。
|
||||
- 特殊路由:部分路由(如立即执行)复用同一前缀,保持命名一致性。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
APP["FastAPI应用(main.py)"] --> AUTH["认证路由(auth.py)"]
|
||||
APP --> QUERIES["查询路由(queries.py)"]
|
||||
AUTH --> DEPS["依赖注入(deps.py)"]
|
||||
QUERIES --> DEPS
|
||||
DEPS --> DB["数据库会话(database.py)"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [main.py:38-42](file://backend/app/main.py#L38-L42)
|
||||
- [auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
- [database.py:23-29](file://backend/app/database.py#L23-L29)
|
||||
|
||||
章节来源
|
||||
- [main.py:38-42](file://backend/app/main.py#L38-L42)
|
||||
- [auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
|
||||
### 健康检查端点
|
||||
- 路径:/health
|
||||
- 响应:返回状态字典,用于容器编排与服务发现。
|
||||
- 用途:Kubernetes/Compose健康检查、负载均衡探活。
|
||||
|
||||
章节来源
|
||||
- [main.py:45-48](file://backend/app/main.py#L45-L48)
|
||||
|
||||
### 配置管理系统
|
||||
- 配置类:使用Pydantic Settings定义配置项,支持.env文件加载与额外字段忽略。
|
||||
- 默认值:数据库URL、Redis URL、JWT密钥与过期时间、浏览器自动化路径、第三方平台API Key等均有合理默认值。
|
||||
- 加载顺序:优先读取环境变量,其次读取.env文件,均未设置时使用默认值;生产环境务必覆盖敏感配置。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class Settings {
|
||||
+DATABASE_URL : str
|
||||
+REDIS_URL : str
|
||||
+JWT_SECRET : str
|
||||
+JWT_EXPIRE_HOURS : int
|
||||
+PLAYWRIGHT_BROWSERS_PATH : str
|
||||
+ZHIPU_API_KEY : str
|
||||
+TONGYI_API_KEY : str
|
||||
}
|
||||
class ConfigInstance {
|
||||
+settings : Settings
|
||||
}
|
||||
Settings <.. ConfigInstance : "实例化"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [config.py:4-16](file://backend/app/config.py#L4-L16)
|
||||
|
||||
章节来源
|
||||
- [config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
### 数据库连接池与异步会话管理
|
||||
- 引擎创建:基于配置中的数据库URL创建异步引擎,echo关闭,future启用。
|
||||
- 会话工厂:配置会话类、过期策略、自动刷新与提交行为,确保事务一致性。
|
||||
- 依赖注入:通过异步生成器提供会话,确保每次请求结束后正确关闭会话。
|
||||
- 连接超时:当前实现未显式设置超时参数,建议在生产环境根据数据库性能调优。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Router as "路由函数"
|
||||
participant Deps as "依赖注入(deps.py)"
|
||||
participant SessionMaker as "AsyncSessionLocal"
|
||||
participant Engine as "异步引擎"
|
||||
participant DB as "数据库"
|
||||
Router->>Deps : 调用get_db()
|
||||
Deps->>SessionMaker : 创建会话
|
||||
SessionMaker->>Engine : 获取连接
|
||||
Engine-->>SessionMaker : 返回连接
|
||||
SessionMaker-->>Deps : 返回AsyncSession
|
||||
Deps-->>Router : 提供会话
|
||||
Router->>DB : 执行查询/更新
|
||||
Router-->>Deps : 请求结束
|
||||
Deps->>SessionMaker : 关闭会话
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [database.py:6-29](file://backend/app/database.py#L6-L29)
|
||||
- [deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
|
||||
章节来源
|
||||
- [database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
|
||||
### 定时任务调度与资源清理
|
||||
- 调度器:使用APScheduler的AsyncIOScheduler,每小时检查并执行到期查询。
|
||||
- 启动流程:应用启动时调用start(),注册检查任务并启动调度器。
|
||||
- 关闭流程:应用关闭时调用shutdown(),停止调度器并关闭相关资源。
|
||||
- 任务执行:异步查询执行,异常捕获与日志记录,避免单个任务失败影响整体。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["应用启动"]) --> InitScheduler["初始化调度器"]
|
||||
InitScheduler --> AddJob["添加定时任务(每小时)"]
|
||||
AddJob --> Run["启动调度器"]
|
||||
Run --> Tick["定时触发"]
|
||||
Tick --> Check["查询到期任务"]
|
||||
Check --> Exec["执行查询任务"]
|
||||
Exec --> Tick
|
||||
Shutdown(["应用关闭"]) --> Stop["停止调度器"]
|
||||
Stop --> CloseRes["关闭资源"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
|
||||
章节来源
|
||||
- [scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
|
||||
### 依赖注入与鉴权流程
|
||||
- OAuth2方案:使用OAuth2PasswordBearer,令牌端点为认证登录接口。
|
||||
- 用户解析:从令牌中提取用户ID,查询数据库获取用户对象。
|
||||
- 异常处理:令牌无效或用户不存在时抛出401错误,确保鉴权安全。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant Router as "路由(auth.py)"
|
||||
participant Deps as "依赖注入(deps.py)"
|
||||
participant DB as "数据库(database.py)"
|
||||
Client->>Router : 发起受保护请求
|
||||
Router->>Deps : 解析令牌与会话
|
||||
Deps->>Deps : 验证JWT令牌
|
||||
Deps->>DB : 查询用户信息
|
||||
DB-->>Deps : 返回用户对象
|
||||
Deps-->>Router : 返回当前用户
|
||||
Router-->>Client : 返回响应
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [database.py:23-29](file://backend/app/database.py#L23-L29)
|
||||
|
||||
章节来源
|
||||
- [auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
|
||||
## 依赖分析
|
||||
- 应用入口依赖:路由模块、调度器与CORS中间件。
|
||||
- 路由依赖:依赖注入模块与数据库会话。
|
||||
- 调度器依赖:数据库会话工厂与查询模型。
|
||||
- 配置依赖:数据库与缓存URL、JWT配置等。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
MAIN["main.py"] --> ROUTERS["API路由模块"]
|
||||
MAIN --> SCHED["scheduler.py"]
|
||||
MAIN --> CORS["CORS中间件"]
|
||||
ROUTERS --> DEPS["deps.py"]
|
||||
DEPS --> DB["database.py"]
|
||||
SCHED --> DB
|
||||
SCHED --> MODEL["models/query.py"]
|
||||
CONF["config.py"] --> DB
|
||||
CONF --> SCHED
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
- [database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
章节来源
|
||||
- [main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
|
||||
## 性能考虑
|
||||
- 异步数据库:使用SQLAlchemy异步引擎与会话,减少阻塞,提升并发能力。
|
||||
- 连接池参数:当前未显式设置连接池大小与超时,建议结合数据库性能与负载进行调优。
|
||||
- 定时任务频率:每小时检查一次,可根据业务量调整间隔,避免频繁扫描。
|
||||
- CORS开放策略:生产环境建议限制允许来源与方法,降低跨域风险。
|
||||
|
||||
## 故障排除指南
|
||||
- 数据库连接失败
|
||||
- 检查数据库URL与凭据是否正确,确认容器网络连通性。
|
||||
- 参考:[database.py:6-10](file://backend/app/database.py#L6-L10)
|
||||
- Redis连接失败
|
||||
- 确认Redis服务可用与URL正确,检查容器健康检查状态。
|
||||
- 参考:[config.py:8](file://backend/app/config.py#L8)
|
||||
- JWT鉴权失败
|
||||
- 确认JWT密钥与过期时间配置,检查令牌格式与有效期。
|
||||
- 参考:[config.py:9-10](file://backend/app/config.py#L9-L10)
|
||||
- 定时任务不执行
|
||||
- 检查调度器启动日志与任务注册状态,确认数据库中查询状态与下次执行时间。
|
||||
- 参考:[scheduler.py:30-40](file://backend/app/workers/scheduler.py#L30-L40)
|
||||
- CORS跨域问题
|
||||
- 确认允许来源与凭证设置,生产环境建议收紧策略。
|
||||
- 参考:[main.py:30-36](file://backend/app/main.py#L30-L36)
|
||||
|
||||
章节来源
|
||||
- [database.py:6-10](file://backend/app/database.py#L6-L10)
|
||||
- [config.py:8-10](file://backend/app/config.py#L8-L10)
|
||||
- [scheduler.py:30-40](file://backend/app/workers/scheduler.py#L30-L40)
|
||||
- [main.py:30-36](file://backend/app/main.py#L30-L36)
|
||||
|
||||
## 结论
|
||||
GEO后端核心框架通过清晰的分层设计与异步化实现,提供了可扩展的配置体系、健壮的数据库会话管理与可靠的定时任务调度。建议在生产环境中完善连接池参数、收紧CORS策略并强化配置校验,以获得更稳定与安全的运行表现。
|
||||
|
||||
## 附录
|
||||
- 环境变量与默认值
|
||||
- 数据库URL:用于PostgreSQL连接,默认值见配置文件。
|
||||
- Redis URL:用于缓存与任务队列,默认值见配置文件。
|
||||
- JWT密钥与过期时间:用于令牌签发与验证。
|
||||
- 浏览器自动化路径:Playwright浏览器二进制路径。
|
||||
- 第三方平台API Key:用于集成外部服务。
|
||||
- Docker编排
|
||||
- 数据库与缓存服务通过Compose编排,后端服务依赖其健康状态。
|
||||
- 前端服务依赖后端服务,命令行启动参数可按需调整。
|
||||
|
||||
章节来源
|
||||
- [config.py:7-14](file://backend/app/config.py#L7-L14)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
|
@ -0,0 +1,505 @@
|
|||
# 认证系统
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
- [backend/app/schemas/auth.py](file://backend/app/schemas/auth.py)
|
||||
- [backend/app/services/auth.py](file://backend/app/services/auth.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/api/deps.py](file://backend/app/api/deps.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py)
|
||||
- [frontend/lib/auth.ts](file://frontend/lib/auth.ts)
|
||||
- [frontend/lib/api.ts](file://frontend/lib/api.ts)
|
||||
- [frontend/app/(auth)/login/page.tsx](file://frontend/app/(auth)/login/page.tsx)
|
||||
- [frontend/app/(auth)/register/page.tsx](file://frontend/app/(auth)/register/page.tsx)
|
||||
- [frontend/app/api/auth/[...nextauth]/route.ts](file://frontend/app/api/auth/[...nextauth]/route.ts)
|
||||
- [tests/test_auth.py](file://tests/test_auth.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件为 GEO 平台认证系统的实现文档,覆盖用户模型设计、JWT 令牌生成与验证、用户注册与登录流程、依赖注入在认证中的应用(当前用户获取与权限检查)、安全最佳实践、令牌存储策略与会话管理,以及认证错误处理与调试指南。系统采用后端 FastAPI + SQLAlchemy 异步 ORM + PostgreSQL + Redis(配置项存在);前端使用 NextAuth.js 作为会话与令牌桥接层,通过自定义凭据提供者对接后端认证接口。
|
||||
|
||||
## 项目结构
|
||||
后端认证相关模块组织如下:
|
||||
- 模型层:用户模型定义与数据库表结构
|
||||
- 模式层:Pydantic 数据校验模型(注册、登录、响应)
|
||||
- 服务层:密码哈希/校验、JWT 编码/解码、注册与认证业务逻辑
|
||||
- API 层:认证路由(注册、登录、当前用户)、依赖注入解析当前用户
|
||||
- 配置与数据库:设置加载、异步数据库引擎与会话工厂
|
||||
- 前端:NextAuth 配置、API 客户端、登录/注册页面
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "后端"
|
||||
M["models/user.py<br/>用户模型"]
|
||||
S["schemas/auth.py<br/>Pydantic 模型"]
|
||||
SV["services/auth.py<br/>密码/JWT/业务"]
|
||||
A["api/auth.py<br/>认证路由"]
|
||||
D["api/deps.py<br/>依赖注入解析当前用户"]
|
||||
CFG["config.py<br/>配置"]
|
||||
DB["database.py<br/>异步数据库"]
|
||||
MAIN["main.py<br/>应用入口"]
|
||||
MIG["alembic 迁移<br/>initial_migration.py"]
|
||||
end
|
||||
subgraph "前端"
|
||||
FA["lib/api.ts<br/>API 客户端"]
|
||||
FN["lib/auth.ts<br/>NextAuth 配置"]
|
||||
LR["app/(auth)/login/page.tsx<br/>登录页"]
|
||||
RR["app/(auth)/register/page.tsx<br/>注册页"]
|
||||
NAR["app/api/auth/[...nextauth]/route.ts<br/>NextAuth 路由"]
|
||||
end
|
||||
LR --> FA
|
||||
RR --> FA
|
||||
FA --> A
|
||||
A --> SV
|
||||
SV --> DB
|
||||
D --> SV
|
||||
D --> DB
|
||||
MAIN --> A
|
||||
MAIN --> D
|
||||
CFG --> SV
|
||||
CFG --> DB
|
||||
MIG --> DB
|
||||
FN --> NAR
|
||||
FA --> FN
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/schemas/auth.py:7-34](file://backend/app/schemas/auth.py#L7-L34)
|
||||
- [backend/app/services/auth.py:16-69](file://backend/app/services/auth.py#L16-L69)
|
||||
- [backend/app/api/auth.py:13-43](file://backend/app/api/auth.py#L13-L43)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/config.py:4-17](file://backend/app/config.py#L4-L17)
|
||||
- [backend/app/database.py:6-29](file://backend/app/database.py#L6-L29)
|
||||
- [backend/app/main.py:24-48](file://backend/app/main.py#L24-L48)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:21-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L21-L128)
|
||||
- [frontend/lib/api.ts:23-36](file://frontend/lib/api.ts#L23-L36)
|
||||
- [frontend/lib/auth.ts:5-56](file://frontend/lib/auth.ts#L5-L56)
|
||||
- [frontend/app/(auth)/login/page.tsx:19-93](file://frontend/app/(auth)/login/page.tsx#L19-L93)
|
||||
- [frontend/app/(auth)/register/page.tsx:20-128](file://frontend/app/(auth)/register/page.tsx#L20-L128)
|
||||
- [frontend/app/api/auth/[...nextauth]/route.ts:1-6](file://frontend/app/api/auth/[...nextauth]/route.ts#L1-L6)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:24-48](file://backend/app/main.py#L24-L48)
|
||||
- [backend/app/api/auth.py:13-43](file://backend/app/api/auth.py#L13-L43)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/services/auth.py:16-69](file://backend/app/services/auth.py#L16-L69)
|
||||
- [backend/app/schemas/auth.py:7-34](file://backend/app/schemas/auth.py#L7-L34)
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/config.py:4-17](file://backend/app/config.py#L4-L17)
|
||||
- [backend/app/database.py:6-29](file://backend/app/database.py#L6-L29)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:21-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L21-L128)
|
||||
- [frontend/lib/api.ts:23-36](file://frontend/lib/api.ts#L23-L36)
|
||||
- [frontend/lib/auth.ts:5-56](file://frontend/lib/auth.ts#L5-L56)
|
||||
- [frontend/app/(auth)/login/page.tsx:19-93](file://frontend/app/(auth)/login/page.tsx#L19-L93)
|
||||
- [frontend/app/(auth)/register/page.tsx:20-128](file://frontend/app/(auth)/register/page.tsx#L20-L128)
|
||||
- [frontend/app/api/auth/[...nextauth]/route.ts:1-6](file://frontend/app/api/auth/[...nextauth]/route.ts#L1-L6)
|
||||
|
||||
## 核心组件
|
||||
- 用户模型与数据库表
|
||||
- 字段与约束:UUID 主键、唯一邮箱、密码哈希、可选姓名、计划与配额、激活状态、时间戳等
|
||||
- 关系:与查询、订阅均为一对多关系,删除用户时级联删除
|
||||
- Pydantic 模式
|
||||
- 注册:邮箱、密码最小长度、姓名长度限制
|
||||
- 登录:邮箱与密码
|
||||
- 响应:用户信息与创建时间
|
||||
- 服务层
|
||||
- 密码:bcrypt 上下文进行哈希与校验
|
||||
- JWT:HS256 签名,基于配置的密钥与过期时长
|
||||
- 注册:去重检查、哈希密码、持久化
|
||||
- 认证:邮箱查找、密码校验
|
||||
- API 层
|
||||
- 注册:接收注册体,调用服务,异常转 HTTP
|
||||
- 登录:认证失败返回未授权,成功签发访问令牌
|
||||
- 当前用户:通过依赖注入解析当前用户
|
||||
- 依赖注入
|
||||
- OAuth2 密码流解析 Bearer 令牌,解码 JWT,按 ID 查询用户
|
||||
- 配置与数据库
|
||||
- 设置:数据库 URL、Redis URL、JWT 密钥、过期小时数等
|
||||
- 引擎:异步 PostgreSQL 引擎与会话工厂
|
||||
- 前端集成
|
||||
- NextAuth 使用凭据提供者对接后端登录接口,以 JWT 策略存储会话
|
||||
|
||||
章节来源
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:21-37](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L21-L37)
|
||||
- [backend/app/schemas/auth.py:7-34](file://backend/app/schemas/auth.py#L7-L34)
|
||||
- [backend/app/services/auth.py:16-69](file://backend/app/services/auth.py#L16-L69)
|
||||
- [backend/app/api/auth.py:13-43](file://backend/app/api/auth.py#L13-L43)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/config.py:4-17](file://backend/app/config.py#L4-L17)
|
||||
- [backend/app/database.py:6-29](file://backend/app/database.py#L6-L29)
|
||||
- [frontend/lib/auth.ts:5-56](file://frontend/lib/auth.ts#L5-L56)
|
||||
- [frontend/lib/api.ts:23-36](file://frontend/lib/api.ts#L23-L36)
|
||||
|
||||
## 架构总览
|
||||
后端认证流程概览:
|
||||
- 前端登录/注册页面通过 API 客户端调用后端 /api/v1/auth/* 接口
|
||||
- 后端服务层完成密码哈希/校验与 JWT 签发
|
||||
- 依赖注入层解析 Bearer 令牌并解析当前用户
|
||||
- 前端 NextAuth 将后端返回的访问令牌存入本地会话(JWT 策略)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant FE as "前端页面"
|
||||
participant API as "API 客户端"
|
||||
participant AUTH as "后端认证路由"
|
||||
participant SVC as "认证服务"
|
||||
participant DB as "数据库"
|
||||
participant DEP as "依赖注入"
|
||||
FE->>API : "提交登录/注册表单"
|
||||
API->>AUTH : "POST /api/v1/auth/login 或 /register"
|
||||
AUTH->>SVC : "authenticate_user/register_user"
|
||||
SVC->>DB : "查询/插入用户"
|
||||
DB-->>SVC : "返回用户/结果"
|
||||
SVC-->>AUTH : "返回用户或令牌"
|
||||
AUTH-->>API : "返回 {access_token, user}"
|
||||
API-->>FE : "保存令牌并跳转"
|
||||
FE->>DEP : "后续请求携带 Authorization : Bearer"
|
||||
DEP->>SVC : "verify_token 解码"
|
||||
SVC-->>DEP : "payload(sub)"
|
||||
DEP->>DB : "按ID查询用户"
|
||||
DB-->>DEP : "返回用户"
|
||||
DEP-->>FE : "注入当前用户"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/lib/api.ts:23-36](file://frontend/lib/api.ts#L23-L36)
|
||||
- [backend/app/api/auth.py:13-43](file://backend/app/api/auth.py#L13-L43)
|
||||
- [backend/app/services/auth.py:37-69](file://backend/app/services/auth.py#L37-L69)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [frontend/lib/auth.ts:13-32](file://frontend/lib/auth.ts#L13-L32)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 用户模型与数据库表
|
||||
- 字段与类型
|
||||
- id: UUID 主键,默认生成
|
||||
- email: 字符串,唯一且非空
|
||||
- password_hash: 字符串,非空
|
||||
- name: 字符串,可空
|
||||
- plan: 字符串,默认 "free"
|
||||
- max_queries: 整数,默认 5
|
||||
- is_active: 布尔,默认 true
|
||||
- created_at/updated_at: 时间戳,默认当前时间,更新时同步
|
||||
- 约束与索引
|
||||
- 唯一约束:email
|
||||
- 外键:查询与订阅表关联 users(id),级联删除
|
||||
- 关系
|
||||
- 一对多:用户 -> 查询、订阅
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
USERS {
|
||||
uuid id PK
|
||||
string email UK
|
||||
string password_hash
|
||||
string name
|
||||
string plan
|
||||
int max_queries
|
||||
boolean is_active
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamp last_queried_at
|
||||
timestamp next_query_at
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
SUBSCRIPTIONS {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string plan
|
||||
string status
|
||||
date start_date
|
||||
date end_date
|
||||
numeric amount
|
||||
string payment_method
|
||||
string payment_id
|
||||
timestamp created_at
|
||||
}
|
||||
USERS ||--o{ QUERIES : "拥有"
|
||||
USERS ||--o{ SUBSCRIPTIONS : "拥有"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:21-112](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L21-L112)
|
||||
|
||||
章节来源
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:21-112](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L21-L112)
|
||||
|
||||
### JWT 令牌生成与验证机制
|
||||
- 签名算法:HS256
|
||||
- 密钥来源:配置项 JWT_SECRET
|
||||
- 过期时间:配置项 JWT_EXPIRE_HOURS(小时)
|
||||
- 生成流程
|
||||
- 以用户 ID 作为 sub,加入过期时间,使用 HS256 签名编码
|
||||
- 验证流程
|
||||
- 使用相同密钥与算法解码,提取 sub(用户 ID),再按 ID 查询用户
|
||||
- 刷新策略
|
||||
- 代码中未实现刷新令牌机制;当前仅签发单一访问令牌(有效期内可重复使用),过期后需重新登录
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["开始"]) --> Encode["构造载荷{sub, exp}<br/>HS256 签名"]
|
||||
Encode --> Token["生成 access_token"]
|
||||
Token --> Verify["解码 access_token<br/>校验算法与密钥"]
|
||||
Verify --> ParseSub["提取 sub(用户ID)"]
|
||||
ParseSub --> Lookup["按ID查询用户"]
|
||||
Lookup --> Done(["结束"])
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/services/auth.py:24-34](file://backend/app/services/auth.py#L24-L34)
|
||||
- [backend/app/api/deps.py:26-37](file://backend/app/api/deps.py#L26-L37)
|
||||
- [backend/app/config.py:9-10](file://backend/app/config.py#L9-L10)
|
||||
|
||||
章节来源
|
||||
- [backend/app/services/auth.py:24-34](file://backend/app/services/auth.py#L24-L34)
|
||||
- [backend/app/api/deps.py:26-37](file://backend/app/api/deps.py#L26-L37)
|
||||
- [backend/app/config.py:9-10](file://backend/app/config.py#L9-L10)
|
||||
|
||||
### 用户注册流程
|
||||
- 输入校验:邮箱格式、密码最小长度、姓名长度
|
||||
- 去重检查:按邮箱查询,若已存在则抛出错误
|
||||
- 密码处理:bcrypt 哈希
|
||||
- 持久化:创建用户记录并提交事务
|
||||
- 返回:用户信息(不含敏感字段)
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
RStart(["注册入口"]) --> Validate["Pydantic 校验"]
|
||||
Validate --> CheckDup["按邮箱查重"]
|
||||
CheckDup --> Dup{"已存在?"}
|
||||
Dup -- 是 --> Err["抛出错误"]
|
||||
Dup -- 否 --> Hash["bcrypt 哈希密码"]
|
||||
Hash --> Persist["创建用户并提交"]
|
||||
Persist --> RDone(["返回用户信息"])
|
||||
Err --> RDone
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/schemas/auth.py:7-11](file://backend/app/schemas/auth.py#L7-L11)
|
||||
- [backend/app/services/auth.py:37-52](file://backend/app/services/auth.py#L37-L52)
|
||||
|
||||
章节来源
|
||||
- [backend/app/schemas/auth.py:7-11](file://backend/app/schemas/auth.py#L7-L11)
|
||||
- [backend/app/services/auth.py:37-52](file://backend/app/services/auth.py#L37-L52)
|
||||
- [tests/test_auth.py:25-58](file://tests/test_auth.py#L25-L58)
|
||||
|
||||
### 登录认证过程
|
||||
- 输入校验:邮箱与密码
|
||||
- 用户查找:按邮箱查询
|
||||
- 凭据验证:bcrypt 校验密码哈希
|
||||
- 会话管理:签发访问令牌(Bearer)
|
||||
- 权限分配:当前用户解析后,后续路由可依赖注入获取用户
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as "客户端"
|
||||
participant A as "认证路由"
|
||||
participant S as "认证服务"
|
||||
participant DB as "数据库"
|
||||
C->>A : "POST /login {email,password}"
|
||||
A->>S : "authenticate_user(email,password)"
|
||||
S->>DB : "select * from users where email=?"
|
||||
DB-->>S : "用户或空"
|
||||
S->>S : "bcrypt 校验"
|
||||
S-->>A : "用户或None"
|
||||
A-->>C : "{access_token,bearer,user}"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/auth.py:22-37](file://backend/app/api/auth.py#L22-L37)
|
||||
- [backend/app/services/auth.py:55-69](file://backend/app/services/auth.py#L55-L69)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:22-37](file://backend/app/api/auth.py#L22-L37)
|
||||
- [backend/app/services/auth.py:55-69](file://backend/app/services/auth.py#L55-L69)
|
||||
- [tests/test_auth.py:61-84](file://tests/test_auth.py#L61-L84)
|
||||
|
||||
### 依赖注入与当前用户获取
|
||||
- OAuth2 密码流:tokenUrl 指向 /api/v1/auth/login
|
||||
- 依赖函数 get_current_user:
|
||||
- 从 Authorization 头解析 Bearer 令牌
|
||||
- 调用 verify_token 解码
|
||||
- 提取 sub(UUID),查询用户
|
||||
- 不存在或解码失败返回未授权
|
||||
- 在路由中使用:Depends(get_current_user) 获取当前用户
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
DS(["依赖注入入口"]) --> Parse["OAuth2 解析 Bearer"]
|
||||
Parse --> Decode["verify_token 解码"]
|
||||
Decode --> Sub{"sub 是否存在?"}
|
||||
Sub -- 否 --> Unauth["401 未授权"]
|
||||
Sub -- 是 --> Query["按ID查询用户"]
|
||||
Query --> Found{"找到用户?"}
|
||||
Found -- 否 --> Unauth
|
||||
Found -- 是 --> Inject["注入当前用户"]
|
||||
Inject --> Done(["下游路由可用"])
|
||||
Unauth --> Done
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/services/auth.py:32-34](file://backend/app/services/auth.py#L32-L34)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/api/auth.py:40-42](file://backend/app/api/auth.py#L40-L42)
|
||||
- [tests/test_auth.py:87-104](file://tests/test_auth.py#L87-L104)
|
||||
|
||||
### 前端集成与会话管理
|
||||
- NextAuth 配置
|
||||
- 凭据提供者:向后端发送邮箱/密码
|
||||
- authorize:调用后端登录接口,成功则将 access_token 与用户信息写入 JWT
|
||||
- 会话策略:jwt
|
||||
- 回调:将 access_token 与用户 ID 写入 token/session
|
||||
- 页面交互
|
||||
- 登录页:收集邮箱/密码,调用 signIn("credentials"),错误提示“邮箱或密码错误”
|
||||
- 注册页:校验两次密码一致,调用后端注册,随后自动登录
|
||||
- API 客户端
|
||||
- 自动在请求头添加 Authorization: Bearer token
|
||||
- 统一错误处理与响应解析
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant L as "登录页"
|
||||
participant NA as "NextAuth"
|
||||
participant API as "后端登录"
|
||||
participant CL as "API 客户端"
|
||||
L->>NA : "signIn('credentials',{email,password})"
|
||||
NA->>API : "POST /api/v1/auth/login"
|
||||
API-->>NA : "{access_token,user}"
|
||||
NA->>NA : "回调写入 token.accessToken/token.id"
|
||||
NA-->>L : "登录成功,跳转仪表盘"
|
||||
L->>CL : "后续请求携带 Authorization : Bearer"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/app/(auth)/login/page.tsx:26-42](file://frontend/app/(auth)/login/page.tsx#L26-L42)
|
||||
- [frontend/app/(auth)/register/page.tsx:29-55](file://frontend/app/(auth)/register/page.tsx#L29-L55)
|
||||
- [frontend/lib/auth.ts:13-32](file://frontend/lib/auth.ts#L13-L32)
|
||||
- [frontend/lib/api.ts:12-21](file://frontend/lib/api.ts#L12-L21)
|
||||
- [frontend/app/api/auth/[...nextauth]/route.ts:1-6](file://frontend/app/api/auth/[...nextauth]/route.ts#L1-L6)
|
||||
|
||||
章节来源
|
||||
- [frontend/lib/auth.ts:5-56](file://frontend/lib/auth.ts#L5-L56)
|
||||
- [frontend/app/(auth)/login/page.tsx:19-93](file://frontend/app/(auth)/login/page.tsx#L19-L93)
|
||||
- [frontend/app/(auth)/register/page.tsx:20-128](file://frontend/app/(auth)/register/page.tsx#L20-L128)
|
||||
- [frontend/lib/api.ts:23-36](file://frontend/lib/api.ts#L23-L36)
|
||||
- [frontend/app/api/auth/[...nextauth]/route.ts:1-6](file://frontend/app/api/auth/[...nextauth]/route.ts#L1-L6)
|
||||
|
||||
## 依赖分析
|
||||
- 组件耦合
|
||||
- API 层依赖服务层与数据库依赖注入
|
||||
- 服务层依赖配置与密码/JWT工具
|
||||
- 依赖注入层依赖服务层的令牌验证与数据库查询
|
||||
- 外部依赖
|
||||
- 数据库:PostgreSQL(异步驱动)
|
||||
- 缓存:Redis(配置项存在,当前认证未直接使用)
|
||||
- 加密:bcrypt(passlib)
|
||||
- JWT:jose(python-jose)
|
||||
- 可能的循环依赖
|
||||
- 当前模块间无明显循环导入
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
API["api/auth.py"] --> SVC["services/auth.py"]
|
||||
API --> DB["database.py"]
|
||||
DEP["api/deps.py"] --> SVC
|
||||
DEP --> DB
|
||||
SVC --> CFG["config.py"]
|
||||
SVC --> DB
|
||||
MAIN["main.py"] --> API
|
||||
MAIN --> DEP
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
|
||||
## 性能考虑
|
||||
- 数据库连接
|
||||
- 使用异步引擎与会话工厂,减少阻塞
|
||||
- 会话配置:关闭自动提交/刷写,expire_on_commit 控制提交后已加载对象是否过期
|
||||
- 密码哈希
|
||||
- bcrypt 默认成本适中,建议在生产环境评估成本参数
|
||||
- JWT
|
||||
- HS256 为对称算法,计算开销低;注意密钥安全与轮换
|
||||
- 网络与缓存
|
||||
- Redis 配置存在但未在认证中使用,可考虑用于令牌黑名单或会话缓存(需额外实现)
|
||||
|
||||
## 故障排查指南
|
||||
- 常见错误与定位
|
||||
- 注册重复邮箱:服务层抛出值错误(ValueError),接口将其转换为 400 响应;测试断言状态码 400 与错误详情
|
||||
- 登录凭据错误:后端返回 401 未授权,测试断言错误详情
|
||||
- 未携带或无效令牌:依赖注入解析失败,返回 401
|
||||
- 前端登录失败
|
||||
- 检查 NextAuth authorize 是否收到 access_token
|
||||
- 确认后端 /api/v1/auth/login 返回结构
|
||||
- API 请求失败
|
||||
- 检查 Authorization 头是否正确附加 Bearer 令牌
|
||||
- 统一错误处理会将后端错误消息透传
|
||||
|
||||
章节来源
|
||||
- [tests/test_auth.py:25-84](file://tests/test_auth.py#L25-L84)
|
||||
- [backend/app/api/auth.py:17-30](file://backend/app/api/auth.py#L17-L30)
|
||||
- [backend/app/api/deps.py:20-41](file://backend/app/api/deps.py#L20-L41)
|
||||
- [frontend/lib/api.ts:16-21](file://frontend/lib/api.ts#L16-L21)
|
||||
|
||||
## 结论
|
||||
本认证系统采用清晰的分层设计:模式层负责输入校验,服务层封装密码与令牌逻辑,API 层提供 REST 接口,依赖注入统一解析当前用户。前端通过 NextAuth 与后端无缝衔接,形成完整的登录/注册与会话管理闭环。当前实现具备良好的扩展性,建议在生产环境中强化令牌刷新、黑名单与密钥轮换策略,并结合 Redis 实现更完善的会话与令牌治理。
|
||||
|
||||
## 附录
|
||||
- 安全最佳实践
|
||||
- 密钥管理:确保 JWT_SECRET 不泄露,定期轮换
|
||||
- 传输安全:启用 HTTPS,避免明文传输
|
||||
- 输入校验:前端与后端双重校验,防止越界与注入
|
||||
- 令牌策略:考虑引入刷新令牌与黑名单机制
|
||||
- 会话策略:前端 JWT 策略简单可靠,建议配合 HttpOnly Cookie 与 SameSite 策略进一步加固
|
||||
- 令牌存储策略
|
||||
- 前端:NextAuth JWT 存储在浏览器会话中
|
||||
- 后端:当前未实现服务端会话存储,建议结合 Redis 实现
|
||||
- 权限检查
|
||||
- 当前仅实现“当前用户”解析;如需细粒度权限,可在服务层增加角色/资源权限映射并在路由中校验
|
||||
|
|
@ -0,0 +1,329 @@
|
|||
# 代码规范
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [backend/alembic.ini](file://backend/alembic.ini)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/schemas/auth.py](file://backend/app/schemas/auth.py)
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [tests/test_auth.py](file://tests/test_auth.py)
|
||||
- [frontend/package.json](file://frontend/package.json)
|
||||
- [.eslintrc.json](file://frontend/.eslintrc.json)
|
||||
- [frontend/tsconfig.json](file://frontend/tsconfig.json)
|
||||
- [frontend/app/layout.tsx](file://frontend/app/layout.tsx)
|
||||
- [frontend/lib/api.ts](file://frontend/lib/api.ts)
|
||||
- [frontend/components/ui/button.tsx](file://frontend/components/ui/button.tsx)
|
||||
- [frontend/types/next-auth.d.ts](file://frontend/types/next-auth.d.ts)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [引言](#引言)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 引言
|
||||
本文件为 GEO 项目的统一代码规范文档,覆盖后端 Python 与前端 TypeScript/Next.js 的风格与实践,明确命名约定、导入顺序、注释规范、类型与接口设计、模块组织、错误处理模式以及前后端一致性要求,并提供可操作的自动化检查工具配置与使用方法。文档同时给出“正确/错误”示例的路径指引,便于团队在开发过程中快速对照与改进。
|
||||
|
||||
## 项目结构
|
||||
- 后端采用 FastAPI + SQLAlchemy 2.x + Pydantic v2,使用异步数据库会话与依赖注入,模块按功能分层:API 路由、服务层、模型与模式(schemas)、工作流与调度。
|
||||
- 前端采用 Next.js App Router + TypeScript + TailwindCSS,UI 组件基于 Radix UI 与 class-variance-authority,类型扩展用于 NextAuth。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "后端"
|
||||
MAIN["app/main.py<br/>应用入口与路由挂载"]
|
||||
AUTH_API["app/api/auth.py<br/>认证路由"]
|
||||
SCHEMA_AUTH["app/schemas/auth.py<br/>认证模式"]
|
||||
MODEL_USER["app/models/user.py<br/>用户模型"]
|
||||
WORKER["app/workers/citation_engine.py<br/>引用检测引擎"]
|
||||
CONFIG["app/config.py<br/>配置读取"]
|
||||
end
|
||||
subgraph "前端"
|
||||
LAYOUT["app/layout.tsx<br/>根布局与元数据"]
|
||||
API_TS["lib/api.ts<br/>API 封装"]
|
||||
BTN["components/ui/button.tsx<br/>按钮组件"]
|
||||
TYPES["types/next-auth.d.ts<br/>NextAuth 类型扩展"]
|
||||
end
|
||||
MAIN --> AUTH_API
|
||||
AUTH_API --> SCHEMA_AUTH
|
||||
AUTH_API --> MODEL_USER
|
||||
MAIN --> WORKER
|
||||
MAIN --> CONFIG
|
||||
LAYOUT --> API_TS
|
||||
API_TS --> AUTH_API
|
||||
BTN --> LAYOUT
|
||||
TYPES --> LAYOUT
|
||||
```
|
||||
|
||||
图示来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/schemas/auth.py:1-34](file://backend/app/schemas/auth.py#L1-L34)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [frontend/components/ui/button.tsx:1-57](file://frontend/components/ui/button.tsx#L1-L57)
|
||||
- [frontend/types/next-auth.d.ts:1-26](file://frontend/types/next-auth.d.ts#L1-L26)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
|
||||
## 核心组件
|
||||
- 应用入口与中间件:统一设置 CORS、健康检查端点、路由挂载与生命周期管理。
|
||||
- 认证模块:注册、登录、当前用户信息获取,配合 Pydantic 模式与数据库模型。
|
||||
- 引擎与工作流:引用检测引擎封装品牌匹配、竞争品牌识别与多平台适配器调用。
|
||||
- 前端 API 封装:统一鉴权头、错误处理与资源访问;UI 组件通过变体系统实现一致风格。
|
||||
- 类型与扩展:NextAuth 类型扩展确保会话与 JWT 字段的类型安全。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/schemas/auth.py:1-34](file://backend/app/schemas/auth.py#L1-L34)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [frontend/types/next-auth.d.ts:1-26](file://frontend/types/next-auth.d.ts#L1-L26)
|
||||
|
||||
## 架构总览
|
||||
后端通过 FastAPI 提供 REST 接口,前端通过 Next.js App Router 渲染页面并通过封装的 API 客户端访问后端。测试覆盖认证流程与错误场景,确保行为符合预期。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant FE as "前端页面"
|
||||
participant API as "lib/api.ts"
|
||||
participant BE as "FastAPI 路由"
|
||||
participant SVC as "服务/模型"
|
||||
FE->>API : "调用认证/查询/报告接口"
|
||||
API->>BE : "携带 Authorization 头发起请求"
|
||||
BE->>SVC : "依赖注入与业务处理"
|
||||
SVC-->>BE : "返回响应或异常"
|
||||
BE-->>API : "标准化响应"
|
||||
API-->>FE : "解析并渲染结果"
|
||||
```
|
||||
|
||||
图示来源
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### Python 后端代码规范
|
||||
- 语言与版本:使用 Python 3.x,推荐 3.10+。
|
||||
- 依赖管理:通过 requirements.txt 管理生产与测试依赖。
|
||||
- 代码风格与静态检查:Alembic 配置中预留了格式化与检查钩子(如 black/ruff),建议在本地与 CI 中启用以保持一致性。
|
||||
- 命名约定
|
||||
- 模块与包:小写、下划线分隔(如 app/api/auth.py)。
|
||||
- 类:大驼峰(如 User、CitationEngine)。
|
||||
- 函数与变量:小写加下划线(如 register_user、execute_query)。
|
||||
- 常量:全大写加下划线(如 DATABASE_URL)。
|
||||
- 导入顺序
|
||||
- 标准库 → 第三方库 → 项目内导入(相对导入与绝对导入的用法在项目内保持一致)。
|
||||
- FastAPI 路由模块中,先导入标准库与第三方,再导入项目内模块。
|
||||
- 注释规范
|
||||
- 模块级 docstring:简述模块职责。
|
||||
- 类与函数:使用三引号 docstring,描述参数、返回值与异常。
|
||||
- 关键逻辑处添加行内注释,解释复杂算法或边界条件。
|
||||
- 错误处理
|
||||
- 使用 HTTPException 抛出语义化错误,避免泄露内部细节。
|
||||
- 服务层捕获业务异常并转换为 HTTP 状态码。
|
||||
- 类型与数据模型
|
||||
- 使用 Pydantic v2 BaseModel 定义输入输出模式,配合 EmailStr、Field 等约束。
|
||||
- SQLAlchemy ORM 模型使用 Mapped、mapped_column 等 2.x 现代声明式语法。
|
||||
- 并发与异步
|
||||
- 使用 AsyncSession 与 async/await,避免阻塞操作。
|
||||
- 示例路径
|
||||
- 正确:[认证路由与异常处理:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- 正确:[Pydantic 模式定义:1-34](file://backend/app/schemas/auth.py#L1-L34)
|
||||
- 正确:[SQLAlchemy 模型字段与关系:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- 正确:[引擎类与方法注释:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [backend/alembic.ini:92-114](file://backend/alembic.ini#L92-L114)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/schemas/auth.py:1-34](file://backend/app/schemas/auth.py#L1-L34)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
|
||||
### TypeScript/JavaScript 前端代码规范
|
||||
- 语言与编译选项:严格模式(strict)、ESNext 模块、Bundler 模块解析、JSX 保留(preserve)。
|
||||
- 命名约定
|
||||
- 文件:小写加连字符(如 next-auth.d.ts),组件使用 PascalCase(如 Button)。
|
||||
- 变量与函数:小驼峰(如 fetchWithAuth)。
|
||||
- 类型:接口与类型别名使用大驼峰(如 ButtonProps)。
|
||||
- 导入顺序
|
||||
- 先标准库与第三方,再内部路径别名(如 @/*)。
|
||||
- 注释规范
|
||||
- 函数与接口:使用 JSDoc 风格注释,说明参数、返回值与异常。
|
||||
- 复杂逻辑处补充行内注释。
|
||||
- 类型定义与接口设计
|
||||
- 使用 TypeScript 接口与联合类型表达数据结构。
|
||||
- NextAuth 类型扩展确保会话与 JWT 字段类型安全。
|
||||
- 模块组织
|
||||
- API 客户端集中于 lib/api.ts,按领域拆分子模块。
|
||||
- UI 组件位于 components/ui,通过变体系统统一风格。
|
||||
- 错误处理模式
|
||||
- 统一的 fetchWithAuth 封装:校验响应状态、提取错误详情并抛出错误。
|
||||
- 页面侧通过 try/catch 或 Promise.catch 捕获并提示。
|
||||
- 示例路径
|
||||
- 正确:[TS 编译配置:1-27](file://frontend/tsconfig.json#L1-L27)
|
||||
- 正确:[ESLint 扩展:1-4](file://frontend/.eslintrc.json#L1-L4)
|
||||
- 正确:[API 客户端封装与错误处理:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- 正确:[UI 组件变体系统:1-57](file://frontend/components/ui/button.tsx#L1-L57)
|
||||
- 正确:[NextAuth 类型扩展:1-26](file://frontend/types/next-auth.d.ts#L1-L26)
|
||||
|
||||
章节来源
|
||||
- [frontend/tsconfig.json:1-27](file://frontend/tsconfig.json#L1-L27)
|
||||
- [frontend/.eslintrc.json:1-4](file://frontend/.eslintrc.json#L1-L4)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [frontend/components/ui/button.tsx:1-57](file://frontend/components/ui/button.tsx#L1-L57)
|
||||
- [frontend/types/next-auth.d.ts:1-26](file://frontend/types/next-auth.d.ts#L1-L26)
|
||||
|
||||
### 前后端一致性要求
|
||||
- 接口契约
|
||||
- 请求与响应字段命名保持一致(如 email、password、access_token)。
|
||||
- 错误响应结构统一(包含 detail 字段)。
|
||||
- 鉴权与头部
|
||||
- 前端统一在 Authorization 头中携带 Bearer Token。
|
||||
- 后端路由对未认证请求返回 401。
|
||||
- 资源路径
|
||||
- 前端 API 基础地址通过环境变量控制,与后端路由前缀保持一致。
|
||||
- 示例路径
|
||||
- 正确:[后端路由前缀与标签:38-42](file://backend/app/main.py#L38-L42)
|
||||
- 正确:[前端 API 基础地址与鉴权头:1-21](file://frontend/lib/api.ts#L1-L21)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:38-42](file://backend/app/main.py#L38-L42)
|
||||
- [frontend/lib/api.ts:1-21](file://frontend/lib/api.ts#L1-L21)
|
||||
|
||||
### 自动化代码检查工具配置与使用
|
||||
- 后端
|
||||
- Alembic 配置预留了 post_write_hooks,可集成 black 与 ruff 进行格式化与检查。
|
||||
- 建议在本地与 CI 中启用:格式化(black)与静态检查(ruff/flake8/pylint)。
|
||||
- 依赖声明参考 requirements.txt,确保工具链可用。
|
||||
- 前端
|
||||
- ESLint 通过 .eslintrc.json 扩展 next/core-web-vitals 与 next/typescript。
|
||||
- 建议在本地与 CI 中运行 lint 脚本,修复问题后再提交。
|
||||
- TS 编译配置启用 strict 模式,提升类型安全性。
|
||||
- 示例路径
|
||||
- 正确:[Alembic hooks 配置:92-114](file://backend/alembic.ini#L92-L114)
|
||||
- 正确:[ESLint 扩展:1-4](file://frontend/.eslintrc.json#L1-L4)
|
||||
- 正确:[TS 严格模式与模块解析:6-12](file://frontend/tsconfig.json#L6-L12)
|
||||
- 正确:[依赖声明:1-35](file://backend/requirements.txt#L1-L35)
|
||||
|
||||
章节来源
|
||||
- [backend/alembic.ini:92-114](file://backend/alembic.ini#L92-L114)
|
||||
- [frontend/.eslintrc.json:1-4](file://frontend/.eslintrc.json#L1-L4)
|
||||
- [frontend/tsconfig.json:6-12](file://frontend/tsconfig.json#L6-L12)
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
|
||||
## 依赖分析
|
||||
- 后端依赖
|
||||
- Web 框架:FastAPI、Uvicorn
|
||||
- 数据库:SQLAlchemy 2.x、Asyncpg、Alembic
|
||||
- 数据验证与配置:Pydantic v2、Pydantic-Settings、email-validator
|
||||
- 认证与安全:python-jose、passlib、python-multipart
|
||||
- 缓存与任务调度:Redis、APScheduler
|
||||
- 浏览器自动化:Playwright
|
||||
- HTTP 客户端与工具:HTTPX、python-dotenv
|
||||
- 测试:pytest、pytest-asyncio、aiosqlite
|
||||
- 前端依赖
|
||||
- 运行时:Next.js、React、NextAuth
|
||||
- UI:Radix UI、Recharts、TailwindCSS
|
||||
- 开发工具:ESLint、TypeScript、PostCSS、TailwindCSS
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
subgraph "后端"
|
||||
FASTAPI["FastAPI"]
|
||||
SQLA["SQLAlchemy 2.x"]
|
||||
PYD["Pydantic v2"]
|
||||
REDIS["Redis"]
|
||||
APS["APScheduler"]
|
||||
PW["Playwright"]
|
||||
HTTPX["HTTPX"]
|
||||
PYTEST["pytest"]
|
||||
end
|
||||
subgraph "前端"
|
||||
NEXT["Next.js"]
|
||||
REACT["React"]
|
||||
NA["NextAuth"]
|
||||
RUI["Radix UI"]
|
||||
REC["Recharts"]
|
||||
TSC["TypeScript"]
|
||||
ESL["ESLint"]
|
||||
end
|
||||
FASTAPI --> SQLA
|
||||
FASTAPI --> PYD
|
||||
FASTAPI --> REDIS
|
||||
FASTAPI --> APS
|
||||
FASTAPI --> HTTPX
|
||||
NEXT --> REACT
|
||||
NEXT --> NA
|
||||
NEXT --> RUI
|
||||
NEXT --> REC
|
||||
NEXT --> TSC
|
||||
NEXT --> ESL
|
||||
```
|
||||
|
||||
图示来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
|
||||
## 性能考虑
|
||||
- 异步优先:后端使用 AsyncSession 与异步 I/O,避免阻塞事件循环。
|
||||
- 任务调度:APScheduler 合理设置频率与并发,避免重复任务与资源争用。
|
||||
- 缓存策略:Redis 用于热点数据缓存与会话存储,减少数据库压力。
|
||||
- 前端渲染:Next.js App Router 与静态生成/预渲染结合,降低首屏延迟。
|
||||
- 网络请求:统一的 API 客户端封装,避免重复请求与无谓重试。
|
||||
|
||||
## 故障排查指南
|
||||
- 认证失败
|
||||
- 检查前端是否正确传递 Authorization 头。
|
||||
- 检查后端路由是否正确处理 401 场景。
|
||||
- 参考测试用例断言与错误消息。
|
||||
- 数据库连接
|
||||
- 确认 DATABASE_URL 与容器网络可达。
|
||||
- 检查 Alembic 迁移是否成功。
|
||||
- 任务执行异常
|
||||
- 查看日志与 QueryTask 状态更新,定位具体平台适配器问题。
|
||||
- 前端类型错误
|
||||
- 启用 TS 严格模式,修复类型不匹配。
|
||||
- 使用 ESLint 规则约束代码质量。
|
||||
|
||||
章节来源
|
||||
- [tests/test_auth.py:1-104](file://tests/test_auth.py#L1-L104)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/workers/citation_engine.py:211-228](file://backend/app/workers/citation_engine.py#L211-L228)
|
||||
- [frontend/lib/api.ts:16-21](file://frontend/lib/api.ts#L16-L21)
|
||||
|
||||
## 结论
|
||||
本规范明确了 GEO 项目在 Python 与 TypeScript 生态下的风格与实践,强调一致性、可维护性与可测试性。建议在本地与 CI 中强制执行格式化与静态检查,持续完善测试覆盖,确保前后端契约稳定与类型安全。
|
||||
|
||||
## 附录
|
||||
- 快速检查清单
|
||||
- 后端:使用 Alembic hooks 格式化与检查;Pydantic 字段约束完整;HTTP 异常语义化;异步会话正确使用。
|
||||
- 前端:ESLint 通过;TS 严格模式开启;UI 组件统一变体;API 客户端统一错误处理;NextAuth 类型扩展齐全。
|
||||
- 示例路径(正确/错误对照)
|
||||
- 正确:[认证路由与异常处理:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- 正确:[API 客户端封装与错误处理:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- 正确:[NextAuth 类型扩展:1-26](file://frontend/types/next-auth.d.ts#L1-L26)
|
||||
|
|
@ -0,0 +1,360 @@
|
|||
# 开发工具
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [backend/Dockerfile](file://backend/Dockerfile)
|
||||
- [frontend/Dockerfile](file://frontend/Dockerfile)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [frontend/package.json](file://frontend/package.json)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/alembic.ini](file://backend/alembic.ini)
|
||||
- [backend/alembic/env.py](file://backend/alembic/env.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [frontend/lib/api.ts](file://frontend/lib/api.ts)
|
||||
- [frontend/tsconfig.json](file://frontend/tsconfig.json)
|
||||
- [frontend/.eslintrc.json](file://frontend/.eslintrc.json)
|
||||
- [frontend/next.config.mjs](file://frontend/next.config.mjs)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件面向GEO项目的开发者,提供从IDE配置、调试工具、开发辅助工具到命令行与脚本使用的完整指南。内容覆盖:
|
||||
- VS Code配置与推荐插件(Python、TypeScript/Next.js)
|
||||
- 断点调试、日志分析与性能分析方法
|
||||
- API测试、数据库管理与Docker容器管理
|
||||
- 命令行工具与脚本使用
|
||||
- 开发环境优化与常见问题排查
|
||||
|
||||
## 项目结构
|
||||
GEO采用前后端分离的多容器架构,通过Docker Compose编排数据库、缓存、后端服务与前端开发服务器。后端基于FastAPI,前端基于Next.js(App Router),数据库迁移使用Alembic。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "本地开发环境"
|
||||
VSCode["VS Code 开发环境"]
|
||||
Postman["API 测试工具"]
|
||||
DBTools["数据库管理工具"]
|
||||
DockerCLI["Docker CLI"]
|
||||
end
|
||||
subgraph "容器编排"
|
||||
DC["docker-compose.yml"]
|
||||
DB["Postgres 容器"]
|
||||
Redis["Redis 容器"]
|
||||
BE["后端容器"]
|
||||
FE["前端容器"]
|
||||
end
|
||||
VSCode --> DC
|
||||
Postman --> BE
|
||||
DBTools --> DB
|
||||
DockerCLI --> DC
|
||||
DC --> DB
|
||||
DC --> Redis
|
||||
DC --> BE
|
||||
DC --> FE
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
|
||||
## 核心组件
|
||||
- 后端服务(FastAPI)
|
||||
- 应用入口与生命周期管理、CORS配置、路由注册与健康检查接口
|
||||
- 配置加载与外部服务密钥(数据库、Redis、JWT、浏览器路径等)
|
||||
- 数据库迁移(Alembic)
|
||||
- 异步迁移环境、日志级别与SQLAlchemy连接配置
|
||||
- 前端应用(Next.js App Router)
|
||||
- API封装、类型配置、ESLint规则与构建配置
|
||||
- 定时任务调度器(APScheduler + Playwright)
|
||||
- 异步调度、周期性查询检查与执行
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [backend/alembic.ini:1-150](file://backend/alembic.ini#L1-L150)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [frontend/tsconfig.json:1-27](file://frontend/tsconfig.json#L1-L27)
|
||||
- [frontend/.eslintrc.json:1-4](file://frontend/.eslintrc.json#L1-L4)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
|
||||
## 架构总览
|
||||
下图展示本地开发与容器化运行时的交互关系,以及各组件在容器内的端口映射与依赖顺序。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
Dev["开发者主机"]
|
||||
subgraph "Docker Compose 编排"
|
||||
subgraph "服务"
|
||||
DB["db:5432 → POSTGRES"]
|
||||
RD["redis:6379 → 缓存"]
|
||||
BE["backend:8000 → FastAPI"]
|
||||
FE["frontend:3000 → Next.js dev"]
|
||||
end
|
||||
end
|
||||
Dev --> |"浏览器访问"| FE
|
||||
Dev --> |"API 请求"| BE
|
||||
BE --> |"数据库"| DB
|
||||
BE --> |"缓存/队列"| RD
|
||||
FE --> |"调用后端 API"| BE
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [docker-compose.yml:3-66](file://docker-compose.yml#L3-L66)
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 后端服务(FastAPI)调试与配置
|
||||
- 应用入口与生命周期
|
||||
- 使用生命周期钩子初始化模型与启动调度器,并在关闭时优雅停机
|
||||
- 注册认证、查询、引用、报告等路由前缀与标签
|
||||
- CORS策略
|
||||
- 允许来自前端开发地址的跨域请求
|
||||
- 健康检查
|
||||
- 提供基础健康检查端点用于容器健康探测
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant FE as "前端(3000)"
|
||||
participant API as "后端(8000)"
|
||||
participant DB as "数据库"
|
||||
participant Cache as "缓存"
|
||||
Client->>FE : "打开应用"
|
||||
FE->>API : "发起登录/查询/报告等请求"
|
||||
API->>DB : "读写数据(异步ORM)"
|
||||
API->>Cache : "读写缓存(可选)"
|
||||
API-->>FE : "返回JSON响应"
|
||||
FE-->>Client : "渲染界面"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:13-47](file://backend/app/main.py#L13-L47)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
### 数据库迁移(Alembic)流程
|
||||
- 运行模式
|
||||
- 支持离线与在线两种迁移模式;在线模式使用异步引擎连接数据库,离线模式直接使用配置中的 URL
|
||||
- 日志配置
|
||||
- 控制根日志、SQLAlchemy引擎与Alembic的日志级别
|
||||
- 数据库URL
|
||||
- 默认指向本地或容器内Postgres服务
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["启动迁移"]) --> Mode{"选择模式"}
|
||||
Mode --> |离线| Offline["使用配置URL直接迁移"]
|
||||
Mode --> |在线| Online["创建异步引擎并连接"]
|
||||
Offline --> Tx["开启事务并执行迁移"]
|
||||
Online --> Tx
|
||||
Tx --> Done(["完成"])
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/alembic/env.py:33-88](file://backend/alembic/env.py#L33-L88)
|
||||
- [backend/alembic.ini:115-150](file://backend/alembic.ini#L115-L150)
|
||||
|
||||
章节来源
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [backend/alembic.ini:1-150](file://backend/alembic.ini#L1-L150)
|
||||
|
||||
### 前端(Next.js App Router)配置要点
|
||||
- TypeScript与路径别名
|
||||
- 严格类型检查、Bundler模块解析、路径别名配置
|
||||
- ESLint规则
|
||||
- 继承Next.js核心Web Vitals与TypeScript规则
|
||||
- 构建与开发
|
||||
- 开发服务器端口、构建产物与类型生成
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
TS["tsconfig.json<br/>严格类型/路径别名"] --> ESL[".eslintrc.json<br/>继承Next规则"]
|
||||
ESL --> Build["next.config.mjs<br/>默认配置"]
|
||||
Build --> Dev["npm run dev<br/>3000端口"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/tsconfig.json:1-27](file://frontend/tsconfig.json#L1-L27)
|
||||
- [frontend/.eslintrc.json:1-4](file://frontend/.eslintrc.json#L1-L4)
|
||||
- [frontend/next.config.mjs:1-5](file://frontend/next.config.mjs#L1-L5)
|
||||
|
||||
章节来源
|
||||
- [frontend/tsconfig.json:1-27](file://frontend/tsconfig.json#L1-L27)
|
||||
- [frontend/.eslintrc.json:1-4](file://frontend/.eslintrc.json#L1-L4)
|
||||
- [frontend/next.config.mjs:1-5](file://frontend/next.config.mjs#L1-L5)
|
||||
|
||||
### 定时任务调度器(APScheduler + Playwright)
|
||||
- 调度策略
|
||||
- 每小时触发一次检查,筛选到期的活跃查询并交由引用检测引擎(CitationEngine)执行
|
||||
- 异常处理
|
||||
- 单条任务失败不影响整体调度;关闭时优雅停止调度器与浏览器资源
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class QueryScheduler {
|
||||
+start()
|
||||
+check_and_execute_queries()
|
||||
+shutdown()
|
||||
-_run_check()
|
||||
-_execute_single_query(query, db)
|
||||
}
|
||||
class CitationEngine {
|
||||
+execute_query(query, db)
|
||||
+close()
|
||||
}
|
||||
QueryScheduler --> CitationEngine : "执行查询"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
|
||||
## 依赖分析
|
||||
- 后端依赖
|
||||
- Web框架、异步数据库、序列化、认证、缓存、任务调度、浏览器自动化、HTTP客户端、环境变量、测试工具
|
||||
- 前端依赖
|
||||
- Next.js、React、UI库、Tailwind、TypeScript与开发工具链
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
BE["后端应用"] --> Req["requirements.txt"]
|
||||
FE["前端应用"] --> Pkg["package.json"]
|
||||
Req --> FastAPI["FastAPI"]
|
||||
Req --> SQLA["SQLAlchemy/AsyncPG/Alembic"]
|
||||
Req --> RedisDep["Redis/APScheduler"]
|
||||
Req --> PW["Playwright"]
|
||||
Req --> HTTPX["HTTPX/python-dotenv"]
|
||||
Req --> Test["pytest/pytest-asyncio/aiosqlite"]
|
||||
Pkg --> Next["Next.js"]
|
||||
Pkg --> UI["@radix-ui/*"]
|
||||
Pkg --> Tail["Tailwind/Recharts"]
|
||||
Pkg --> TSDev["TypeScript/ESLint"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
|
||||
## 性能考虑
|
||||
- 后端
|
||||
- 使用异步ORM与连接池,避免阻塞;合理设置任务调度间隔,避免频繁扫描
|
||||
- 浏览器自动化(Playwright)需预装浏览器二进制,减少重复下载开销
|
||||
- 前端
|
||||
- 开发模式启用快速刷新;生产构建建议启用最小化与Tree-shaking
|
||||
- 数据库
|
||||
- 迁移日志级别按需调整,避免过度输出影响性能
|
||||
|
||||
## 故障排查指南
|
||||
- 容器健康与端口
|
||||
- 确认数据库与缓存容器健康检查通过,端口映射正确
|
||||
- 后端与前端容器分别监听8000与3000端口
|
||||
- 数据库连接
|
||||
- 检查数据库URL与凭据;确认Alembic配置与容器网络可达
|
||||
- 调试与日志
|
||||
- 后端生命周期钩子负责启动/关闭调度器;查看容器日志定位异常
|
||||
- 前端API封装统一处理错误响应,便于定位后端返回的错误详情
|
||||
- 定时任务
|
||||
- 若查询未按时执行,检查调度器状态与异常日志;确认UTC时间与时区一致性
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:16-34](file://docker-compose.yml#L16-L34)
|
||||
- [backend/app/main.py:13-21](file://backend/app/main.py#L13-L21)
|
||||
- [frontend/lib/api.ts:16-21](file://frontend/lib/api.ts#L16-L21)
|
||||
- [backend/app/workers/scheduler.py:42-90](file://backend/app/workers/scheduler.py#L42-L90)
|
||||
|
||||
## 结论
|
||||
通过容器化编排与严格的前后端配置,GEO提供了可复现的开发环境。配合本文档提供的IDE配置、调试与运维工具清单,开发者可以高效地进行功能迭代与问题定位。
|
||||
|
||||
## 附录
|
||||
|
||||
### IDE配置与推荐插件(VS Code)
|
||||
- Python相关
|
||||
- 扩展:Python、Pylance、Black、isort、pytest
|
||||
- 设置:启用Pylance类型检查、自动格式化、导入排序
|
||||
- TypeScript/Next.js相关
|
||||
- 扩展:ESLint、TypeScript Importer、Tailwind CSS IntelliSense
|
||||
- 设置:启用严格模式、路径别名、TS/TSX语法高亮
|
||||
- Docker
|
||||
- 扩展:Docker、Kubernetes(可选)
|
||||
- 用途:可视化容器状态、日志查看、镜像构建与部署
|
||||
|
||||
[本节为通用实践建议,不直接分析具体源码文件]
|
||||
|
||||
### 调试工具使用方法
|
||||
- 断点调试
|
||||
- 后端:在FastAPI路由或业务逻辑处设置断点,结合容器调试或本地调试
|
||||
- 前端:在API封装函数与页面逻辑处设置断点,观察请求/响应
|
||||
- 日志分析
|
||||
- 后端:关注调度器与API层日志;容器日志中过滤关键字定位异常
|
||||
- 数据库:调整Alembic日志级别以获取更详细的迁移信息
|
||||
- 性能分析
|
||||
- 后端:对慢查询与任务执行耗时进行采样;评估浏览器自动化并发
|
||||
- 前端:利用Next.js开发服务器热重载与性能面板
|
||||
|
||||
[本节为通用实践建议,不直接分析具体源码文件]
|
||||
|
||||
### 开发辅助工具
|
||||
- API测试
|
||||
- 工具:Postman、Insomnia、curl
|
||||
- 建议:使用环境变量管理不同环境的API基础URL与令牌
|
||||
- 数据库管理
|
||||
- 工具:pgAdmin、DBeaver、DataGrip
|
||||
- 建议:使用Alembic进行版本化迁移,避免手写DDL
|
||||
- Docker容器管理
|
||||
- 工具:Docker Desktop、Lens(可选)
|
||||
- 建议:使用Compose一键拉起所有服务,查看容器日志与健康状态
|
||||
|
||||
[本节为通用实践建议,不直接分析具体源码文件]
|
||||
|
||||
### 命令行工具与脚本使用
|
||||
- 后端
|
||||
- 安装依赖:pip install -r requirements.txt
|
||||
- 运行开发服务器:uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
|
||||
- 数据库迁移:alembic revision --autogenerate,alembic upgrade head
|
||||
- 前端
|
||||
- 安装依赖:npm ci
|
||||
- 开发:npm run dev(3000端口)
|
||||
- 构建:npm run build(Next.js 14.x)
|
||||
- Docker
|
||||
- 构建与启动:docker-compose up --build
|
||||
- 查看日志:docker-compose logs -f 服务名
|
||||
- 进入容器:docker-compose exec 服务名 sh
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [backend/Dockerfile:28-33](file://backend/Dockerfile#L28-L33)
|
||||
- [frontend/Dockerfile:6-7](file://frontend/Dockerfile#L6-L7)
|
||||
- [docker-compose.yml:36-66](file://docker-compose.yml#L36-L66)
|
||||
|
|
@ -0,0 +1,413 @@
|
|||
# 开发指南
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [frontend/package.json](file://frontend/package.json)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/Dockerfile](file://backend/Dockerfile)
|
||||
- [frontend/Dockerfile](file://frontend/Dockerfile)
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [backend/alembic.ini](file://backend/alembic.ini)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [frontend/tsconfig.json](file://frontend/tsconfig.json)
|
||||
- [frontend/tailwind.config.ts](file://frontend/tailwind.config.ts)
|
||||
- [frontend/.eslintrc.json](file://frontend/.eslintrc.json)
|
||||
- [tests/conftest.py](file://tests/conftest.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/schemas/auth.py](file://backend/app/schemas/auth.py)
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. 引言
|
||||
2. 项目结构
|
||||
3. 核心组件
|
||||
4. 架构总览
|
||||
5. 详细组件分析
|
||||
6. 依赖分析
|
||||
7. 性能考虑
|
||||
8. 故障排查指南
|
||||
9. 结论
|
||||
10. 附录
|
||||
|
||||
## 引言
|
||||
本开发指南面向GEO项目的开发者,旨在统一前后端代码规范与最佳实践,明确开发流程与工作流(包括分支策略、代码评审与版本发布),并提供开发工具使用方法(IDE配置、调试与性能分析)、新功能开发指导原则(模块设计、接口定义与测试要求),以及常见问题的排查方案。本指南以仓库中现有实现为依据,确保内容可落地、可执行。
|
||||
|
||||
## 项目结构
|
||||
GEO采用前后端分离架构,后端基于FastAPI,前端基于Next.js,数据库使用PostgreSQL,缓存使用Redis,任务调度使用APScheduler,浏览器自动化使用Playwright。项目通过Docker与docker-compose进行容器化编排,便于本地开发与部署。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "前端"
|
||||
FE_Docker["frontend/Dockerfile"]
|
||||
FE_Pkg["frontend/package.json"]
|
||||
FE_TS["frontend/tsconfig.json"]
|
||||
FE_ESLint[".eslintrc.json"]
|
||||
FE_Tailwind["frontend/tailwind.config.ts"]
|
||||
end
|
||||
subgraph "后端"
|
||||
BE_Docker["backend/Dockerfile"]
|
||||
BE_Req["backend/requirements.txt"]
|
||||
BE_Main["backend/app/main.py"]
|
||||
BE_Config["backend/app/config.py"]
|
||||
BE_Alembic["backend/alembic.ini"]
|
||||
end
|
||||
subgraph "基础设施"
|
||||
DB["PostgreSQL 容器"]
|
||||
REDIS["Redis 容器"]
|
||||
DC["docker-compose.yml"]
|
||||
end
|
||||
FE_Docker --> FE_Pkg
|
||||
FE_TS --> FE_ESLint
|
||||
FE_Tailwind --> FE_Pkg
|
||||
BE_Docker --> BE_Req
|
||||
BE_Main --> BE_Config
|
||||
BE_Alembic --> DB
|
||||
DC --> FE_Docker
|
||||
DC --> BE_Docker
|
||||
DC --> DB
|
||||
DC --> REDIS
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/alembic.ini:1-150](file://backend/alembic.ini#L1-L150)
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/alembic.ini:1-150](file://backend/alembic.ini#L1-L150)
|
||||
|
||||
## 核心组件
|
||||
- 后端服务入口:FastAPI应用在生命周期内启动查询调度器,并注册认证、查询词、引用数据、报告等路由。
|
||||
- 配置管理:使用Pydantic Settings加载环境变量,集中管理数据库、Redis、JWT、浏览器路径与第三方平台密钥等配置项。
|
||||
- 前端构建与运行:Next.js项目通过package.json脚本控制开发、构建与启动;TypeScript严格模式开启;ESLint规则继承Next.js核心Web Vitals与TypeScript默认规则;Tailwind CSS按需扫描组件与页面目录。
|
||||
- 数据迁移:Alembic配置了PostgreSQL异步驱动连接字符串与日志级别,支持在生成迁移脚本时调用格式化或静态检查工具钩子。
|
||||
- 测试基础:pytest会自动注入后端源码路径,提供模拟调度器、认证用户、依赖覆盖与异步HTTP客户端等测试夹具。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [frontend/tsconfig.json:1-27](file://frontend/tsconfig.json#L1-L27)
|
||||
- [frontend/.eslintrc.json:1-4](file://frontend/.eslintrc.json#L1-L4)
|
||||
- [frontend/tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
- [backend/alembic.ini:86-114](file://backend/alembic.ini#L86-L114)
|
||||
- [tests/conftest.py:1-71](file://tests/conftest.py#L1-L71)
|
||||
|
||||
## 架构总览
|
||||
下图展示了从浏览器到后端API再到数据库与缓存的整体交互路径,以及容器化编排关系。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
Browser["浏览器/移动端"]
|
||||
NextApp["Next.js 应用<br/>frontend/"]
|
||||
FastAPI["FastAPI 应用<br/>backend/app/main.py"]
|
||||
Postgres["PostgreSQL<br/>geo_platform"]
|
||||
Redis["Redis"]
|
||||
Playwright["Playwright 浏览器"]
|
||||
Browser --> NextApp
|
||||
NextApp --> FastAPI
|
||||
FastAPI --> Postgres
|
||||
FastAPI --> Redis
|
||||
FastAPI --> Playwright
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:24-47](file://backend/app/main.py#L24-L47)
|
||||
- [backend/app/config.py:7-13](file://backend/app/config.py#L7-L13)
|
||||
- [backend/Dockerfile:31-33](file://backend/Dockerfile#L31-L33)
|
||||
- [docker-compose.yml:4-20](file://docker-compose.yml#L4-L20)
|
||||
- [docker-compose.yml:22-34](file://docker-compose.yml#L22-L34)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 后端服务与路由
|
||||
- 应用生命周期:在应用启动时导入模型并启动查询调度器,在关闭时优雅停止调度器。
|
||||
- 路由注册:认证、查询词、引用数据、报告等模块化路由按前缀挂载,便于版本化与职责划分。
|
||||
- 健康检查:提供/health端点返回服务状态。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant App as "FastAPI 应用"
|
||||
participant Scheduler as "查询调度器"
|
||||
participant Router as "各模块路由"
|
||||
Client->>App : "GET /health"
|
||||
App-->>Client : "{status : ok}"
|
||||
App->>Scheduler : "start()"
|
||||
App->>Router : "include_router(...)"
|
||||
Router-->>App : "注册完成"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:13-21](file://backend/app/main.py#L13-L21)
|
||||
- [backend/app/main.py:38-42](file://backend/app/main.py#L38-L42)
|
||||
- [backend/app/main.py:45-47](file://backend/app/main.py#L45-L47)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
|
||||
### 认证模块与数据模型
|
||||
- 认证接口:注册、登录、当前用户信息读取,返回TokenResponse与UserResponse。
|
||||
- Pydantic模型:UserRegister、UserLogin、UserResponse、TokenResponse,约束字段类型与长度。
|
||||
- SQLAlchemy模型:User表包含邮箱唯一性、密码哈希、计划等级、配额、激活状态与时间戳等字段,并与Query、Subscription建立一对多关系。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class UserRegister {
|
||||
+邮箱
|
||||
+密码(最小长度)
|
||||
+姓名(长度范围)
|
||||
}
|
||||
class UserLogin {
|
||||
+邮箱
|
||||
+密码
|
||||
}
|
||||
class UserResponse {
|
||||
+id(UUID)
|
||||
+email
|
||||
+name
|
||||
+plan
|
||||
+max_queries
|
||||
+is_active
|
||||
+created_at
|
||||
}
|
||||
class TokenResponse {
|
||||
+access_token
|
||||
+token_type
|
||||
+user(UserResponse)
|
||||
}
|
||||
class User {
|
||||
+id(UUID)
|
||||
+email(唯一)
|
||||
+password_hash
|
||||
+name
|
||||
+plan
|
||||
+max_queries
|
||||
+is_active
|
||||
+created_at
|
||||
+updated_at
|
||||
}
|
||||
UserRegister --> UserResponse : "注册输入"
|
||||
UserLogin --> TokenResponse : "登录输出"
|
||||
UserResponse --> User : "序列化自属性"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/schemas/auth.py:7-34](file://backend/app/schemas/auth.py#L7-L34)
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/schemas/auth.py:1-34](file://backend/app/schemas/auth.py#L1-L34)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
|
||||
### 数据库与迁移
|
||||
- 连接字符串:使用异步PostgreSQL驱动,指向compose中的db服务。
|
||||
- 日志配置:设置SQLAlchemy与Alembic日志级别,便于定位迁移问题。
|
||||
- 工具钩子:可选集成格式化与静态检查工具对生成的迁移脚本进行处理。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["开始"]) --> CheckURL["检查数据库URL"]
|
||||
CheckURL --> LogCfg["配置日志级别"]
|
||||
LogCfg --> Hooks{"是否启用钩子?"}
|
||||
Hooks --> |是| RunHooks["运行格式化/静态检查"]
|
||||
Hooks --> |否| SkipHooks["跳过钩子"]
|
||||
RunHooks --> Done(["完成"])
|
||||
SkipHooks --> Done
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/alembic.ini:86-114](file://backend/alembic.ini#L86-L114)
|
||||
|
||||
章节来源
|
||||
- [backend/alembic.ini:1-150](file://backend/alembic.ini#L1-L150)
|
||||
- [backend/app/config.py:7-8](file://backend/app/config.py#L7-L8)
|
||||
|
||||
### 前端工程化
|
||||
- 构建与运行:dev/build/start/lint脚本由Next.js提供。
|
||||
- TypeScript:严格模式、不输出JS、模块解析采用bundler、路径别名@/*映射根目录。
|
||||
- ESLint:继承Next.js核心Web Vitals与TypeScript规则。
|
||||
- Tailwind:按需扫描pages/components/app目录,启用动画插件。
|
||||
|
||||
章节来源
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [frontend/tsconfig.json:1-27](file://frontend/tsconfig.json#L1-L27)
|
||||
- [frontend/.eslintrc.json:1-4](file://frontend/.eslintrc.json#L1-L4)
|
||||
- [frontend/tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
|
||||
## 依赖分析
|
||||
- 后端依赖:FastAPI、SQLAlchemy、Pydantic、Redis、APScheduler、Playwright、HTTPX、dotenv、pytest等。
|
||||
- 前端依赖:Next.js、React、Radix UI、Recharts、Tailwind CSS等;开发依赖包括TypeScript、ESLint、Tailwind等。
|
||||
- 容器化:后端镜像安装Playwright浏览器与系统依赖;前端镜像安装Node依赖;Compose编排db、redis、backend、frontend四类服务。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
subgraph "后端"
|
||||
FastAPI["FastAPI"]
|
||||
SQLA["SQLAlchemy"]
|
||||
Pydantic["Pydantic"]
|
||||
RedisDep["Redis"]
|
||||
APS["APScheduler"]
|
||||
PW["Playwright"]
|
||||
HTTPX["HTTPX"]
|
||||
DOTENV["python-dotenv"]
|
||||
PyTest["pytest"]
|
||||
end
|
||||
subgraph "前端"
|
||||
Next["Next.js"]
|
||||
React["React"]
|
||||
Radix["Radix UI"]
|
||||
Recharts["Recharts"]
|
||||
Tailwind["Tailwind CSS"]
|
||||
TS["TypeScript"]
|
||||
ESL["ESLint"]
|
||||
end
|
||||
FastAPI --> SQLA
|
||||
FastAPI --> Pydantic
|
||||
FastAPI --> RedisDep
|
||||
FastAPI --> APS
|
||||
FastAPI --> PW
|
||||
FastAPI --> HTTPX
|
||||
FastAPI --> DOTENV
|
||||
Next --> React
|
||||
Next --> Tailwind
|
||||
Next --> Radix
|
||||
Next --> Recharts
|
||||
Next --> TS
|
||||
Next --> ESL
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:11-38](file://frontend/package.json#L11-L38)
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
|
||||
## 性能考虑
|
||||
- 异步化:后端使用异步数据库驱动与异步HTTP客户端,减少阻塞,提升并发能力。
|
||||
- 缓存:Redis用于任务调度与会话等场景,建议在热点数据访问处引入缓存层。
|
||||
- 任务调度:APScheduler负责周期性任务,注意避免重复任务与资源泄漏,结合优雅停机逻辑。
|
||||
- 前端构建:严格模式与按需扫描Tailwind可降低包体与构建开销;生产构建建议开启压缩与Tree Shaking。
|
||||
- 数据库:合理索引与查询优化,避免N+1查询;批量写入与事务合并可减少往返次数。
|
||||
|
||||
## 故障排查指南
|
||||
- 启动失败(后端):检查数据库与Redis健康状态,确认连接字符串与端口映射正确;查看Uvicorn日志与容器重启策略。
|
||||
- 认证异常:核对JWT密钥与过期时间配置;确认请求头携带正确的Bearer Token;检查依赖覆盖与用户mock是否生效。
|
||||
- 数据迁移问题:检查Alembic日志级别与钩子配置;确认数据库URL与凭据;必要时手动回滚或修复迁移脚本。
|
||||
- 前端样式异常:确认Tailwind content扫描路径与组件目录一致;清理.next缓存后重新构建。
|
||||
- 测试失败:确认pytest会话注入后端路径;检查调度器mock与依赖覆盖;使用异步HTTP客户端发起请求。
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:4-34](file://docker-compose.yml#L4-L34)
|
||||
- [backend/app/config.py:7-13](file://backend/app/config.py#L7-L13)
|
||||
- [tests/conftest.py:19-50](file://tests/conftest.py#L19-L50)
|
||||
- [backend/alembic.ini:115-150](file://backend/alembic.ini#L115-L150)
|
||||
- [frontend/tailwind.config.ts:5-9](file://frontend/tailwind.config.ts#L5-L9)
|
||||
|
||||
## 结论
|
||||
本指南基于仓库现有实现,给出了统一的代码规范、开发流程与工具使用建议。建议在后续迭代中补充更详细的Git分支策略、代码评审清单与发布流程文档,并持续完善测试覆盖率与性能监控体系。
|
||||
|
||||
## 附录
|
||||
|
||||
### 代码规范与最佳实践
|
||||
|
||||
- Python(后端)
|
||||
- 使用Pydantic v2进行数据校验与配置管理,字段约束与默认值清晰明确。
|
||||
- 异步编程:优先使用异步数据库与HTTP客户端,避免阻塞操作。
|
||||
- 错误处理:对外抛出HTTPException并设置合适的状态码与错误信息。
|
||||
- 模块化:API、Schema、Model、Service分层清晰,职责单一。
|
||||
- 配置:通过Pydantic Settings从.env加载配置,区分开发与生产环境。
|
||||
|
||||
- TypeScript(前端)
|
||||
- 严格模式开启,禁用输出JS,使用bundler解析模块,确保类型安全。
|
||||
- ESLint规则继承Next.js核心Web Vitals与TypeScript默认规则,保持一致性。
|
||||
- Tailwind按需扫描组件与页面目录,减少CSS体积;启用动画插件提升交互体验。
|
||||
- 路径别名@/*映射根目录,简化导入路径。
|
||||
|
||||
- 命名约定
|
||||
- Python:类使用PascalCase;模块、函数与变量使用snake_case;常量使用UPPER_CASE。
|
||||
- TypeScript:接口与类型使用PascalCase;变量与函数使用camelCase;枚举使用UPPER_CASE。
|
||||
|
||||
### 开发流程与工作流
|
||||
|
||||
- Git分支策略(建议)
|
||||
- 主分支:保护分支,仅允许通过PR合并。
|
||||
- 功能分支:feature/xxx,完成后合并到develop。
|
||||
- 发布分支:release/x.y.z,用于预发布与回归测试。
|
||||
- 热修复分支:hotfix/xxx,从主分支切出,修复后通过PR合并回主分支并同步至develop。
|
||||
|
||||
- 代码评审(建议)
|
||||
- PR必须包含变更说明、测试用例与性能影响评估。
|
||||
- 至少一名Reviewer同意后方可合并。
|
||||
- 评审关注点:代码质量、安全性、可维护性与兼容性。
|
||||
|
||||
- 版本发布管理(建议)
|
||||
- 语义化版本:主版本用于不兼容的破坏性变更,次版本用于新增向后兼容的功能,补丁版本用于向后兼容的问题修复。
|
||||
- 发布前:更新CHANGELOG,运行全量测试,检查依赖安全漏洞。
|
||||
- 发布后:同步文档与环境配置,监控线上指标。
|
||||
|
||||
### 开发工具使用方法
|
||||
|
||||
- IDE配置(建议)
|
||||
- VS Code:安装Python与TypeScript扩展,启用ESLint与Prettier;配置Python解释器为虚拟环境。
|
||||
- 前端:启用TypeScript智能提示与ESLint实时检查;Tailwind IntelliSense增强CSS类提示。
|
||||
|
||||
- 调试技巧
|
||||
- 后端:使用Uvicorn的reload选项热重载;在FastAPI中设置调试日志级别;利用依赖注入覆盖与mock替换真实外部服务。
|
||||
- 前端:使用Next.js dev模式热更新;在浏览器开发者工具中检查网络与状态;Tailwind调试辅助类辅助布局。
|
||||
|
||||
- 性能分析工具(建议)
|
||||
- 后端:使用cProfile或py-spy分析CPU热点,使用tracemalloc分析内存分配;结合APScheduler监控任务耗时。
|
||||
- 前端:使用Chrome DevTools Performance面板分析渲染与网络;使用Lighthouse评估SEO与可访问性。
|
||||
|
||||
### 新功能开发指导原则
|
||||
|
||||
- 模块设计
|
||||
- 遵循“API-Service-Model”三层架构,保持关注点分离。
|
||||
- 将业务逻辑封装在Service层,避免在API层直接操作数据库。
|
||||
|
||||
- 接口定义
|
||||
- 使用Pydantic模型定义请求与响应结构,明确字段类型与约束。
|
||||
- 对外暴露RESTful接口,遵循统一的前缀与标签组织路由。
|
||||
|
||||
- 测试要求
|
||||
- 单元测试:覆盖关键业务逻辑与边界条件。
|
||||
- 集成测试:使用pytest与AsyncClient发起HTTP请求,验证端到端流程。
|
||||
- Mock策略:对调度器、外部服务与数据库进行合理Mock,保证测试稳定性。
|
||||
|
||||
### 常见问题与解决方案
|
||||
|
||||
- 数据库连接失败
|
||||
- 检查PostgreSQL容器健康状态与端口映射;确认DATABASE_URL与凭据。
|
||||
- Redis连接失败
|
||||
- 检查Redis容器健康状态与端口映射;确认REDIS_URL。
|
||||
- Playwright无法启动浏览器
|
||||
- 确认Dockerfile中已安装Playwright浏览器与系统依赖;检查PLAYWRIGHT_BROWSERS_PATH。
|
||||
- CORS跨域问题
|
||||
- 核对CORS中间件配置的allow_origins与headers;确保前端域名与端口匹配。
|
||||
- JWT认证失败
|
||||
- 检查JWT_SECRET与过期时间;确认请求头Authorization格式为Bearer Token。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:30-36](file://backend/app/main.py#L30-L36)
|
||||
- [backend/app/config.py:9-13](file://backend/app/config.py#L9-L13)
|
||||
- [backend/Dockerfile:31-33](file://backend/Dockerfile#L31-L33)
|
||||
- [docker-compose.yml:4-20](file://docker-compose.yml#L4-L20)
|
||||
- [docker-compose.yml:22-34](file://docker-compose.yml#L22-L34)
|
||||
|
|
@ -0,0 +1,283 @@
|
|||
# 开发流程
|
||||
|
||||
<cite>
|
||||
**本文引用的文件**
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [backend/Dockerfile](file://backend/Dockerfile)
|
||||
- [frontend/Dockerfile](file://frontend/Dockerfile)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [frontend/package.json](file://frontend/package.json)
|
||||
- [tests/conftest.py](file://tests/conftest.py)
|
||||
- [backend/alembic.ini](file://backend/alembic.ini)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py)
|
||||
- [frontend/README.md](file://frontend/README.md)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖关系分析](#依赖关系分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件面向GEO项目的开发团队,提供一套完整的开发流程规范,涵盖Git分支策略与工作流、代码审查流程、版本发布管理、持续集成/持续部署(CI/CD)配置说明,以及开发环境搭建与团队协作最佳实践。内容基于仓库现有技术栈与容器化配置进行设计,确保前后端协同开发、数据库迁移与测试流程可追溯、可复现。
|
||||
|
||||
## 项目结构
|
||||
GEO采用前后端分离与容器编排的组织方式:
|
||||
- 前端:Next.js 14 应用,使用TypeScript与TailwindCSS,开发端口3000。
|
||||
- 后端:FastAPI + Uvicorn,使用异步数据库驱动与Redis缓存,开发端口8000。
|
||||
- 数据层:PostgreSQL 15 + Redis 7,通过Docker Compose统一编排。
|
||||
- 迁移与版本管理:Alembic用于数据库迁移脚本生成与执行。
|
||||
- 测试:pytest + pytest-asyncio,配合HTTP异步客户端与依赖注入覆盖。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "本地开发环境"
|
||||
FE["前端服务<br/>Next.js 3000"]
|
||||
BE["后端服务<br/>FastAPI 8000"]
|
||||
DB["数据库<br/>PostgreSQL 15"]
|
||||
RC["缓存<br/>Redis 7"]
|
||||
end
|
||||
FE --> |"HTTP 请求"| BE
|
||||
BE --> DB
|
||||
BE --> RC
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [docker-compose.yml:36-66](file://docker-compose.yml#L36-L66)
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
|
||||
## 核心组件
|
||||
- 前端工程
|
||||
- 使用Next.js应用路由与API路由,开发脚本与构建脚本在package.json中定义。
|
||||
- TypeScript与ESLint配置位于前端根目录,保证类型安全与代码风格一致。
|
||||
- 后端工程
|
||||
- FastAPI应用入口与异步数据库连接,依赖通过requirements.txt集中管理。
|
||||
- Alembic迁移配置与初始迁移脚本存在,支持数据库演进。
|
||||
- 测试工程
|
||||
- pytest会话级fixture用于屏蔽真实定时任务,提供模拟用户与认证头,便于快速集成测试。
|
||||
- 容器化与编排
|
||||
- Dockerfile分别构建前后端镜像;docker-compose统一启动数据库、缓存、后端与前端服务,并设置健康检查与依赖顺序。
|
||||
|
||||
章节来源
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [tests/conftest.py:1-71](file://tests/conftest.py#L1-L71)
|
||||
- [backend/alembic.ini:1-134](file://backend/alembic.ini#L1-L134)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py)
|
||||
|
||||
## 架构总览
|
||||
下图展示从浏览器到后端API再到数据库与缓存的整体调用链路,体现开发环境中的典型请求路径与数据流向。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Browser as "浏览器"
|
||||
participant Frontend as "前端应用"
|
||||
participant Backend as "后端API"
|
||||
participant DB as "数据库"
|
||||
participant Cache as "缓存"
|
||||
Browser->>Frontend : "访问页面"
|
||||
Frontend->>Backend : "发起HTTP请求"
|
||||
Backend->>DB : "查询/写入数据"
|
||||
DB-->>Backend : "返回结果"
|
||||
Backend->>Cache : "读取/更新缓存"
|
||||
Cache-->>Backend : "返回缓存命中/更新"
|
||||
Backend-->>Frontend : "响应JSON"
|
||||
Frontend-->>Browser : "渲染页面"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [docker-compose.yml:36-66](file://docker-compose.yml#L36-L66)
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### Git 分支策略与工作流
|
||||
建议采用Git Flow变体,结合Feature分支与Release分支,确保开发与发布的可控性与可追溯性:
|
||||
|
||||
- 主分支
|
||||
- main:仅允许来自release或hotfix的合并,保持生产就绪状态。
|
||||
- develop:集成各功能特性,作为后续release的基线。
|
||||
- 功能分支(feature)
|
||||
- 命名:`feature/<issue-id>-短描述`
|
||||
- 从develop切出,完成后合并回develop并删除分支。
|
||||
- 预发布分支(release)
|
||||
- 从develop切出,进行最终修复与回归测试,完成后合并至main与develop并打标签。
|
||||
- 热修复分支(hotfix)
|
||||
- 从main切出,紧急修复后同时合并回main与develop。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A["开始"] --> B["创建功能分支 feature/<id>-desc"]
|
||||
B --> C["提交到功能分支"]
|
||||
C --> D{"完成开发?"}
|
||||
D --> |否| C
|
||||
D --> |是| E["推送并发起PR到 develop"]
|
||||
E --> F["代码审查与测试通过"]
|
||||
F --> G["合并到 develop 并删除分支"]
|
||||
G --> H["准备 release 分支"]
|
||||
H --> I["最终测试与修复"]
|
||||
I --> J["合并到 main 与 develop 并打标签"]
|
||||
```
|
||||
|
||||
说明
|
||||
- PR需关联Issue,确保需求可追溯。
|
||||
- 合并前必须通过本地与CI测试,且无冲突。
|
||||
|
||||
### 代码审查流程
|
||||
- Pull Request 模板
|
||||
- 必填字段:标题(简述变更)、类型(修复/特性/重构/文档)、影响范围、变更摘要、测试要点、风险评估与回滚预案。
|
||||
- 关联信息:关联Issue编号、相关PR链接、模块负责人。
|
||||
- 审查标准
|
||||
- 正确性:单元测试与集成测试覆盖率达标;边界条件与异常处理完备。
|
||||
- 可读性:命名规范、注释清晰、函数长度与复杂度合理。
|
||||
- 兼容性:接口变更需向后兼容或提供迁移方案;数据库变更需可逆。
|
||||
- 安全性:输入校验、权限控制、敏感信息脱敏。
|
||||
- 合并要求
|
||||
- 至少一名审查者批准。
|
||||
- CI流水线通过。
|
||||
- 无未解决评论。
|
||||
- 合并后清理功能分支。
|
||||
|
||||
### 版本发布管理
|
||||
- 语义化版本控制
|
||||
- 主版本号:破坏性变更
|
||||
- 次版本号:新增向后兼容的功能
|
||||
- 修订号:向后兼容的问题修复
|
||||
- 变更日志维护
|
||||
- 按版本记录:新增、修复、改进、废弃与破坏性变更。
|
||||
- 引用相关PR与Issue,便于追溯。
|
||||
- 发布标签
|
||||
- 在release分支合并后于main打标签,格式:vMAJOR.MINOR.REVISION。
|
||||
- 为每个版本生成对应Docker镜像并推送到制品库。
|
||||
|
||||
### 持续集成/持续部署(CI/CD)
|
||||
当前仓库未包含CI配置文件,建议在CI系统中实现以下流程:
|
||||
- 触发条件
|
||||
- push到develop/release/main/hotfix或打开PR时触发。
|
||||
- 步骤
|
||||
- 安装依赖:后端pip、前端npm ci。
|
||||
- 代码质量:后端使用ruff/black,前端使用ESLint。
|
||||
- 单元测试:pytest与pytest-asyncio,生成覆盖率报告。
|
||||
- 容器构建:分别构建后端与前端镜像。
|
||||
- 健康检查:启动docker-compose并在数据库/缓存健康后运行端到端测试。
|
||||
- 制品归档:上传测试报告与镜像。
|
||||
- 部署
|
||||
- develop:自动部署到预发布环境。
|
||||
- release:手动审批后部署到生产环境。
|
||||
- hotfix:自动部署到生产并回补到develop。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
S["提交/PR触发"] --> L["安装依赖与代码质量检查"]
|
||||
L --> T["运行单元测试与覆盖率"]
|
||||
T --> B["构建后端/前端镜像"]
|
||||
B --> D["启动容器并健康检查"]
|
||||
D --> E["端到端测试"]
|
||||
E --> R{"是否发布?"}
|
||||
R --> |否| P["归档制品与报告"]
|
||||
R --> |是| DEP["部署到目标环境"]
|
||||
DEP --> TAG["打标签并发布"]
|
||||
```
|
||||
|
||||
说明
|
||||
- 以上为通用CI/CD流程建议,具体实现需在CI平台配置相应作业与工件。
|
||||
|
||||
### 开发环境搭建与团队协作最佳实践
|
||||
- 环境准备
|
||||
- 安装Docker与Docker Compose,确保端口未被占用。
|
||||
- 复制示例环境变量文件并按需调整数据库与缓存参数。
|
||||
- 启动步骤
|
||||
- 后端:进入backend目录,安装依赖并启动服务;或使用docker-compose一键启动。
|
||||
- 前端:进入frontend目录,安装依赖并启动开发服务器。
|
||||
- 团队协作
|
||||
- 统一代码风格:后端使用ruff/black,前端使用ESLint,样式统一使用TailwindCSS工具类。
|
||||
- 提交信息规范:type(scope): subject,如feat(api): 新增用户认证接口。
|
||||
- 冲突解决:优先rebase保持线性历史,必要时使用merge并保留解决记录。
|
||||
- 文档同步:变更涉及用户界面或API时同步更新README与变更日志。
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [frontend/README.md:1-37](file://frontend/README.md#L1-L37)
|
||||
|
||||
## 依赖关系分析
|
||||
- 技术栈依赖
|
||||
- 前端:Next.js、React、TypeScript、TailwindCSS、Radix UI等。
|
||||
- 后端:FastAPI、SQLAlchemy、AsyncPG、Alembic、Redis、APScheduler、Playwright等。
|
||||
- 容器与编排
|
||||
- docker-compose定义了数据库、缓存、后端与前端服务的依赖关系与健康检查。
|
||||
- 测试依赖
|
||||
- pytest与pytest-asyncio用于异步测试;ASGI传输用于HTTP客户端测试;依赖注入覆盖用于模拟认证上下文。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
FE["前端应用"] --> |HTTP| BE["后端API"]
|
||||
BE --> DB["PostgreSQL"]
|
||||
BE --> RC["Redis"]
|
||||
BE --> AL["Alembic 迁移"]
|
||||
TEST["pytest 测试"] --> BE
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [docker-compose.yml:36-66](file://docker-compose.yml#L36-L66)
|
||||
- [tests/conftest.py:1-71](file://tests/conftest.py#L1-L71)
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [tests/conftest.py:1-71](file://tests/conftest.py#L1-L71)
|
||||
- [backend/alembic.ini:1-134](file://backend/alembic.ini#L1-L134)
|
||||
|
||||
## 性能考虑
|
||||
- 数据库
|
||||
- 使用异步驱动与连接池,避免阻塞;迁移脚本应最小化DDL变更,避免长事务锁表。
|
||||
- 缓存
|
||||
- 合理设置TTL与键空间,热点数据优先缓存;定期清理过期键。
|
||||
- API
|
||||
- 控制响应大小与分页;对高频接口启用缓存;限制并发与速率。
|
||||
- 前端
|
||||
- 图片与静态资源优化;按需加载;TailwindCSS类名避免冗余。
|
||||
- 容器
|
||||
- 使用多阶段构建减少镜像体积;健康检查用于及时发现异常容器并确保依赖服务就绪后再启动;资源限制防止资源争用。
|
||||
|
||||
## 故障排查指南
|
||||
- 本地服务无法启动
|
||||
- 检查端口占用与防火墙;确认数据库与缓存服务已健康运行。
|
||||
- 数据库迁移失败
|
||||
- 查看Alembic配置与URL;确认迁移脚本语法正确;必要时回滚至上一版本。
|
||||
- 测试异常
|
||||
- 确认pytest会话级fixture已正确屏蔽定时任务;检查依赖注入覆盖是否生效。
|
||||
- 前后端联调失败
|
||||
- 核对CORS与代理配置;确认API路由与鉴权头正确传递。
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:16-20](file://docker-compose.yml#L16-L20)
|
||||
- [docker-compose.yml:30-34](file://docker-compose.yml#L30-L34)
|
||||
- [backend/alembic.ini:86-89](file://backend/alembic.ini#L86-L89)
|
||||
- [tests/conftest.py:19-25](file://tests/conftest.py#L19-L25)
|
||||
|
||||
## 结论
|
||||
通过明确的分支策略、严格的代码审查流程、规范的版本发布管理与可落地的CI/CD实践,GEO项目能够在保障交付质量的同时提升团队协作效率。建议尽快在CI平台落地上述流程,并根据实际运行情况迭代优化。
|
||||
|
||||
## 附录
|
||||
- 快速启动命令
|
||||
- docker-compose:启动所有服务并进入交互模式。
|
||||
- 前端:进入frontend目录,安装依赖后启动开发服务器。
|
||||
- 后端:进入backend目录,安装依赖后启动Uvicorn服务。
|
||||
- 参考文档
|
||||
- Next.js官方文档与部署指南。
|
||||
- FastAPI与SQLAlchemy异步数据库实践。
|
||||
- Alembic迁移最佳实践。
|
||||
|
|
@ -0,0 +1,359 @@
|
|||
# 新功能开发
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/schemas/auth.py](file://backend/app/schemas/auth.py)
|
||||
- [backend/app/schemas/query.py](file://backend/app/schemas/query.py)
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/services/auth.py](file://backend/app/services/auth.py)
|
||||
- [backend/app/services/query.py](file://backend/app/services/query.py)
|
||||
- [frontend/app/layout.tsx](file://frontend/app/layout.tsx)
|
||||
- [frontend/components/providers.tsx](file://frontend/components/providers.tsx)
|
||||
- [frontend/lib/api.ts](file://frontend/lib/api.ts)
|
||||
- [tests/test_auth.py](file://tests/test_auth.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本指导文档面向在 GEO 平台进行新功能开发的工程师,系统阐述后端 API 模块设计原则、接口定义与依赖管理;前端页面开发规范(页面组织、组件设计、状态管理);数据库模型设计指导(表结构、关系映射、索引策略);以及测试驱动开发(TDD)最佳实践与单元测试编写指南。文档以现有代码库为依据,结合可扩展性与一致性原则,帮助团队高效、安全地交付高质量功能。
|
||||
|
||||
## 项目结构
|
||||
- 后端采用 FastAPI + SQLAlchemy 异步 ORM 架构,按领域分层组织:API 路由、服务层、模型层、模式层(Pydantic)、依赖注入与工具。
|
||||
- 前端采用 Next.js App Router,使用 TypeScript 与 TailwindCSS,并通过 NextAuth 管理会话状态;所有后端调用经由统一的 API 客户端封装。
|
||||
- 测试覆盖后端 API 与服务逻辑,使用 pytest 异步客户端与依赖覆盖机制。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "前端"
|
||||
FE_App["Next.js 应用<br/>根布局与全局样式"]
|
||||
FE_Providers["会话提供者<br/>NextAuth Provider"]
|
||||
FE_API["API 客户端<br/>统一请求封装"]
|
||||
end
|
||||
subgraph "后端"
|
||||
BE_Main["FastAPI 应用<br/>生命周期与中间件"]
|
||||
BE_Routers["API 路由<br/>认证/查询/引用/报告"]
|
||||
BE_Services["服务层<br/>业务逻辑"]
|
||||
BE_Models["模型层<br/>SQLAlchemy ORM"]
|
||||
BE_Schemas["模式层<br/>Pydantic 校验"]
|
||||
end
|
||||
FE_App --> FE_Providers
|
||||
FE_API --> BE_Routers
|
||||
BE_Main --> BE_Routers
|
||||
BE_Routers --> BE_Services
|
||||
BE_Services --> BE_Models
|
||||
BE_Services --> BE_Schemas
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [frontend/components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [frontend/components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
## 核心组件
|
||||
- 后端应用与生命周期:定义 CORS、路由注册、健康检查与任务调度器启动/关闭。
|
||||
- 认证模块:注册、登录、当前用户信息,配合 JWT 令牌与密码哈希。
|
||||
- 查询模块:CRUD、分页、权限校验(用户配额限制)、频率变更触发下次执行时间重算。
|
||||
- 数据模型:用户、查询、引用记录、查询任务、订阅等,定义字段、外键与索引。
|
||||
- 前端布局与会话:根布局、全局样式、NextAuth Provider 包裹。
|
||||
- 前端 API 客户端:统一封装鉴权头、错误处理与各模块接口。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [frontend/components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
## 架构总览
|
||||
后端通过 FastAPI 聚合多个领域路由,每个路由对应一个服务层函数,服务层通过 SQLAlchemy 异步会话访问数据库模型,并使用 Pydantic 模式进行输入输出校验。前端通过统一 API 客户端发起请求,自动附加鉴权头,错误时统一解析并抛出。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant FE as "前端页面"
|
||||
participant API as "API 客户端"
|
||||
participant Router as "FastAPI 路由"
|
||||
participant Service as "服务层"
|
||||
participant Model as "模型层(SQLAlchemy)"
|
||||
participant DB as "数据库"
|
||||
FE->>API : "调用接口(带鉴权头)"
|
||||
API->>Router : "HTTP 请求"
|
||||
Router->>Service : "调用业务方法"
|
||||
Service->>Model : "ORM 查询/更新"
|
||||
Model->>DB : "异步 SQL 执行"
|
||||
DB-->>Model : "结果集"
|
||||
Model-->>Service : "领域对象"
|
||||
Service-->>Router : "返回结果"
|
||||
Router-->>API : "JSON 响应"
|
||||
API-->>FE : "解析并渲染"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/services/query.py:1-130](file://backend/app/services/query.py#L1-L130)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 后端模块设计原则与依赖管理
|
||||
- 模块划分
|
||||
- API 层:定义路由、参数校验、异常映射与响应模型。
|
||||
- 服务层:封装业务规则、权限控制与跨模型操作。
|
||||
- 模型层:定义表结构、关系与索引。
|
||||
- 模式层:Pydantic 模型用于请求/响应序列化与校验。
|
||||
- 接口定义
|
||||
- 统一前缀与标签:如 /api/v1/auth、/api/v1/queries 等。
|
||||
- 参数校验:Query 分页参数、Pydantic 字段长度与枚举校验。
|
||||
- 错误处理:HTTP 状态码与错误详情映射。
|
||||
- 依赖管理
|
||||
- 依赖注入:数据库会话、当前用户通过 Depends 获取。
|
||||
- 生命周期:应用启动时初始化模型与调度器,关闭时优雅退出。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/schemas/auth.py:1-34](file://backend/app/schemas/auth.py#L1-L34)
|
||||
- [backend/app/schemas/query.py:1-94](file://backend/app/schemas/query.py#L1-L94)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/services/query.py:1-130](file://backend/app/services/query.py#L1-L130)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
|
||||
### 认证模块(API/服务/模式)
|
||||
- 路由设计
|
||||
- 注册:接收注册体,返回用户信息(201)。
|
||||
- 登录:校验凭据,签发 JWT,返回令牌与用户信息。
|
||||
- 当前用户:基于依赖注入获取已认证用户。
|
||||
- 处理逻辑
|
||||
- 密码哈希与校验、JWT 过期时间配置、邮箱唯一性检查。
|
||||
- 错误处理
|
||||
- 注册重复邮箱、登录凭据无效、未授权访问。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as "客户端"
|
||||
participant R as "认证路由"
|
||||
participant S as "认证服务"
|
||||
participant U as "用户模型"
|
||||
participant T as "JWT/密码库"
|
||||
C->>R : "POST /api/v1/auth/register"
|
||||
R->>S : "register_user()"
|
||||
S->>U : "查询邮箱是否已存在"
|
||||
U-->>S : "不存在"
|
||||
S->>T : "hash_password()"
|
||||
S->>U : "创建用户并持久化"
|
||||
U-->>S : "返回用户"
|
||||
S-->>R : "用户信息"
|
||||
R-->>C : "201 + 用户"
|
||||
C->>R : "POST /api/v1/auth/login"
|
||||
R->>S : "authenticate_user()"
|
||||
S->>U : "查找用户"
|
||||
U-->>S : "用户对象"
|
||||
S->>T : "verify_password()"
|
||||
T-->>S : "匹配成功"
|
||||
S-->>R : "用户+令牌"
|
||||
R-->>C : "200 + 令牌"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/schemas/auth.py:1-34](file://backend/app/schemas/auth.py#L1-L34)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/schemas/auth.py:1-34](file://backend/app/schemas/auth.py#L1-L34)
|
||||
|
||||
### 查询模块(API/服务/模式/模型)
|
||||
- 路由设计
|
||||
- 列表:支持 skip/limit 分页,返回 items 与 total。
|
||||
- 创建:校验平台与频率,检查用户配额,计算 next_query_at。
|
||||
- 读取/更新/删除:基于 query_id 与 user_id 的权限校验。
|
||||
- 处理逻辑
|
||||
- 频率变更时重算下次执行时间。
|
||||
- 删除级联清理关联任务与引用记录。
|
||||
- 错误处理
|
||||
- 403 配额超限、404 未找到、401 未授权。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["创建查询"]) --> Count["统计用户查询数量"]
|
||||
Count --> CheckLimit{"是否超过配额?"}
|
||||
CheckLimit --> |是| Raise403["抛出权限错误(403)"]
|
||||
CheckLimit --> |否| CalcNext["根据频率计算 next_query_at"]
|
||||
CalcNext --> Persist["持久化查询记录"]
|
||||
Persist --> Done(["完成"])
|
||||
Raise403 --> End(["结束"])
|
||||
Done --> End
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/services/query.py:45-81](file://backend/app/services/query.py#L45-L81)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/services/query.py:1-130](file://backend/app/services/query.py#L1-L130)
|
||||
- [backend/app/schemas/query.py:1-94](file://backend/app/schemas/query.py#L1-L94)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
|
||||
### 前端页面开发规范
|
||||
- 页面组织
|
||||
- 使用 Next.js App Router 的分组路由与布局,根布局统一注入 Provider。
|
||||
- 组件设计
|
||||
- UI 组件库化,遵循语义化与可复用性;页面组件负责数据获取与展示。
|
||||
- 状态管理
|
||||
- 使用 NextAuth 提供的会话状态,通过 Provider 在应用根部注入。
|
||||
- API 调用
|
||||
- 统一的 api.ts 封装:自动添加 Content-Type 与 Authorization 头,统一错误处理与 JSON 解析。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
Root["根布局<br/>layout.tsx"] --> Providers["会话提供者<br/>providers.tsx"]
|
||||
Providers --> Pages["页面与组件"]
|
||||
Pages --> API["API 客户端<br/>api.ts"]
|
||||
API --> Backend["后端 API"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [frontend/components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
章节来源
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [frontend/components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
### 数据库模型设计指导
|
||||
- 表结构设计
|
||||
- 使用 UUID 主键,字符串长度约束,布尔与整数字段合理设置默认值。
|
||||
- JSONB 字段用于存储动态列表(品牌别名、平台列表)。
|
||||
- 关系映射
|
||||
- 外键约束与级联删除(如用户删除时级联删除其查询与订阅)。
|
||||
- 双向关系与回溯属性(back_populates)。
|
||||
- 索引策略
|
||||
- 对高频过滤字段建立复合或单列索引(如 user_id、status、next_query_at)。
|
||||
|
||||
章节来源
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
|
||||
### 测试驱动开发(TDD)最佳实践与单元测试编写指南
|
||||
- 设计原则
|
||||
- 先写失败的测试,再实现最小逻辑使其通过;关注边界条件与错误路径。
|
||||
- 使用依赖覆盖(dependency override)隔离外部依赖,确保测试可重复。
|
||||
- 单元测试编写
|
||||
- 使用 pytest 异步客户端发起请求,断言状态码与响应结构。
|
||||
- Mock 服务层或模型层行为,验证路由层的参数传递与异常映射。
|
||||
- 示例参考
|
||||
- 认证模块测试覆盖注册成功/重复邮箱、登录成功/凭据错误、当前用户接口的鉴权场景。
|
||||
|
||||
章节来源
|
||||
- [tests/test_auth.py:1-104](file://tests/test_auth.py#L1-L104)
|
||||
|
||||
## 依赖分析
|
||||
- 后端模块耦合
|
||||
- API 路由依赖服务层;服务层依赖模型层与配置;模式层独立于业务逻辑。
|
||||
- 依赖注入降低耦合,便于替换与测试。
|
||||
- 前后端交互
|
||||
- 前端通过统一 API 客户端与后端路由通信,鉴权头由 NextAuth 管理。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
API_Auth["认证路由"] --> Svc_Auth["认证服务"]
|
||||
API_Queries["查询路由"] --> Svc_Query["查询服务"]
|
||||
Svc_Auth --> Model_User["用户模型"]
|
||||
Svc_Query --> Model_Query["查询模型"]
|
||||
FE_API["前端 API 客户端"] --> API_Auth
|
||||
FE_API --> API_Queries
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/services/query.py:1-130](file://backend/app/services/query.py#L1-L130)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/services/query.py:1-130](file://backend/app/services/query.py#L1-L130)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
## 性能考虑
|
||||
- 数据库层面
|
||||
- 为高频查询字段建立索引,避免全表扫描;对分页与排序字段保持索引覆盖。
|
||||
- 使用批量查询与预加载策略(如 selectinload/joinedload),减少 N+1 查询风险。
|
||||
- 服务层层面
|
||||
- 合理拆分查询与计数,避免不必要的 COUNT 开销;缓存热点数据(如平台白名单)。
|
||||
- API 层面
|
||||
- 控制分页大小上限,防止过大数据量返回;对敏感字段进行选择性序列化。
|
||||
- 前端层面
|
||||
- 使用虚拟列表与懒加载优化长列表渲染;统一错误提示与加载态,提升用户体验。
|
||||
|
||||
## 故障排查指南
|
||||
- 常见问题定位
|
||||
- 401 未授权:确认鉴权头是否正确传递与令牌是否过期。
|
||||
- 403 权限错误:检查用户配额与资源归属(user_id)。
|
||||
- 404 未找到:核对主键与所属用户 ID 是否匹配。
|
||||
- 400/422 参数错误:请求校验失败时 FastAPI 默认返回 422;检查 Pydantic 校验规则与枚举值。
|
||||
- 日志与监控
|
||||
- 后端开启路由级日志与异常捕获;前端统一错误上报与用户提示。
|
||||
- 回滚与恢复
|
||||
- 数据库迁移版本化管理;关键变更需配套回滚脚本与数据备份。
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
## 结论
|
||||
通过清晰的模块划分、严格的依赖注入、完善的模式校验与统一的前后端交互规范,GEO 平台具备良好的扩展性与可维护性。建议在新增功能时遵循本文档的设计原则与测试实践,确保功能快速落地且质量可控。
|
||||
|
||||
## 附录
|
||||
- 新增功能开发清单
|
||||
- 明确领域边界与模块职责,避免跨模块耦合。
|
||||
- 编写路由与模式定义,确保参数与响应结构清晰。
|
||||
- 实现服务层业务逻辑,覆盖边界条件与异常路径。
|
||||
- 设计数据库模型与索引,评估性能影响。
|
||||
- 编写单元测试与集成测试,保证功能正确性。
|
||||
- 前端页面与组件开发,统一状态管理与错误处理。
|
||||
- 文档与回归测试同步推进,保障发布质量。
|
||||
|
|
@ -0,0 +1,454 @@
|
|||
# 快速开始
|
||||
|
||||
<cite>
|
||||
**本文引用的文件**
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [backend/Dockerfile](file://backend/Dockerfile)
|
||||
- [frontend/Dockerfile](file://frontend/Dockerfile)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [frontend/package.json](file://frontend/package.json)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [frontend/lib/api.ts](file://frontend/lib/api.ts)
|
||||
- [frontend/README.md](file://frontend/README.md)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排除指南](#故障排除指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本指南面向新开发者,帮助你在最短时间内完成 GEO 平台的本地开发与容器化部署,并掌握核心功能的使用方式。你将学会:
|
||||
- 环境要求与依赖安装
|
||||
- 本地开发环境配置
|
||||
- Docker 容器化部署全流程
|
||||
- 基本使用示例:用户注册/登录、创建查询任务、查看引用数据
|
||||
- 调试方法与常见问题解决
|
||||
|
||||
## 项目结构
|
||||
项目采用前后端分离架构,后端基于 FastAPI,前端基于 Next.js,数据库使用 PostgreSQL,缓存使用 Redis,任务调度与浏览器自动化分别通过 APScheduler 和 Playwright 实现。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "容器编排"
|
||||
DC["docker-compose.yml"]
|
||||
end
|
||||
subgraph "后端服务"
|
||||
BK_MAIN["backend/app/main.py"]
|
||||
BK_CONF["backend/app/config.py"]
|
||||
BK_AUTH["backend/app/api/auth.py"]
|
||||
BK_QUERIES["backend/app/api/queries.py"]
|
||||
BK_CIT["backend/app/api/citations.py"]
|
||||
BK_MODELS["backend/app/models/*"]
|
||||
BK_REQ["backend/requirements.txt"]
|
||||
BK_DOCK["backend/Dockerfile"]
|
||||
end
|
||||
subgraph "前端服务"
|
||||
FE_API["frontend/lib/api.ts"]
|
||||
FE_PKG["frontend/package.json"]
|
||||
FE_README["frontend/README.md"]
|
||||
FE_DOCK["frontend/Dockerfile"]
|
||||
end
|
||||
DB["PostgreSQL 容器"]
|
||||
RDS["Redis 容器"]
|
||||
DC --> DB
|
||||
DC --> RDS
|
||||
DC --> BK_MAIN
|
||||
DC --> FE_API
|
||||
BK_MAIN --> BK_AUTH
|
||||
BK_MAIN --> BK_QUERIES
|
||||
BK_MAIN --> BK_CIT
|
||||
BK_MAIN --> BK_MODELS
|
||||
BK_MAIN --> BK_CONF
|
||||
FE_API --> BK_MAIN
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [frontend/README.md:1-37](file://frontend/README.md#L1-L37)
|
||||
|
||||
## 核心组件
|
||||
- 后端 API(FastAPI):提供认证、查询词管理、引用数据与报告相关接口;内置健康检查端点。
|
||||
- 前端应用(Next.js):通过统一的 API 封装调用后端接口,支持注册、登录、查询列表、引用数据与导出。
|
||||
- 数据库(PostgreSQL):存储用户、查询词、引用记录等业务数据。
|
||||
- 缓存与任务(Redis + APScheduler + Playwright):分别用于缓存、任务调度与浏览器自动化抓取。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:24-47](file://backend/app/main.py#L24-L47)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [backend/app/config.py:7-13](file://backend/app/config.py#L7-L13)
|
||||
|
||||
## 架构总览
|
||||
下图展示了容器化部署时各服务之间的交互关系与启动顺序。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "网络"
|
||||
L3000["前端: http://localhost:3000"]
|
||||
L8000["后端: http://localhost:8000"]
|
||||
end
|
||||
subgraph "容器"
|
||||
DB["Postgres:5432"]
|
||||
RDS["Redis:6379"]
|
||||
BE["后端容器"]
|
||||
FE["前端容器"]
|
||||
end
|
||||
L3000 --> FE
|
||||
FE --> BE
|
||||
BE --> DB
|
||||
BE --> RDS
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [docker-compose.yml:3-66](file://docker-compose.yml#L3-L66)
|
||||
- [backend/app/main.py:30-36](file://backend/app/main.py#L30-L36)
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/app/main.py:24-47](file://backend/app/main.py#L24-L47)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 后端 API 组件
|
||||
- 应用入口与生命周期:在应用启动时初始化模型与调度器,在关闭时优雅停机。
|
||||
- 中间件:启用 CORS,允许前端域名访问。
|
||||
- 路由:认证、查询词、引用数据、报告等模块路由均已挂载。
|
||||
- 健康检查:提供 /health 探针。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as "客户端"
|
||||
participant F as "前端(Next.js)"
|
||||
participant B as "后端(FastAPI)"
|
||||
participant D as "数据库(PostgreSQL)"
|
||||
participant Q as "调度器(APScheduler)"
|
||||
C->>F : 打开页面
|
||||
F->>B : GET /health
|
||||
B-->>F : {"status" : "ok"}
|
||||
Note over B,Q : 应用启动时启动调度器
|
||||
B->>Q : 启动任务调度
|
||||
Q->>D : 按计划执行查询
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:13-21](file://backend/app/main.py#L13-L21)
|
||||
- [backend/app/main.py:45-47](file://backend/app/main.py#L45-L47)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
|
||||
### 认证与用户模型
|
||||
- 认证接口:注册、登录、获取当前用户信息。
|
||||
- 用户模型:包含邮箱、密码哈希、名称、订阅计划、配额、活跃状态及时间戳字段。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class User {
|
||||
+UUID id
|
||||
+string email
|
||||
+string password_hash
|
||||
+string name
|
||||
+string plan
|
||||
+int max_queries
|
||||
+bool is_active
|
||||
+datetime created_at
|
||||
+datetime updated_at
|
||||
}
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
|
||||
### 查询词与引用数据
|
||||
- 查询词接口:分页列出、创建、更新、删除查询词。
|
||||
- 引用数据接口:分页查询引用记录、统计信息、立即触发查询任务。
|
||||
- 查询词模型:包含关键词、目标品牌、别名、平台集合、频率、状态、下次执行时间等。
|
||||
- 引用记录模型:包含平台、是否被引、位置、文本、竞品品牌、原始响应、抓取时间等。
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
USER ||--o{ QUERY : "拥有"
|
||||
QUERY ||--o{ CITATION_RECORD : "产生"
|
||||
USER {
|
||||
uuid id
|
||||
string email
|
||||
string name
|
||||
}
|
||||
QUERY {
|
||||
uuid id
|
||||
uuid user_id
|
||||
string keyword
|
||||
string target_brand
|
||||
json brand_aliases
|
||||
json platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamp last_queried_at
|
||||
timestamp next_query_at
|
||||
}
|
||||
CITATION_RECORD {
|
||||
uuid id
|
||||
uuid query_id
|
||||
string platform
|
||||
bool cited
|
||||
int citation_position
|
||||
text citation_text
|
||||
json competitor_brands
|
||||
text raw_response
|
||||
timestamp queried_at
|
||||
}
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
|
||||
### 前端 API 封装
|
||||
- 基础地址:优先读取环境变量 NEXT_PUBLIC_API_URL,否则回退到本地后端地址。
|
||||
- 认证封装:自动注入 Authorization Bearer Token。
|
||||
- 接口覆盖:认证、查询词、引用数据、报告导出等。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["发起 API 请求"]) --> BuildHeaders["构建请求头<br/>含 Content-Type 与可选 Authorization"]
|
||||
BuildHeaders --> FetchCall["fetch 发起请求"]
|
||||
FetchCall --> RespOk{"响应成功?"}
|
||||
RespOk --> |是| ParseJson["解析 JSON"]
|
||||
RespOk --> |否| ThrowErr["抛出错误(包含 HTTP 状态或详情)"]
|
||||
ParseJson --> End(["返回数据"])
|
||||
ThrowErr --> End
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/lib/api.ts:3-21](file://frontend/lib/api.ts#L3-L21)
|
||||
|
||||
章节来源
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
## 依赖分析
|
||||
- 后端依赖:Web 框架、数据库 ORM、异步驱动、配置校验、认证与安全、缓存与任务调度、浏览器自动化、HTTP 客户端、测试工具等。
|
||||
- 前端依赖:Next.js、UI 组件库、样式与类型工具等。
|
||||
- 容器镜像:后端基于 Python slim 镜像,安装 Playwright 依赖;前端基于 Node Alpine 镜像,使用 npm ci 安装依赖。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
subgraph "后端"
|
||||
PY["Python 3.11"]
|
||||
REQ["requirements.txt"]
|
||||
PLY["Playwright(chromium)"]
|
||||
end
|
||||
subgraph "前端"
|
||||
NODE["Node 20"]
|
||||
PKG["package.json"]
|
||||
end
|
||||
REQ --> PY
|
||||
PLY --> PY
|
||||
PKG --> NODE
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [backend/Dockerfile:6-33](file://backend/Dockerfile#L6-L33)
|
||||
- [frontend/Dockerfile:6-7](file://frontend/Dockerfile#L6-L7)
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
|
||||
## 性能考虑
|
||||
- 数据库索引:查询词与引用记录表均建立复合索引以优化分页与过滤查询。
|
||||
- 异步与连接池:使用异步 SQLAlchemy 与异步 PostgreSQL 驱动,提升并发处理能力。
|
||||
- 任务调度:APScheduler 在后台按计划执行查询,避免阻塞主请求线程。
|
||||
- 前端缓存:合理利用浏览器缓存与 Next.js 的静态资源优化策略。
|
||||
|
||||
章节来源
|
||||
- [backend/app/models/query.py:50-54](file://backend/app/models/query.py#L50-L54)
|
||||
- [backend/app/models/citation_record.py:37-41](file://backend/app/models/citation_record.py#L37-L41)
|
||||
|
||||
## 故障排除指南
|
||||
- 健康检查失败
|
||||
- 现象:访问 /health 返回异常或容器反复重启。
|
||||
- 排查:确认数据库与 Redis 健康检查通过;检查后端日志输出。
|
||||
- 参考:后端健康检查端点与容器编排健康检查配置。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:45-47](file://backend/app/main.py#L45-L47)
|
||||
- [docker-compose.yml:16-20](file://docker-compose.yml#L16-L20)
|
||||
- [docker-compose.yml:30-34](file://docker-compose.yml#L30-L34)
|
||||
|
||||
- 前端无法访问后端接口
|
||||
- 现象:前端控制台出现跨域错误或 404。
|
||||
- 排查:确认后端 CORS 允许前端域名;确认前端 API 基础地址正确;确认后端端口映射为 8000。
|
||||
- 参考:CORS 配置与前端 API 基础地址。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:30-36](file://backend/app/main.py#L30-L36)
|
||||
- [frontend/lib/api.ts:1-1](file://frontend/lib/api.ts#L1-L1)
|
||||
|
||||
- 数据库连接失败
|
||||
- 现象:后端启动时报数据库连接错误。
|
||||
- 排查:确认数据库容器已就绪且凭据正确;检查 .env 或环境变量;确认数据库端口映射为 5432。
|
||||
- 参考:数据库默认连接串与容器编排。
|
||||
|
||||
章节来源
|
||||
- [backend/app/config.py:7-7](file://backend/app/config.py#L7-L7)
|
||||
- [docker-compose.yml:8-13](file://docker-compose.yml#L8-L13)
|
||||
|
||||
- Redis 连接失败
|
||||
- 现象:任务调度或缓存相关功能异常。
|
||||
- 排查:确认 Redis 容器已就绪;检查 REDIS_URL;确认端口映射为 6379。
|
||||
- 参考:Redis 默认连接串与容器编排。
|
||||
|
||||
章节来源
|
||||
- [backend/app/config.py:8-8](file://backend/app/config.py#L8-L8)
|
||||
- [docker-compose.yml:22-34](file://docker-compose.yml#L22-L34)
|
||||
|
||||
- Playwright 报错(浏览器不可用)
|
||||
- 现象:抓取任务失败,提示浏览器相关错误。
|
||||
- 排查:确认容器内已安装 Playwright 依赖;检查浏览器路径配置;必要时重建后端镜像。
|
||||
- 参考:后端 Dockerfile 中 Playwright 安装步骤。
|
||||
|
||||
章节来源
|
||||
- [backend/Dockerfile:31-33](file://backend/Dockerfile#L31-L33)
|
||||
|
||||
## 结论
|
||||
通过本指南,你可以完成 GEO 平台的环境准备、依赖安装与容器化部署,并快速上手核心功能。建议在本地开发时结合健康检查与日志排查,逐步验证认证、查询与引用数据链路,再进行更复杂的任务调度与导出功能验证。
|
||||
|
||||
## 附录
|
||||
|
||||
### 环境要求
|
||||
- 操作系统:Linux/macOS/Windows(WSL2)
|
||||
- 容器运行时:Docker Engine 与 Compose 插件
|
||||
- 前端运行时:Node.js 20.x
|
||||
- 后端运行时:Python 3.11
|
||||
|
||||
章节来源
|
||||
- [backend/Dockerfile:1-1](file://backend/Dockerfile#L1-L1)
|
||||
- [frontend/Dockerfile:1-1](file://frontend/Dockerfile#L1-L1)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
|
||||
### 依赖安装步骤
|
||||
- 后端依赖
|
||||
- 使用 pip 安装 requirements.txt 中的包。
|
||||
- 安装 Playwright 浏览器与系统依赖。
|
||||
- 前端依赖
|
||||
- 使用 npm ci 安装 package.json 中的包。
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [backend/Dockerfile:6-33](file://backend/Dockerfile#L6-L33)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [frontend/Dockerfile:6-7](file://frontend/Dockerfile#L6-L7)
|
||||
|
||||
### 本地开发环境配置
|
||||
- 后端
|
||||
- 设置数据库与 Redis 连接字符串(默认来自配置类)。
|
||||
- 启动后端服务,监听 8000 端口。
|
||||
- 前端
|
||||
- 设置 NEXT_PUBLIC_API_URL 指向后端地址。
|
||||
- 启动开发服务器,监听 3000 端口。
|
||||
|
||||
章节来源
|
||||
- [backend/app/config.py:7-13](file://backend/app/config.py#L7-L13)
|
||||
- [backend/app/main.py:24-47](file://backend/app/main.py#L24-L47)
|
||||
- [frontend/lib/api.ts:1-1](file://frontend/lib/api.ts#L1-L1)
|
||||
- [frontend/README.md:5-15](file://frontend/README.md#L5-L15)
|
||||
|
||||
### Docker 容器化部署流程
|
||||
- 步骤 1:准备环境
|
||||
- 确保 Docker 已安装并运行。
|
||||
- 步骤 2:构建镜像
|
||||
- 后端镜像:基于 Python slim,安装系统与 Python 依赖,预装 Playwright。
|
||||
- 前端镜像:基于 Node Alpine,使用 npm ci 安装依赖。
|
||||
- 步骤 3:启动容器
|
||||
- 使用 docker-compose 启动 db、redis、backend、frontend 四个服务。
|
||||
- 等待数据库与 Redis 健康检查通过后,后端与前端启动完成。
|
||||
- 步骤 4:访问应用
|
||||
- 前端:http://localhost:3000
|
||||
- 后端:http://localhost:8000
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
|
||||
### 基本使用示例
|
||||
- 用户注册与登录
|
||||
- 注册:调用认证接口提交用户名、邮箱、密码。
|
||||
- 登录:提交邮箱与密码获取访问令牌。
|
||||
- 获取当前用户:携带令牌调用“获取当前用户”接口。
|
||||
- 创建查询任务
|
||||
- 列表:分页获取查询词列表。
|
||||
- 创建:提交关键词、目标品牌、平台集合等参数。
|
||||
- 更新/删除:按需更新或删除指定查询词。
|
||||
- 查看引用数据
|
||||
- 列表:按查询词、平台、日期范围分页查询引用记录。
|
||||
- 统计:获取引用统计信息。
|
||||
- 立即执行:触发一次查询任务。
|
||||
- 导出报告
|
||||
- 导出 CSV:根据查询 ID 导出对应报告。
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:13-42](file://backend/app/api/auth.py#L13-L42)
|
||||
- [backend/app/api/queries.py:15-85](file://backend/app/api/queries.py#L15-L85)
|
||||
- [backend/app/api/citations.py:25-77](file://backend/app/api/citations.py#L25-L77)
|
||||
- [frontend/lib/api.ts:24-56](file://frontend/lib/api.ts#L24-L56)
|
||||
|
||||
### 开发调试方法
|
||||
- 后端
|
||||
- 使用 Uvicorn 启动并开启热重载,便于快速迭代。
|
||||
- 关注健康检查端点与日志输出,定位服务状态。
|
||||
- 前端
|
||||
- 使用 Next.js 开发服务器,自动刷新页面。
|
||||
- 在浏览器开发者工具中观察网络请求与错误信息。
|
||||
- 容器
|
||||
- 通过 docker-compose logs 查看服务日志。
|
||||
- 使用 docker exec 进入容器内部排查依赖与端口占用。
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:51-66](file://docker-compose.yml#L51-L66)
|
||||
- [backend/app/main.py:45-47](file://backend/app/main.py#L45-L47)
|
||||
- [frontend/README.md:5-15](file://frontend/README.md#L5-L15)
|
||||
|
|
@ -0,0 +1,308 @@
|
|||
# 功能扩展
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/schemas/citation.py](file://backend/app/schemas/citation.py)
|
||||
- [backend/app/services/citation.py](file://backend/app/services/citation.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [backend/app/workers/platforms/kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [backend/app/workers/platforms/wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [frontend/app/layout.tsx](file://frontend/app/layout.tsx)
|
||||
- [frontend/lib/api.ts](file://frontend/lib/api.ts)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx](file://frontend/app/(dashboard)/dashboard/page.tsx)
|
||||
- [frontend/components/ui/button.tsx](file://frontend/components/ui/button.tsx)
|
||||
- [frontend/components/layout/header.tsx](file://frontend/components/layout/header.tsx)
|
||||
- [frontend/components/charts/trend-chart.tsx](file://frontend/components/charts/trend-chart.tsx)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件面向GEO平台的功能扩展,系统性地说明如何新增API接口(后端路由、数据模型、业务逻辑)、前端页面扩展(Next.js路由、组件与状态管理)、引用检测引擎扩展(新品牌匹配策略、竞争品牌识别算法)、以及UI组件库扩展(新组件开发、样式定制与响应式设计)。文档同时提供流程图、序列图与类图,帮助开发者快速理解并落地实施。
|
||||
|
||||
## 项目结构
|
||||
- 后端采用FastAPI,模块化组织API、模型、服务与工作流;数据库使用SQLAlchemy ORM,迁移通过Alembic管理。
|
||||
- 前端采用Next.js App Router,以功能域分层组织页面、组件与工具函数;UI基于Radix UI与Tailwind CSS。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "后端"
|
||||
A["FastAPI 应用<br/>backend/app/main.py"]
|
||||
B["API 路由<br/>backend/app/api/*"]
|
||||
C["模型与表结构<br/>backend/app/models/*"]
|
||||
D["服务层<br/>backend/app/services/*"]
|
||||
E["工作流与引擎<br/>backend/app/workers/*"]
|
||||
end
|
||||
subgraph "前端"
|
||||
F["应用布局<br/>frontend/app/layout.tsx"]
|
||||
G["API 客户端<br/>frontend/lib/api.ts"]
|
||||
H["页面与组件<br/>frontend/app/(dashboard)/*"]
|
||||
I["UI 组件库<br/>frontend/components/ui/*"]
|
||||
J["图表组件<br/>frontend/components/charts/*"]
|
||||
end
|
||||
A --> B
|
||||
B --> D
|
||||
D --> C
|
||||
D --> E
|
||||
F --> H
|
||||
H --> G
|
||||
H --> I
|
||||
H --> J
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
|
||||
## 核心组件
|
||||
- 后端API入口与路由挂载:在应用生命周期内注册认证、查询、引用、报告等路由,并开放健康检查端点。
|
||||
- 引用检测引擎:负责品牌匹配、竞争品牌识别、跨平台适配与任务调度。
|
||||
- 数据模型:查询、引用记录、任务等核心实体及索引。
|
||||
- 服务层:封装数据访问、统计聚合、导出与即时触发逻辑。
|
||||
- 前端API客户端:统一处理鉴权头、错误与响应解析。
|
||||
- UI组件库:基于Radix UI的可变样式组件,支持主题与尺寸变体。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:38-47](file://backend/app/main.py#L38-L47)
|
||||
- [backend/app/workers/citation_engine.py:148-301](file://backend/app/workers/citation_engine.py#L148-L301)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/services/citation.py:24-201](file://backend/app/services/citation.py#L24-L201)
|
||||
- [frontend/lib/api.ts:23-57](file://frontend/lib/api.ts#L23-L57)
|
||||
- [frontend/components/ui/button.tsx:1-57](file://frontend/components/ui/button.tsx#L1-L57)
|
||||
|
||||
## 架构总览
|
||||
下图展示从前端到后端的关键交互路径,包括API调用、服务层处理与引擎执行。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant FE as "前端页面<br/>frontend/app/(dashboard)/dashboard/page.tsx"
|
||||
participant API as "API 客户端<br/>frontend/lib/api.ts"
|
||||
participant APP as "FastAPI 应用<br/>backend/app/main.py"
|
||||
participant ROUTER as "引用路由<br/>backend/app/api/citations.py"
|
||||
participant SVC as "服务层<br/>backend/app/services/citation.py"
|
||||
participant ENG as "引擎<br/>backend/app/workers/citation_engine.py"
|
||||
FE->>API : "调用统计数据接口"
|
||||
API->>APP : "GET /api/v1/citations/stats"
|
||||
APP->>ROUTER : "转发到引用统计路由"
|
||||
ROUTER->>SVC : "get_citation_stats(user_id, query_id?)"
|
||||
SVC->>ENG : "聚合统计(含趋势、平台分布)"
|
||||
ENG-->>SVC : "返回统计结果"
|
||||
SVC-->>ROUTER : "返回统计结果"
|
||||
ROUTER-->>API : "JSON 响应"
|
||||
API-->>FE : "渲染图表与卡片"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:30-44](file://frontend/app/(dashboard)/dashboard/page.tsx#L30-L44)
|
||||
- [frontend/lib/api.ts:46-50](file://frontend/lib/api.ts#L46-L50)
|
||||
- [backend/app/main.py:38-42](file://backend/app/main.py#L38-L42)
|
||||
- [backend/app/api/citations.py:49-56](file://backend/app/api/citations.py#L49-L56)
|
||||
- [backend/app/services/citation.py:76-201](file://backend/app/services/citation.py#L76-L201)
|
||||
- [backend/app/workers/citation_engine.py:148-301](file://backend/app/workers/citation_engine.py#L148-L301)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 新增API接口:后端路由、模型与业务逻辑
|
||||
- 新增后端路由
|
||||
- 在API包中创建新的路由模块,定义路径、参数与响应模型。
|
||||
- 在应用入口注册路由并设置前缀与标签。
|
||||
- 示例参考:引用路由与统计接口的组织方式。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/citations.py:21-78](file://backend/app/api/citations.py#L21-L78)
|
||||
- [backend/app/main.py:38-42](file://backend/app/main.py#L38-L42)
|
||||
|
||||
- 数据模型扩展
|
||||
- 在models目录新增或修改ORM模型,定义字段、索引与外键关系。
|
||||
- 使用PostgreSQL JSONB类型存储动态配置(如品牌别名、平台列表)。
|
||||
- 示例参考:查询与引用记录模型的字段与索引设计。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
- 业务逻辑实现
|
||||
- 在services目录编写数据访问与聚合逻辑,确保权限校验与边界条件处理。
|
||||
- 对于复杂统计,使用SQL聚合函数与分组查询,保证性能与准确性。
|
||||
- 示例参考:引用统计与CSV导出的实现。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/services/citation.py:76-201](file://backend/app/services/citation.py#L76-L201)
|
||||
- [backend/app/services/citation.py:237-269](file://backend/app/services/citation.py#L237-L269)
|
||||
|
||||
- Pydantic模型与响应格式
|
||||
- 在schemas目录定义请求/响应模型,确保类型安全与序列化一致性。
|
||||
- 示例参考:引用列表、统计与运行任务响应模型。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/schemas/citation.py:7-50](file://backend/app/schemas/citation.py#L7-L50)
|
||||
|
||||
- 引擎扩展:品牌匹配与竞争品牌识别
|
||||
- 品牌匹配策略:精确匹配、别名匹配、模糊匹配(编辑距离),并返回置信度与上下文片段。
|
||||
- 竞争品牌识别:预定义行业品牌库,检测除目标品牌外的其他品牌。
|
||||
- 引擎执行流程:按平台顺序执行查询、匹配与记录生成,并更新查询任务状态与下次查询时间。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:19-120](file://backend/app/workers/citation_engine.py#L19-L120)
|
||||
- [backend/app/workers/citation_engine.py:122-146](file://backend/app/workers/citation_engine.py#L122-L146)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
|
||||
- 平台适配器扩展
|
||||
- 继承基类实现query方法,处理页面交互、稳定性检测与异常重试。
|
||||
- 支持多平台并行扩展,统一由引擎调度。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:33-125](file://backend/app/workers/platforms/kimi.py#L33-L125)
|
||||
- [backend/app/workers/platforms/wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
|
||||
### 前端页面扩展:Next.js路由、组件与状态管理
|
||||
- Next.js页面路由添加
|
||||
- 在App Router中新增页面目录与page组件,使用useSession获取会话信息。
|
||||
- 在根布局中注入Provider,确保全局状态可用。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/layout.tsx:22-36](file://frontend/app/layout.tsx#L22-L36)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:20-44](file://frontend/app/(dashboard)/dashboard/page.tsx#L20-L44)
|
||||
|
||||
- 组件开发与状态管理集成
|
||||
- 使用React Hook进行数据加载与错误处理,结合UI组件库构建卡片与图表。
|
||||
- 在图表组件中使用Recharts实现响应式可视化。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/components/layout/header.tsx:7-29](file://frontend/components/layout/header.tsx#L7-L29)
|
||||
- [frontend/components/charts/trend-chart.tsx:22-59](file://frontend/components/charts/trend-chart.tsx#L22-L59)
|
||||
|
||||
- API客户端集成
|
||||
- 统一封装fetchWithAuth,自动附加Authorization头并处理错误。
|
||||
- 在页面中调用api.citations.getStats加载统计数据。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/lib/api.ts:3-21](file://frontend/lib/api.ts#L3-L21)
|
||||
- [frontend/lib/api.ts:46-50](file://frontend/lib/api.ts#L46-L50)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:30-44](file://frontend/app/(dashboard)/dashboard/page.tsx#L30-L44)
|
||||
|
||||
### UI组件库扩展指南
|
||||
- 新组件开发
|
||||
- 基于Radix UI与cva变体系统,定义默认样式与尺寸变体。
|
||||
- 使用Slot支持透传子节点,保持语义化与可组合性。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/components/ui/button.tsx:7-54](file://frontend/components/ui/button.tsx#L7-L54)
|
||||
|
||||
- 样式定制与响应式设计
|
||||
- 使用Tailwind CSS变量与暗色主题适配,确保组件在深色模式下的可读性。
|
||||
- 图表组件使用ResponsiveContainer实现自适应宽高。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/components/charts/trend-chart.tsx:24-58](file://frontend/components/charts/trend-chart.tsx#L24-L58)
|
||||
|
||||
## 依赖分析
|
||||
- 后端模块耦合
|
||||
- API层仅依赖服务层;服务层依赖模型与引擎;引擎依赖平台适配器。
|
||||
- 数据库索引覆盖常用查询条件,提升统计与分页性能。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
API["API 层<br/>backend/app/api/*"] --> SVC["服务层<br/>backend/app/services/*"]
|
||||
SVC --> MODELS["模型层<br/>backend/app/models/*"]
|
||||
SVC --> ENGINE["引擎层<br/>backend/app/workers/citation_engine.py"]
|
||||
ENGINE --> ADAPTERS["平台适配器<br/>backend/app/workers/platforms/*"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/citations.py:15-19](file://backend/app/api/citations.py#L15-L19)
|
||||
- [backend/app/services/citation.py:9-12](file://backend/app/services/citation.py#L9-L12)
|
||||
- [backend/app/workers/citation_engine.py:148-157](file://backend/app/workers/citation_engine.py#L148-L157)
|
||||
- [backend/app/workers/platforms/base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/query.py:50-54](file://backend/app/models/query.py#L50-L54)
|
||||
- [backend/app/models/citation_record.py:37-41](file://backend/app/models/citation_record.py#L37-L41)
|
||||
|
||||
## 性能考虑
|
||||
- 数据库查询优化
|
||||
- 为高频过滤字段建立索引(如用户ID、状态、下次查询时间、查询ID、查询时间等)。
|
||||
- 使用SQL聚合与分组减少Python侧计算开销。
|
||||
- 引擎执行策略
|
||||
- 平台查询采用指数退避与稳定性检测,避免频繁失败与重复请求。
|
||||
- 将统计聚合与导出逻辑异步化,避免阻塞主流程。
|
||||
- 前端渲染优化
|
||||
- 图表组件使用ResponsiveContainer与轻量级数据结构,降低重排成本。
|
||||
- 页面按需加载与错误兜底,提升用户体验。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/query.py:50-54](file://backend/app/models/query.py#L50-L54)
|
||||
- [backend/app/models/citation_record.py:37-41](file://backend/app/models/citation_record.py#L37-L41)
|
||||
- [backend/app/workers/platforms/kimi.py:126-197](file://backend/app/workers/platforms/kimi.py#L126-L197)
|
||||
- [frontend/components/charts/trend-chart.tsx:24-58](file://frontend/components/charts/trend-chart.tsx#L24-L58)
|
||||
|
||||
## 故障排查指南
|
||||
- 引擎执行失败
|
||||
- 平台适配器抛出异常时,记录错误信息并创建一条cited=False的占位记录,便于追踪。
|
||||
- 关闭适配器资源,避免句柄泄漏。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:211-228](file://backend/app/workers/citation_engine.py#L211-L228)
|
||||
- [backend/app/workers/citation_engine.py:302-309](file://backend/app/workers/citation_engine.py#L302-L309)
|
||||
|
||||
- API调用失败
|
||||
- 前端API客户端统一处理HTTP错误与JSON解析,抛出可读错误信息。
|
||||
- 页面中捕获错误并提供重试入口。
|
||||
|
||||
**章节来源**
|
||||
- [frontend/lib/api.ts:16-21](file://frontend/lib/api.ts#L16-L21)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:54-66](file://frontend/app/(dashboard)/dashboard/page.tsx#L54-L66)
|
||||
|
||||
- 权限与数据隔离
|
||||
- 服务层在查询与导出前验证查询归属,防止越权访问。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/services/citation.py:14-22](file://backend/app/services/citation.py#L14-L22)
|
||||
- [backend/app/services/citation.py:242-244](file://backend/app/services/citation.py#L242-L244)
|
||||
|
||||
## 结论
|
||||
通过模块化的后端架构与清晰的前后端职责划分,GEO平台具备良好的扩展性。新增API接口与页面只需遵循现有路由、模型与服务层规范;引擎与适配器的抽象设计使得品牌匹配策略与平台能力可插拔扩展;UI组件库提供了统一的样式与交互体验。建议在扩展过程中严格遵循数据权限、错误处理与性能优化的最佳实践。
|
||||
|
||||
## 附录
|
||||
- 新增品牌匹配策略的实现要点
|
||||
- 在BrandMatcher中扩展候选词提取与相似度阈值,支持领域特定规则(如正则匹配、停用词过滤)。
|
||||
- 为不同行业维护品牌别名库,提高别名匹配准确率。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:19-120](file://backend/app/workers/citation_engine.py#L19-L120)
|
||||
|
||||
- 竞争品牌识别算法的实现要点
|
||||
- 基于预定义品牌分类(保险、金融、科技等)进行集合比对,排除目标品牌。
|
||||
- 可引入NLP分词与实体识别进一步提升召回质量。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:122-146](file://backend/app/workers/citation_engine.py#L122-L146)
|
||||
|
||||
- 平台适配器扩展流程
|
||||
- 继承BasePlatformAdapter,实现query方法与可选的close方法。
|
||||
- 在引擎中注册平台映射,确保execute_query能够调度新平台。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/base.py:4-18](file://backend/app/workers/platforms/base.py#L4-L18)
|
||||
- [backend/app/workers/citation_engine.py:152-157](file://backend/app/workers/citation_engine.py#L152-L157)
|
||||
|
|
@ -0,0 +1,481 @@
|
|||
# 扩展与定制
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/api/reports.py](file://backend/app/api/reports.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/schemas/query.py](file://backend/app/schemas/query.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [backend/app/workers/platforms/kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [backend/app/workers/platforms/wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/services/auth.py](file://backend/app/services/auth.py)
|
||||
- [backend/app/services/query.py](file://backend/app/services/query.py)
|
||||
- [backend/app/services/citation.py](file://backend/app/services/citation.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/api/deps.py](file://backend/app/api/deps.py)
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
- [backend/app/schemas/auth.py](file://backend/app/schemas/auth.py)
|
||||
- [backend/app/schemas/citation.py](file://backend/app/schemas/citation.py)
|
||||
- [backend/app/schemas/query.py](file://backend/app/schemas/query.py)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [frontend/app/layout.tsx](file://frontend/app/layout.tsx)
|
||||
- [frontend/components/providers.tsx](file://frontend/components/providers.tsx)
|
||||
- [frontend/lib/api.ts](file://frontend/lib/api.ts)
|
||||
- [frontend/package.json](file://frontend/package.json)
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [backend/Dockerfile](file://backend/Dockerfile)
|
||||
- [frontend/Dockerfile](file://frontend/Dockerfile)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件面向需要对 GEO 平台进行扩展与定制的工程师与产品团队,系统性阐述后端 API 扩展、前端页面扩展、数据模型扩展、配置定制、第三方集成(AI 平台、数据库、认证)以及插件化扩展的最佳实践。文档同时提供可落地的实施建议与案例研究,帮助快速实现业务定制化目标。
|
||||
|
||||
## 项目结构
|
||||
GEO 采用前后端分离架构:
|
||||
- 后端基于 FastAPI,提供 REST API;通过 Alembic 管理数据库迁移;使用 SQLAlchemy ORM 定义模型;APScheduler 实现定时任务;Playwright 支持 AI 平台网页抓取。
|
||||
- 前端基于 Next.js 14,使用 TypeScript、TailwindCSS、Radix UI 组件库;通过自定义 API 封装层与后端交互;NextAuth v4 提供会话管理。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "前端"
|
||||
FE_APP["Next.js 应用<br/>路由与页面"]
|
||||
FE_LIB["API 封装<br/>lib/api.ts"]
|
||||
FE_PROV["会话提供者<br/>components/providers.tsx"]
|
||||
end
|
||||
subgraph "后端"
|
||||
BE_MAIN["FastAPI 应用<br/>app/main.py"]
|
||||
BE_ROUTER_AUTH["认证路由<br/>app/api/auth.py"]
|
||||
BE_ROUTER_QUERIES["查询路由<br/>app/api/queries.py"]
|
||||
BE_ROUTER_CITATIONS["引用路由<br/>app/api/citations.py"]
|
||||
BE_ROUTER_REPORTS["报告路由<br/>app/api/reports.py"]
|
||||
BE_SCHED["调度器<br/>app/workers/scheduler.py"]
|
||||
BE_PLAT_BASE["平台适配器基类<br/>app/workers/platforms/base.py"]
|
||||
BE_PLAT_KIMI["Kimi 适配器<br/>app/workers/platforms/kimi.py"]
|
||||
BE_PLAT_WENXIN["文心一言适配器<br/>app/workers/platforms/wenxin.py"]
|
||||
BE_DB["数据库与模型<br/>app/database.py + models/*"]
|
||||
end
|
||||
FE_APP --> FE_LIB
|
||||
FE_LIB --> BE_MAIN
|
||||
BE_MAIN --> BE_ROUTER_AUTH
|
||||
BE_MAIN --> BE_ROUTER_QUERIES
|
||||
BE_MAIN --> BE_ROUTER_CITATIONS
|
||||
BE_MAIN --> BE_ROUTER_REPORTS
|
||||
BE_MAIN --> BE_SCHED
|
||||
BE_SCHED --> BE_PLAT_BASE
|
||||
BE_PLAT_BASE --> BE_PLAT_KIMI
|
||||
BE_PLAT_BASE --> BE_PLAT_WENXIN
|
||||
BE_MAIN --> BE_DB
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [frontend/components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
|
||||
## 核心组件
|
||||
- API 层:认证、查询词、引用数据、报告导出等模块化路由,统一挂载于主应用。
|
||||
- 服务层:封装业务逻辑,如用户认证、查询 CRUD、引用处理等。
|
||||
- 数据层:SQLAlchemy 模型与 Pydantic Schema,定义实体与请求/响应结构。
|
||||
- 工作器与调度:APScheduler 驱动定时任务,CitationEngine 协调平台适配器执行查询。
|
||||
- 前端:Next.js 页面与组件,通过 lib/api.ts 统一访问后端接口;NextAuth 提供会话状态。
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/api/reports.py](file://backend/app/api/reports.py)
|
||||
- [backend/app/services/auth.py](file://backend/app/services/auth.py)
|
||||
- [backend/app/services/query.py](file://backend/app/services/query.py)
|
||||
- [backend/app/services/citation.py](file://backend/app/services/citation.py)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/schemas/query.py:1-94](file://backend/app/schemas/query.py#L1-L94)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
## 架构总览
|
||||
下图展示从浏览器到后端 API、数据库与外部 AI 平台的完整链路:
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Browser as "浏览器"
|
||||
participant Frontend as "前端 Next.js"
|
||||
participant API as "后端 FastAPI"
|
||||
participant Svc as "服务层"
|
||||
participant DB as "数据库"
|
||||
participant Scheduler as "调度器"
|
||||
participant Engine as "CitationEngine"
|
||||
participant Plat as "平台适配器"
|
||||
Browser->>Frontend : 用户操作
|
||||
Frontend->>API : 发起 HTTP 请求
|
||||
API->>Svc : 路由分发与校验
|
||||
Svc->>DB : 读写数据
|
||||
DB-->>Svc : 返回结果
|
||||
Svc-->>API : 业务结果
|
||||
API-->>Frontend : JSON 响应
|
||||
Note over Scheduler,Engine : 定时触发查询执行
|
||||
Scheduler->>Engine : 触发执行
|
||||
Engine->>Plat : 调用具体平台适配器
|
||||
Plat-->>Engine : 返回原始响应
|
||||
Engine->>DB : 写入引用记录
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 新增 API 接口扩展指南
|
||||
- 路由扩展步骤
|
||||
- 在后端 app/api 下新增模块(如 app/api/new_feature.py),定义 APIRouter 并编写路由函数。
|
||||
- 在 app/main.py 中引入并挂载路由,指定前缀与标签。
|
||||
- 在 app/services 下新增对应服务函数,封装业务逻辑。
|
||||
- 在 app/schemas 下新增请求/响应模型,确保字段校验与默认值。
|
||||
- 在 app/models 下新增模型(如需持久化)。
|
||||
- 在 app/database.py 中注册模型,并在 alembic 迁移中生成/更新表结构。
|
||||
- 示例参考路径
|
||||
- [认证路由示例:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [查询路由示例:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [主应用挂载示例:38-42](file://backend/app/main.py#L38-L42)
|
||||
- [服务层示例](file://backend/app/services/query.py)
|
||||
- [Schema 示例:1-94](file://backend/app/schemas/query.py#L1-L94)
|
||||
- [模型示例:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["开始"]) --> CreateRouter["创建路由模块<br/>app/api/new_feature.py"]
|
||||
CreateRouter --> MountRouter["在 app/main.py 挂载路由"]
|
||||
MountRouter --> AddService["在 app/services 添加服务函数"]
|
||||
AddService --> AddSchema["在 app/schemas 添加模型"]
|
||||
AddSchema --> AddModel["在 app/models 添加模型(如需)"]
|
||||
AddModel --> Alembic["生成/更新数据库迁移"]
|
||||
Alembic --> Test["编写单元测试"]
|
||||
Test --> End(["完成"])
|
||||
```
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:38-42](file://backend/app/main.py#L38-L42)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/schemas/query.py:1-94](file://backend/app/schemas/query.py#L1-L94)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
|
||||
### 前端页面扩展指南
|
||||
- 页面与布局
|
||||
- 在 frontend/app/(dashboard)/ 下(或在新建的路由组目录下)新增页面目录与 page.tsx。
|
||||
- 在 frontend/app/layout.tsx 中组织根布局与 Providers。
|
||||
- 在 components/ui 下新增或复用 UI 组件,保持一致的设计语言。
|
||||
- API 调用
|
||||
- 在 frontend/lib/api.ts 中新增方法,遵循现有命名与错误处理模式。
|
||||
- 在页面中通过 hooks 或直接调用 api.* 方法获取数据。
|
||||
- 会话与权限
|
||||
- 使用 frontend/components/providers.tsx 包裹应用,确保 NextAuth 会话可用。
|
||||
- 在页面中根据用户状态控制渲染与交互。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
NewPage["新增页面<br/>frontend/app/(group)/new/page.tsx"] --> Layout["布局与 Providers<br/>frontend/app/layout.tsx"]
|
||||
Layout --> UI["UI 组件<br/>frontend/components/ui/*"]
|
||||
UI --> API["API 封装<br/>frontend/lib/api.ts"]
|
||||
API --> Backend["后端 API<br/>backend/app/api/*"]
|
||||
```
|
||||
|
||||
章节来源
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [frontend/components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
### 数据模型扩展指南
|
||||
- 字段与约束
|
||||
- 在 app/models 下新增或修改模型,使用 SQLAlchemy 类型与索引策略。
|
||||
- 在 app/schemas 下同步新增/变更 Pydantic 模型,确保序列化与校验。
|
||||
- 在 app/api 下更新路由的响应模型。
|
||||
- 迁移与版本控制
|
||||
- 使用 Alembic 生成迁移脚本,维护数据库演进历史。
|
||||
- 在生产环境执行迁移,避免破坏性变更。
|
||||
- 性能与一致性
|
||||
- 对高频查询字段建立索引;合理拆分表与外键关系。
|
||||
- 通过服务层统一数据访问,避免绕过校验。
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamp last_queried_at
|
||||
timestamp next_query_at
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
USERS {
|
||||
uuid id PK
|
||||
string email UK
|
||||
string name
|
||||
string password_hash
|
||||
boolean is_active
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
CITATION_RECORDS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
text raw_response
|
||||
timestamp created_at
|
||||
}
|
||||
QUERIES ||--o{ CITATION_RECORDS : "包含"
|
||||
USERS ||--o{ QUERIES : "拥有"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py)
|
||||
|
||||
章节来源
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/schemas/query.py:1-94](file://backend/app/schemas/query.py#L1-L94)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py)
|
||||
|
||||
### 配置定制选项
|
||||
- 环境变量
|
||||
- 通过 app/config.py 的 Settings 类集中管理,支持 .env 文件覆盖。
|
||||
- 关键配置项包括数据库连接、Redis、JWT 密钥与过期时间、Playwright 浏览器路径、第三方平台密钥等。
|
||||
- 功能开关与性能参数
|
||||
- 平台列表、频率策略、状态枚举在 Schema 中集中校验,便于扩展与限制。
|
||||
- 调度周期(每小时)可在 app/workers/scheduler.py 中调整。
|
||||
- 前端 NEXT_PUBLIC_API_URL 控制后端域名,lib/api.ts 中统一拼接。
|
||||
- 建议
|
||||
- 生产环境务必替换默认密钥与数据库密码。
|
||||
- 将敏感信息放入 .env 并加入 .gitignore。
|
||||
|
||||
章节来源
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/schemas/query.py:6-8](file://backend/app/schemas/query.py#L6-L8)
|
||||
- [backend/app/workers/scheduler.py:32-38](file://backend/app/workers/scheduler.py#L32-L38)
|
||||
- [frontend/lib/api.ts:1](file://frontend/lib/api.ts#L1)
|
||||
|
||||
### 第三方集成扩展指南
|
||||
|
||||
#### 新 AI 平台接入
|
||||
- 适配器开发
|
||||
- 继承 app/workers/platforms/base.py 中的 BasePlatformAdapter,实现 query 方法与可选的 close 方法。
|
||||
- 在 app/workers/platforms 下新增适配器文件,按现有 Kimi/Wenxin 模式实现页面交互与稳定性检测。
|
||||
- 注册与调度
|
||||
- 在 app/schemas/query.py 的 VALID_PLATFORMS 中添加新平台枚举值。
|
||||
- 在 CitationEngine 中注册新适配器映射,或通过工厂动态加载。
|
||||
- 错误与重试
|
||||
- 参考现有指数退避与超时处理策略,保证鲁棒性。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BasePlatformAdapter {
|
||||
+string platform_name
|
||||
+string platform_url
|
||||
+query(keyword) str
|
||||
+close()
|
||||
}
|
||||
class KimiAdapter {
|
||||
+query(keyword) str
|
||||
+close()
|
||||
}
|
||||
class WenxinAdapter {
|
||||
+query(keyword) str
|
||||
+close()
|
||||
}
|
||||
BasePlatformAdapter <|-- KimiAdapter
|
||||
BasePlatformAdapter <|-- WenxinAdapter
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
- [backend/app/schemas/query.py:6](file://backend/app/schemas/query.py#L6)
|
||||
|
||||
#### 新数据库支持
|
||||
- 当前使用 PostgreSQL + asyncpg,若需更换:
|
||||
- 在 app/database.py 中切换引擎与方言。
|
||||
- 更新 app/config.py 中 DATABASE_URL。
|
||||
- 在 requirements.txt 中替换驱动包。
|
||||
- 重新生成/更新 Alembic 迁移以适配新方言。
|
||||
|
||||
章节来源
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/config.py:7](file://backend/app/config.py#L7)
|
||||
- [backend/requirements.txt:5-8](file://backend/requirements.txt#L5-L8)
|
||||
|
||||
#### 新认证方式集成
|
||||
- 当前使用 JWT 令牌与 NextAuth 会话:
|
||||
- 后端:app/services/auth.py 生成与校验 JWT。
|
||||
- 前端:components/providers.tsx 提供 SessionProvider。
|
||||
- 若需 OAuth/SSO:在 NextAuth 配置中新增提供方;后端保持 JWT 令牌发放与校验逻辑不变。
|
||||
- 注意事项
|
||||
- 保持 Authorization 头格式与后端解析一致。
|
||||
- 在 app/api/deps.py 的依赖注入中校验用户身份。
|
||||
|
||||
章节来源
|
||||
- [backend/app/services/auth.py](file://backend/app/services/auth.py)
|
||||
- [frontend/components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
- [frontend/lib/api.ts:3-21](file://frontend/lib/api.ts#L3-L21)
|
||||
- [backend/app/api/deps.py](file://backend/app/api/deps.py)
|
||||
|
||||
### 插件系统使用指南与最佳实践
|
||||
- 插件化思路
|
||||
- 平台适配器采用“插件”式扩展:通过继承基类与工厂/映射注册,实现多平台并行。
|
||||
- 调度器与 CitationEngine 作为“核心引擎”,通过适配器接口解耦平台差异。
|
||||
- 最佳实践
|
||||
- 明确职责边界:路由负责协议与鉴权,服务层负责业务规则,模型负责数据结构。
|
||||
- 统一错误处理:前端统一捕获 HTTP 错误并提示;后端抛出明确异常码与消息。
|
||||
- 可观测性:为关键流程增加日志与指标,便于定位问题。
|
||||
- 安全:严格校验输入、最小权限原则、HTTPS 传输、密钥轮换。
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [frontend/lib/api.ts:16-21](file://frontend/lib/api.ts#L16-L21)
|
||||
|
||||
## 依赖分析
|
||||
- 后端依赖
|
||||
- Web 框架与 ASGI 服务器:FastAPI + Uvicorn
|
||||
- 数据库与迁移:SQLAlchemy + Alembic + asyncpg
|
||||
- 配置与校验:Pydantic + pydantic-settings + python-dotenv
|
||||
- 认证与安全:python-jose + passlib + python-multipart
|
||||
- 任务调度:APScheduler
|
||||
- 浏览器自动化:Playwright
|
||||
- HTTP 客户端:httpx
|
||||
- 前端依赖
|
||||
- 框架与 UI:Next.js + Radix UI + TailwindCSS
|
||||
- 认证:NextAuth v4
|
||||
- 图表:Recharts
|
||||
- 开发工具:TypeScript + ESLint + TailwindCSS
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
subgraph "后端依赖"
|
||||
F["FastAPI"]
|
||||
S["SQLAlchemy/Alembic"]
|
||||
P["Pydantic/pydantic-settings"]
|
||||
J["python-jose/passlib"]
|
||||
R["Redis/APScheduler"]
|
||||
PW["Playwright"]
|
||||
H["httpx/python-dotenv"]
|
||||
end
|
||||
subgraph "前端依赖"
|
||||
N["Next.js"]
|
||||
NA["NextAuth"]
|
||||
UI["Radix UI/TailwindCSS"]
|
||||
RC["Recharts"]
|
||||
end
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:11-27](file://frontend/package.json#L11-L27)
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:11-27](file://frontend/package.json#L11-L27)
|
||||
|
||||
## 性能考虑
|
||||
- 数据库
|
||||
- 为高频查询字段建立索引;避免 N+1 查询;使用分页参数限制单页规模。
|
||||
- API
|
||||
- 合理设置分页参数(skip/limit),避免一次性返回大量数据。
|
||||
- 对热点接口启用缓存(如 Redis)减少重复计算。
|
||||
- 定时任务
|
||||
- 调度周期可根据业务需求调整;在高负载时降低频率或增加并发控制。
|
||||
- 浏览器自动化
|
||||
- Playwright 启动成本较高,尽量复用上下文;失败重试与超时控制要合理设置。
|
||||
- 前端
|
||||
- 按需加载页面与组件;减少不必要的 re-render;利用浏览器缓存与静态资源优化。
|
||||
|
||||
## 故障排查指南
|
||||
- 常见问题定位
|
||||
- 后端健康检查:访问 /health 确认服务可用。
|
||||
- CORS:确认 app/main.py 中允许的源与方法。
|
||||
- 数据库连接:检查 DATABASE_URL 与网络连通性。
|
||||
- Playwright:确保已安装浏览器二进制;查看适配器初始化日志。
|
||||
- 日志与监控
|
||||
- 调度器与平台适配器均输出详细日志,定位失败原因。
|
||||
- 前端统一错误处理:lib/api.ts 在请求失败时抛出错误,便于 UI 提示。
|
||||
- 快速恢复
|
||||
- 重启后端服务与前端构建;检查 .env 配置是否正确;核对迁移是否执行。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:45-47](file://backend/app/main.py#L45-L47)
|
||||
- [backend/app/main.py:30-36](file://backend/app/main.py#L30-L36)
|
||||
- [backend/app/config.py:7](file://backend/app/config.py#L7)
|
||||
- [backend/app/workers/platforms/kimi.py:23-32](file://backend/app/workers/platforms/kimi.py#L23-L32)
|
||||
- [frontend/lib/api.ts:16-21](file://frontend/lib/api.ts#L16-L21)
|
||||
|
||||
## 结论
|
||||
GEO 平台提供了清晰的分层架构与可扩展点:路由层、服务层、数据层与工作器层相互解耦,配合配置中心与前端统一 API 封装,能够高效支撑业务扩展。通过平台适配器插件化、Schema/模型标准化、调度器与任务队列机制,团队可以快速接入新 AI 平台、扩展前端页面与数据模型,并在生产环境中保持稳定与可观测。
|
||||
|
||||
## 附录
|
||||
|
||||
### 系统定制化案例研究与实施建议
|
||||
- 案例一:接入新 AI 平台
|
||||
- 步骤:新增适配器类 → 在 Schema 中注册平台枚举 → 在 CitationEngine 中注册映射 → 编写测试 → 部署与灰度。
|
||||
- 建议:先在本地模拟页面交互,再逐步对接真实站点;为不同页面结构准备多套选择器策略。
|
||||
- 案例二:新增查询词字段
|
||||
- 步骤:在模型与 Schema 中新增字段 → 生成迁移 → 更新路由与服务层逻辑 → 前端页面与表单适配。
|
||||
- 建议:使用默认值与非空约束,确保向后兼容。
|
||||
- 案例三:前端新增报表页面
|
||||
- 步骤:新增页面与路由 → 引入图表组件 → 调用后端报表接口 → 权限控制与数据可视化。
|
||||
- 建议:复用现有 UI 组件库,保持设计一致性。
|
||||
|
||||
### 部署与运行要点
|
||||
- 使用 Docker Compose 启动后端与前端服务,确保端口映射与网络互通。
|
||||
- 后端 Dockerfile 与 requirements.txt 已配置,注意镜像构建缓存与依赖锁定。
|
||||
- 前端 Dockerfile 与 Next.js 版本已固定,构建产物由 Next.js 管理。
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [backend/Dockerfile](file://backend/Dockerfile)
|
||||
- [frontend/Dockerfile](file://frontend/Dockerfile)
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:11-27](file://frontend/package.json#L11-L27)
|
||||
|
|
@ -0,0 +1,510 @@
|
|||
# 第三方集成
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [backend/app/workers/platforms/kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [backend/app/workers/platforms/wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/services/auth.py](file://backend/app/services/auth.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/query_task.py](file://backend/app/models/query_task.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py)
|
||||
- [backend/app/schemas/auth.py](file://backend/app/schemas/auth.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本指南面向需要在 GEO 平台上集成新的第三方能力的开发者,覆盖以下主题:
|
||||
- 新 AI 平台接入:适配器接口实现、平台认证配置与查询逻辑适配
|
||||
- 新数据库支持:SQLAlchemy 模型扩展、迁移脚本编写与连接配置
|
||||
- 新认证方式集成:OAuth 提供商配置、JWT 令牌处理与权限系统扩展
|
||||
- 插件系统使用:插件注册机制、生命周期管理与错误处理
|
||||
- 实战示例与常见问题解决方案
|
||||
|
||||
## 项目结构
|
||||
后端采用 FastAPI + SQLAlchemy Async + APScheduler 的架构,核心模块如下:
|
||||
- workers:爬虫式 AI 平台适配器与引用检测引擎、定时调度器
|
||||
- api:FastAPI 路由层,负责请求入口与权限校验
|
||||
- services:业务服务层,封装认证、查询等核心逻辑
|
||||
- models/schemas:数据模型与请求/响应模型
|
||||
- database/config:数据库连接与配置
|
||||
- alembic:数据库迁移工具
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "API 层"
|
||||
RAuth["路由: 认证 /api/auth/*"]
|
||||
RQueries["路由: 查询 /api/queries/*"]
|
||||
end
|
||||
subgraph "服务层"
|
||||
SAuth["服务: 认证"]
|
||||
SQuery["服务: 查询"]
|
||||
end
|
||||
subgraph "工作器"
|
||||
CE["引擎: 引用检测"]
|
||||
SCH["调度器: 定时任务"]
|
||||
ADP["适配器: AI 平台"]
|
||||
end
|
||||
subgraph "数据层"
|
||||
DB["数据库: Async Engine"]
|
||||
MUser["模型: User"]
|
||||
MQuery["模型: Query"]
|
||||
MTask["模型: QueryTask"]
|
||||
end
|
||||
RAuth --> SAuth
|
||||
RQueries --> SQuery
|
||||
SQuery --> CE
|
||||
SCH --> CE
|
||||
CE --> ADP
|
||||
SAuth --> DB
|
||||
SQuery --> DB
|
||||
CE --> DB
|
||||
DB --> MUser
|
||||
DB --> MQuery
|
||||
DB --> MTask
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
|
||||
## 核心组件
|
||||
- 适配器接口与具体实现:统一的 AI 平台查询接口,内置 Kimi 与文心一言适配器
|
||||
- 引用检测引擎:对平台返回内容进行品牌引用检测与竞争品牌识别
|
||||
- 定时调度器:基于 APScheduler 的周期性查询任务调度
|
||||
- 认证与权限:基于 JWT 的用户认证与权限控制
|
||||
- 数据模型与迁移:基于 SQLAlchemy Async 的模型与 Alembic 迁移
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:1-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L1-L128)
|
||||
|
||||
## 架构总览
|
||||
下图展示从 API 请求到定时任务执行、再到 AI 平台查询与结果入库的整体流程。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant API as "FastAPI 路由"
|
||||
participant Service as "业务服务"
|
||||
participant Scheduler as "定时调度器"
|
||||
participant Engine as "引用检测引擎"
|
||||
participant Adapter as "AI 平台适配器"
|
||||
participant DB as "数据库"
|
||||
Client->>API : "登录/注册/查询请求"
|
||||
API->>Service : "认证/授权/业务处理"
|
||||
Service->>DB : "读写模型数据"
|
||||
Note over Scheduler : "定时触发"
|
||||
Scheduler->>Engine : "执行到期查询"
|
||||
Engine->>Adapter : "调用平台查询"
|
||||
Adapter-->>Engine : "返回原始响应"
|
||||
Engine->>DB : "写入引用记录/任务状态"
|
||||
Engine-->>Scheduler : "更新查询时间"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 组件A:AI 平台适配器体系
|
||||
- 接口设计:抽象基类定义统一的平台名称、URL 与查询接口
|
||||
- 具体实现:Kimi 与文心一言适配器均继承基类,实现稳定的页面交互与响应等待逻辑
|
||||
- 错误处理:重试机制、指数退避、超时处理与资源清理
|
||||
- 扩展建议:新增平台时,遵循相同接口与错误处理模式
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BasePlatformAdapter {
|
||||
+string platform_name
|
||||
+string platform_url
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class KimiAdapter {
|
||||
+platform_name = "kimi"
|
||||
+platform_url = "https://kimi.moonshot.cn"
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
class WenxinAdapter {
|
||||
+platform_name = "wenxin"
|
||||
+platform_url = "https://yiyan.baidu.com"
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
BasePlatformAdapter <|-- KimiAdapter
|
||||
BasePlatformAdapter <|-- WenxinAdapter
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
|
||||
### 组件B:引用检测引擎与查询任务
|
||||
- 引擎职责:遍历查询配置的平台,执行查询与品牌匹配,记录结果与任务状态
|
||||
- 品牌匹配:精确/别名/模糊匹配,并提取置信度与上下文片段
|
||||
- 竞争品牌检测:基于预设品牌库识别文本中的竞品
|
||||
- 任务管理:为每个查询与平台维护独立的任务记录,支持失败重试与状态追踪
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["开始执行查询"]) --> Init["初始化 BrandMatcher 与平台列表"]
|
||||
Init --> Loop{"遍历平台"}
|
||||
Loop --> |执行| Single["执行单平台查询"]
|
||||
Single --> Detect["品牌匹配与竞品检测"]
|
||||
Detect --> Record["写入引用记录与任务状态"]
|
||||
Record --> Next{"还有平台?"}
|
||||
Next --> |是| Loop
|
||||
Next --> |否| Update["更新查询下次执行时间"]
|
||||
Update --> End(["结束"])
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
|
||||
### 组件C:定时调度器
|
||||
- 触发策略:每小时检查一次到期查询
|
||||
- 执行流程:定位状态为 active 且 next_query_at 已到达的查询,委派引擎执行
|
||||
- 生命周期:启动/关闭时清理资源,优雅退出
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Timer as "APScheduler"
|
||||
participant Scheduler as "QueryScheduler"
|
||||
participant DB as "数据库"
|
||||
participant Engine as "CitationEngine"
|
||||
Timer->>Scheduler : "定时触发"
|
||||
Scheduler->>DB : "查询到期的 Query"
|
||||
DB-->>Scheduler : "返回待执行查询集合"
|
||||
loop "逐条执行"
|
||||
Scheduler->>Engine : "execute_query(query)"
|
||||
Engine-->>Scheduler : "返回引用记录"
|
||||
end
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
|
||||
### 组件D:认证与权限系统
|
||||
- 用户模型:包含邮箱、密码哈希、计划与配额等字段
|
||||
- 登录/注册:服务层进行密码哈希与校验,生成 JWT
|
||||
- 路由保护:通过依赖注入获取当前用户,实现基于角色的访问控制
|
||||
- 权限扩展:可基于用户计划与配额限制查询频率与数量
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant API as "认证路由"
|
||||
participant Service as "认证服务"
|
||||
participant DB as "数据库"
|
||||
Client->>API : "POST /api/auth/login"
|
||||
API->>Service : "authenticate_user(email, password)"
|
||||
Service->>DB : "查询用户并校验密码"
|
||||
DB-->>Service : "返回用户信息"
|
||||
Service-->>API : "生成 JWT"
|
||||
API-->>Client : "返回 TokenResponse"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/schemas/auth.py:1-34](file://backend/app/schemas/auth.py#L1-L34)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/schemas/auth.py:1-34](file://backend/app/schemas/auth.py#L1-L34)
|
||||
|
||||
### 组件E:数据库模型与迁移
|
||||
- 模型关系:User 与 Query、Subscription;Query 与 CitationRecord、QueryTask
|
||||
- 字段设计:JSONB 存储平台列表与别名,UUID 主键,索引优化查询
|
||||
- 迁移脚本:初始版本包含 users、queries、citation_records、query_tasks、subscriptions 表及索引
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
USERS {
|
||||
uuid id PK
|
||||
string email UK
|
||||
string password_hash
|
||||
string name
|
||||
string plan
|
||||
int max_queries
|
||||
boolean is_active
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamp last_queried_at
|
||||
timestamp next_query_at
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
QUERY_TASKS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
string status
|
||||
text error_message
|
||||
timestamp scheduled_at
|
||||
timestamp started_at
|
||||
timestamp completed_at
|
||||
}
|
||||
CITATION_RECORDS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
boolean cited
|
||||
int citation_position
|
||||
text citation_text
|
||||
jsonb competitor_brands
|
||||
text raw_response
|
||||
timestamp queried_at
|
||||
}
|
||||
SUBSCRIPTIONS {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string plan
|
||||
string status
|
||||
date start_date
|
||||
date end_date
|
||||
numeric amount
|
||||
string payment_method
|
||||
string payment_id
|
||||
timestamp created_at
|
||||
}
|
||||
USERS ||--o{ QUERIES : "拥有"
|
||||
USERS ||--o{ SUBSCRIPTIONS : "拥有"
|
||||
QUERIES ||--o{ QUERY_TASKS : "包含"
|
||||
QUERIES ||--o{ CITATION_RECORDS : "产生"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:21-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L21-L128)
|
||||
|
||||
章节来源
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:1-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L1-L128)
|
||||
|
||||
## 依赖分析
|
||||
- 组件耦合:引擎依赖适配器接口;调度器依赖引擎;API 依赖服务层;服务层依赖数据库
|
||||
- 外部依赖:FastAPI、SQLAlchemy Async、APScheduler、Pydantic、JWT、Passlib、Playwright
|
||||
- 配置集中:数据库 URL、Redis URL、JWT 密钥与过期时间、平台 API Key 等集中于配置类
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
APIAuth["API: 认证"] --> SAuth["服务: 认证"]
|
||||
APIQueries["API: 查询"] --> SQuery["服务: 查询"]
|
||||
SQuery --> CE["引擎: 引用检测"]
|
||||
SCH["调度器: 定时任务"] --> CE
|
||||
CE --> ADP["适配器: AI 平台"]
|
||||
SAuth --> DB["数据库: Async Engine"]
|
||||
SQuery --> DB
|
||||
CE --> DB
|
||||
DB --> CFG["配置: Settings"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
章节来源
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
|
||||
## 性能考虑
|
||||
- 引擎并发:适配器查询采用异步与重试策略,避免阻塞;建议在高并发场景下增加适配器池与限流
|
||||
- 数据库索引:对查询表的关键字段建立索引,减少调度器扫描成本
|
||||
- 缓存策略:可引入 Redis 缓存热点查询结果与任务状态
|
||||
- 超时与退避:适配器已内置超时与指数退避,建议结合平台限速策略调整重试参数
|
||||
|
||||
## 故障排查指南
|
||||
- 适配器启动失败(Playwright 浏览器未安装)
|
||||
- 现象:启动浏览器时报错,提示需安装 Chromium
|
||||
- 处理:按照日志提示安装浏览器镜像
|
||||
- 参考路径:[backend/app/workers/platforms/kimi.py:21-32](file://backend/app/workers/platforms/kimi.py#L21-L32)、[backend/app/workers/platforms/wenxin.py:21-32](file://backend/app/workers/platforms/wenxin.py#L21-L32)
|
||||
- 页面元素找不到导致查询失败
|
||||
- 现象:无法定位输入框或发送按钮
|
||||
- 处理:检查页面选择器策略,必要时更新选择器集合
|
||||
- 参考路径:[backend/app/workers/platforms/kimi.py:67-111](file://backend/app/workers/platforms/kimi.py#L67-L111)、[backend/app/workers/platforms/wenxin.py:67-112](file://backend/app/workers/platforms/wenxin.py#L67-L112)
|
||||
- 引擎执行异常
|
||||
- 现象:平台查询抛出异常,任务状态标记为 failed
|
||||
- 处理:查看任务错误消息,确认网络与平台可用性
|
||||
- 参考路径:[backend/app/workers/citation_engine.py:211-228](file://backend/app/workers/citation_engine.py#L211-L228)
|
||||
- 认证失败
|
||||
- 现象:登录返回 401,提示邮箱或密码错误
|
||||
- 处理:确认用户存在且密码正确,检查 JWT 密钥与过期时间
|
||||
- 参考路径:[backend/app/api/auth.py:22-37](file://backend/app/api/auth.py#L22-L37)、[backend/app/services/auth.py:55-69](file://backend/app/services/auth.py#L55-L69)
|
||||
- 数据库连接异常
|
||||
- 现象:连接字符串错误或数据库不可达
|
||||
- 处理:核对 DATABASE_URL,确保数据库服务正常
|
||||
- 参考路径:[backend/app/config.py](file://backend/app/config.py#L7)、[backend/app/database.py:6-10](file://backend/app/database.py#L6-L10)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/platforms/kimi.py:21-32](file://backend/app/workers/platforms/kimi.py#L21-L32)
|
||||
- [backend/app/workers/platforms/wenxin.py:21-32](file://backend/app/workers/platforms/wenxin.py#L21-L32)
|
||||
- [backend/app/workers/citation_engine.py:211-228](file://backend/app/workers/citation_engine.py#L211-L228)
|
||||
- [backend/app/api/auth.py:22-37](file://backend/app/api/auth.py#L22-L37)
|
||||
- [backend/app/services/auth.py:55-69](file://backend/app/services/auth.py#L55-L69)
|
||||
- [backend/app/config.py](file://backend/app/config.py#L7)
|
||||
- [backend/app/database.py:6-10](file://backend/app/database.py#L6-L10)
|
||||
|
||||
## 结论
|
||||
本指南提供了 GEO 平台集成第三方能力的系统化方法论与实践路径。通过适配器接口与引擎解耦、定时调度与任务状态管理、认证与权限控制以及数据库模型与迁移规范,开发者可以快速、安全地扩展平台能力。建议在生产环境中结合缓存、限流与监控进一步完善整体稳定性与可观测性。
|
||||
|
||||
## 附录
|
||||
|
||||
### 新 AI 平台接入流程(步骤清单)
|
||||
- 实现适配器
|
||||
- 继承抽象基类,定义平台名称与 URL
|
||||
- 实现查询方法,包含重试、超时与资源清理
|
||||
- 参考路径:[backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)、[backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)、[backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
- 注册到引擎
|
||||
- 在引擎平台映射中加入新适配器实例
|
||||
- 参考路径:[backend/app/workers/citation_engine.py:152-155](file://backend/app/workers/citation_engine.py#L152-L155)
|
||||
- 配置与部署
|
||||
- 确认 Playwright 浏览器可用,或按需替换为 API 方案
|
||||
- 参考路径:[backend/app/config.py](file://backend/app/config.py#L11)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
- [backend/app/workers/citation_engine.py:152-155](file://backend/app/workers/citation_engine.py#L152-L155)
|
||||
- [backend/app/config.py](file://backend/app/config.py#L11)
|
||||
|
||||
### 新数据库支持(步骤清单)
|
||||
- 扩展模型
|
||||
- 在 models 目录新增 SQLAlchemy 模型,继承 Base
|
||||
- 参考路径:[backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)、[backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)、[backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- 编写迁移
|
||||
- 使用 Alembic 生成并编写迁移脚本,包含索引与约束
|
||||
- 参考路径:[backend/alembic/versions/488d0bd5ab01_initial_migration.py:21-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L21-L128)
|
||||
- 连接配置
|
||||
- 在配置类中设置 DATABASE_URL,确保连接参数正确
|
||||
- 参考路径:[backend/app/config.py](file://backend/app/config.py#L7)、[backend/app/database.py:6-10](file://backend/app/database.py#L6-L10)
|
||||
|
||||
章节来源
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:21-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L21-L128)
|
||||
- [backend/app/config.py](file://backend/app/config.py#L7)
|
||||
- [backend/app/database.py:6-10](file://backend/app/database.py#L6-L10)
|
||||
|
||||
### 新认证方式集成(步骤清单)
|
||||
- OAuth 集成
|
||||
- 在认证服务中新增 OAuth 登录流程,返回用户信息与 JWT
|
||||
- 参考路径:[backend/app/services/auth.py:37-52](file://backend/app/services/auth.py#L37-L52)、[backend/app/api/auth.py:13-19](file://backend/app/api/auth.py#L13-L19)
|
||||
- JWT 令牌处理
|
||||
- 使用现有 JWT 工具函数生成与验证令牌
|
||||
- 参考路径:[backend/app/services/auth.py:24-34](file://backend/app/services/auth.py#L24-L34)
|
||||
- 权限系统扩展
|
||||
- 在路由层通过依赖注入获取当前用户,按用户计划与配额控制访问
|
||||
- 参考路径:[backend/app/api/auth.py:40-43](file://backend/app/api/auth.py#L40-L43)、[backend/app/models/user.py:22-24](file://backend/app/models/user.py#L22-L24)
|
||||
|
||||
章节来源
|
||||
- [backend/app/services/auth.py:24-34](file://backend/app/services/auth.py#L24-L34)
|
||||
- [backend/app/api/auth.py:13-19](file://backend/app/api/auth.py#L13-L19)
|
||||
- [backend/app/api/auth.py:40-43](file://backend/app/api/auth.py#L40-L43)
|
||||
- [backend/app/models/user.py:22-24](file://backend/app/models/user.py#L22-L24)
|
||||
|
||||
### 插件系统使用指南(概念性说明)
|
||||
- 注册机制
|
||||
- 将插件适配器注册到引擎映射,实现动态扩展
|
||||
- 参考路径:[backend/app/workers/citation_engine.py:152-155](file://backend/app/workers/citation_engine.py#L152-L155)
|
||||
- 生命周期管理
|
||||
- 在调度器启动/关闭时统一管理插件资源
|
||||
- 参考路径:[backend/app/workers/scheduler.py:30-40](file://backend/app/workers/scheduler.py#L30-L40)、[backend/app/workers/scheduler.py:86-90](file://backend/app/workers/scheduler.py#L86-L90)
|
||||
- 错误处理
|
||||
- 插件异常不影响全局流程,记录错误并继续执行其他插件
|
||||
- 参考路径:[backend/app/workers/citation_engine.py:211-228](file://backend/app/workers/citation_engine.py#L211-L228)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/citation_engine.py:152-155](file://backend/app/workers/citation_engine.py#L152-L155)
|
||||
- [backend/app/workers/scheduler.py:30-40](file://backend/app/workers/scheduler.py#L30-L40)
|
||||
- [backend/app/workers/scheduler.py:86-90](file://backend/app/workers/scheduler.py#L86-L90)
|
||||
- [backend/app/workers/citation_engine.py:211-228](file://backend/app/workers/citation_engine.py#L211-L228)
|
||||
|
|
@ -0,0 +1,329 @@
|
|||
# 配置定制
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [backend/app/workers/platforms/kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [backend/app/workers/platforms/wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [frontend/tailwind.config.ts](file://frontend/tailwind.config.ts)
|
||||
- [frontend/postcss.config.mjs](file://frontend/postcss.config.mjs)
|
||||
- [frontend/next.config.mjs](file://frontend/next.config.mjs)
|
||||
- [frontend/package.json](file://frontend/package.json)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件面向GEO项目的配置定制需求,系统性梳理后端环境变量配置、数据库与缓存连接、AI平台API密钥管理、功能开关与热更新机制、性能调优参数,以及前端主题定制(Tailwind CSS、颜色方案、响应式断点)。同时给出生产环境最佳实践与安全配置建议,帮助团队在不同部署环境中快速、安全地完成配置落地。
|
||||
|
||||
## 项目结构
|
||||
- 后端采用FastAPI + SQLAlchemy异步ORM + APScheduler异步调度,配置通过Pydantic Settings集中管理,支持.env文件注入。
|
||||
- 前端基于Next.js 14 + Tailwind CSS,PostCSS负责Tailwind集成,主题变量通过CSS自定义属性扩展。
|
||||
- Docker Compose编排数据库(PostgreSQL)、缓存(Redis)与前后端服务,统一挂载.env以注入环境变量。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "后端"
|
||||
CFG["配置模块<br/>Settings"]
|
||||
DB["数据库引擎<br/>AsyncEngine"]
|
||||
SCH["调度器<br/>AsyncIOScheduler"]
|
||||
CE["引用引擎<br/>CitationEngine"]
|
||||
KIMI["Kimi适配器"]
|
||||
WENXIN["文心一言适配器"]
|
||||
end
|
||||
subgraph "前端"
|
||||
TW["Tailwind配置<br/>tailwind.config.ts"]
|
||||
PC["PostCSS配置<br/>postcss.config.mjs"]
|
||||
NEXT["Next配置<br/>next.config.mjs"]
|
||||
end
|
||||
subgraph "基础设施"
|
||||
PG["PostgreSQL"]
|
||||
RD["Redis"]
|
||||
end
|
||||
CFG --> DB
|
||||
CFG --> RD
|
||||
CFG --> CE
|
||||
CE --> KIMI
|
||||
CE --> WENXIN
|
||||
SCH --> CE
|
||||
DB --> PG
|
||||
RD -.-> 应用
|
||||
TW --> 应用
|
||||
PC --> TW
|
||||
NEXT --> 应用
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
- [frontend/tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
- [frontend/postcss.config.mjs:1-9](file://frontend/postcss.config.mjs#L1-L9)
|
||||
- [frontend/next.config.mjs:1-5](file://frontend/next.config.mjs#L1-L5)
|
||||
|
||||
章节来源
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [frontend/tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
## 核心组件
|
||||
- 配置中心(Settings):集中声明数据库、缓存、JWT、浏览器路径、AI平台密钥等关键参数,并通过.env文件注入。
|
||||
- 数据库层:基于SQLAlchemy异步引擎,提供会话工厂与基础模型元数据。
|
||||
- 引擎与调度:CitationEngine负责跨平台查询与品牌匹配;QueryScheduler周期性触发任务。
|
||||
- 前端主题:Tailwind CSS通过CSS变量扩展颜色与圆角体系,PostCSS集成Tailwind,Next配置保持默认。
|
||||
|
||||
章节来源
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [frontend/tailwind.config.ts:10-54](file://frontend/tailwind.config.ts#L10-L54)
|
||||
|
||||
## 架构总览
|
||||
下图展示配置如何贯穿应用生命周期:从.env注入Settings,再到数据库与缓存连接、AI平台适配器初始化、调度器启动与任务执行。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant ENV as ".env"
|
||||
participant S as "Settings"
|
||||
participant DB as "数据库引擎"
|
||||
participant RED as "Redis"
|
||||
participant CE as "CitationEngine"
|
||||
participant SCH as "QueryScheduler"
|
||||
participant K as "Kimi适配器"
|
||||
participant W as "文心一言适配器"
|
||||
ENV-->>S : 注入键值对
|
||||
S-->>DB : 提供DATABASE_URL
|
||||
S-->>RED : 提供REDIS_URL
|
||||
S-->>CE : 提供API密钥(可选)
|
||||
SCH->>CE : 触发任务执行
|
||||
CE->>K : query(keyword)
|
||||
CE->>W : query(keyword)
|
||||
K-->>CE : 原始响应文本
|
||||
W-->>CE : 原始响应文本
|
||||
CE-->>DB : 写入引用记录
|
||||
CE-->>RED : 可选缓存写入
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/platforms/kimi.py:33-125](file://backend/app/workers/platforms/kimi.py#L33-L125)
|
||||
- [backend/app/workers/platforms/wenxin.py:33-124](file://backend/app/workers/platforms/wenxin.py#L33-L124)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 环境变量与配置管理
|
||||
- 配置来源:Settings通过.env文件注入,支持额外键忽略策略,便于本地与CI/CD环境解耦。
|
||||
- 关键参数:
|
||||
- 数据库连接:DATABASE_URL
|
||||
- 缓存连接:REDIS_URL
|
||||
- 安全相关:JWT_SECRET、JWT_EXPIRE_HOURS
|
||||
- 浏览器自动化:PLAYWRIGHT_BROWSERS_PATH
|
||||
- AI平台密钥:ZHIPU_API_KEY、TONGYI_API_KEY(当前为空,需在生产环境注入)
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class Settings {
|
||||
+DATABASE_URL : string
|
||||
+REDIS_URL : string
|
||||
+JWT_SECRET : string
|
||||
+JWT_EXPIRE_HOURS : number
|
||||
+PLAYWRIGHT_BROWSERS_PATH : string
|
||||
+ZHIPU_API_KEY : string
|
||||
+TONGYI_API_KEY : string
|
||||
}
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/config.py:4-16](file://backend/app/config.py#L4-L16)
|
||||
|
||||
章节来源
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [docker-compose.yml:42-43](file://docker-compose.yml#L42-L43)
|
||||
|
||||
### 数据库连接配置
|
||||
- 引擎创建:使用异步引擎,echo关闭,future开启。
|
||||
- 会话工厂:设置过期与自动提交/刷新策略,保证事务一致性。
|
||||
- 依赖注入:通过依赖函数提供会话,确保请求生命周期内正确释放。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["应用启动"]) --> LoadCfg["读取Settings.DATABASE_URL"]
|
||||
LoadCfg --> CreateEngine["创建异步引擎"]
|
||||
CreateEngine --> SessionFactory["创建会话工厂"]
|
||||
SessionFactory --> ProvideDB["依赖注入会话"]
|
||||
ProvideDB --> End(["请求处理完成,关闭会话"])
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/database.py:6-28](file://backend/app/database.py#L6-L28)
|
||||
|
||||
章节来源
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
|
||||
### Redis缓存配置
|
||||
- 连接字符串:REDIS_URL由Settings提供。
|
||||
- 使用场景:可作为任务状态、查询结果或会话存储的缓存层(当前代码未直接使用Redis,可在业务层扩展)。
|
||||
- 建议:生产环境务必独立Redis实例,设置密码与网络隔离。
|
||||
|
||||
章节来源
|
||||
- [backend/app/config.py:8](file://backend/app/config.py#L8)
|
||||
- [backend/requirements.txt:21](file://backend/requirements.txt#L21)
|
||||
|
||||
### AI平台API密钥管理
|
||||
- 当前实现:Settings声明密钥字段,但默认为空;引用引擎在执行平台查询时依赖适配器封装的内部逻辑。
|
||||
- 建议:在生产环境通过.env注入密钥,或通过机密管理服务挂载至容器。
|
||||
|
||||
章节来源
|
||||
- [backend/app/config.py:12-13](file://backend/app/config.py#L12-L13)
|
||||
- [backend/app/workers/citation_engine.py:244-246](file://backend/app/workers/citation_engine.py#L244-L246)
|
||||
|
||||
### 功能开关与动态配置
|
||||
- 现状:代码未实现运行时功能开关与配置热更新。
|
||||
- 建议实现方式:
|
||||
- 引入配置中心(如Consul、etcd或PostgreSQL表),定期拉取最新配置。
|
||||
- 使用装饰器或中间件在请求入口处进行功能开关判断。
|
||||
- 结合信号或健康检查接口触发重载,避免重启服务。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:13-21](file://backend/app/main.py#L13-L21)
|
||||
|
||||
### 性能调优参数
|
||||
- 数据库连接池:可通过异步引擎参数调整最大连接数、空闲连接数、连接超时等(当前未显式配置,使用默认值)。
|
||||
- 异步任务并发:调度器周期固定为1小时;平台查询采用指数退避与单次任务串行化,避免过度并发。
|
||||
- 缓存策略:建议在业务层引入Redis缓存热点查询结果,降低重复抓取成本。
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:32-38](file://backend/app/workers/scheduler.py#L32-L38)
|
||||
- [backend/app/workers/platforms/kimi.py:38-48](file://backend/app/workers/platforms/kimi.py#L38-L48)
|
||||
- [backend/app/workers/platforms/wenxin.py:38-48](file://backend/app/workers/platforms/wenxin.py#L38-L48)
|
||||
|
||||
### 前端主题定制指南
|
||||
- Tailwind CSS配置:通过CSS变量扩展colors与borderRadius,darkMode使用class模式。
|
||||
- PostCSS:仅启用tailwindcss插件,简化构建流程。
|
||||
- Next配置:默认配置,无需特殊改动。
|
||||
- 定制步骤:
|
||||
- 颜色方案:在tailwind.config.ts的theme.extend.colors中新增或覆盖HSL变量。
|
||||
- 圆角体系:在theme.extend.borderRadius中扩展半径变量。
|
||||
- 响应式断点:在theme.extend.screens中添加自定义断点名称与宽度。
|
||||
- 主题切换:通过在html或body上切换class实现明暗主题切换。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A["修改 tailwind.config.ts"] --> B["扩展 colors 或 borderRadius"]
|
||||
B --> C["构建并预览"]
|
||||
C --> D["在组件中使用新变量"]
|
||||
D --> E["发布"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/tailwind.config.ts:10-54](file://frontend/tailwind.config.ts#L10-L54)
|
||||
- [frontend/postcss.config.mjs:1-9](file://frontend/postcss.config.mjs#L1-L9)
|
||||
- [frontend/next.config.mjs:1-5](file://frontend/next.config.mjs#L1-L5)
|
||||
|
||||
章节来源
|
||||
- [frontend/tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
- [frontend/postcss.config.mjs:1-9](file://frontend/postcss.config.mjs#L1-L9)
|
||||
- [frontend/next.config.mjs:1-5](file://frontend/next.config.mjs#L1-L5)
|
||||
- [frontend/package.json:11-38](file://frontend/package.json#L11-L38)
|
||||
|
||||
### 生产环境配置最佳实践
|
||||
- 环境变量:
|
||||
- 使用独立.env文件,禁止提交到版本库;通过CI/CD注入。
|
||||
- 设置JWT_SECRET为强随机字符串,JWT_EXPIRE_HOURS按业务安全策略设定。
|
||||
- DATABASE_URL与REDIS_URL指向独立实例,启用TLS与访问控制。
|
||||
- 安全加固:
|
||||
- 限制CORS白名单,仅允许受信域名。
|
||||
- 启用HTTPS与安全头(由网关或反向代理层处理)。
|
||||
- 定期轮换密钥与证书。
|
||||
- 可靠性:
|
||||
- 数据库与Redis增加健康检查与自动重启策略。
|
||||
- 后端服务设置优雅停机与进程监控。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:30-36](file://backend/app/main.py#L30-L36)
|
||||
- [docker-compose.yml:8-20](file://docker-compose.yml#L8-L20)
|
||||
- [docker-compose.yml:25-34](file://docker-compose.yml#L25-L34)
|
||||
- [docker-compose.yml:46-51](file://docker-compose.yml#L46-L51)
|
||||
|
||||
## 依赖分析
|
||||
- 后端依赖:FastAPI、SQLAlchemy异步、Pydantic/Settings、Redis、APScheduler、Playwright。
|
||||
- 前端依赖:Next.js、Tailwind CSS、Radix UI组件、Recharts等。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
R["requirements.txt"] --> F["FastAPI"]
|
||||
R --> SA["SQLAlchemy异步"]
|
||||
R --> PS["Pydantic/Settings"]
|
||||
R --> RD["Redis"]
|
||||
R --> AP["APScheduler"]
|
||||
R --> PW["Playwright"]
|
||||
P["package.json"] --> NX["Next.js"]
|
||||
P --> TW["Tailwind CSS"]
|
||||
P --> UI["@radix-ui/*"]
|
||||
P --> RC["Recharts"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:11-38](file://frontend/package.json#L11-L38)
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:11-38](file://frontend/package.json#L11-L38)
|
||||
|
||||
## 性能考虑
|
||||
- 数据库:根据QPS与查询复杂度调整连接池大小;开启连接复用与超时控制。
|
||||
- 缓存:对热点查询结果进行缓存,设置合理TTL;避免缓存穿透与雪崩。
|
||||
- 并发:平台查询采用指数退避与串行化,避免平台限流;必要时引入队列与限速器。
|
||||
- 前端:Tailwind按需扫描内容文件,减少未使用样式体积;生产构建启用压缩与Tree Shaking。
|
||||
|
||||
## 故障排查指南
|
||||
- 数据库连接失败:检查DATABASE_URL格式与可达性;确认容器网络与卷挂载。
|
||||
- Redis连接失败:检查REDIS_URL与网络策略;确认容器健康检查状态。
|
||||
- Playwright浏览器问题:确认PLAYWRIGHT_BROWSERS_PATH与浏览器安装;查看日志中的超时与选择器匹配失败信息。
|
||||
- 调度器未启动:确认lifespan钩子与main.py中include_router顺序;查看日志输出。
|
||||
- CORS错误:核对allow_origins与凭证设置;确保前端请求头一致。
|
||||
|
||||
章节来源
|
||||
- [backend/app/database.py:6-10](file://backend/app/database.py#L6-L10)
|
||||
- [backend/app/config.py:11](file://backend/app/config.py#L11)
|
||||
- [backend/app/workers/platforms/kimi.py:21-32](file://backend/app/workers/platforms/kimi.py#L21-L32)
|
||||
- [backend/app/workers/platforms/wenxin.py:21-32](file://backend/app/workers/platforms/wenxin.py#L21-L32)
|
||||
- [backend/app/main.py:13-21](file://backend/app/main.py#L13-L21)
|
||||
- [backend/app/main.py:30-36](file://backend/app/main.py#L30-L36)
|
||||
|
||||
## 结论
|
||||
本文档提供了GEO项目配置定制的完整指南,涵盖后端环境变量、数据库与缓存、AI平台密钥、前端主题定制与生产安全实践。当前代码未实现运行时功能开关与配置热更新,建议在后续迭代中引入配置中心与热重载机制,以提升系统的可运维性与安全性。
|
||||
|
||||
## 附录
|
||||
- Docker Compose编排:数据库、缓存与前后端服务均通过.env注入环境变量,建议在生产环境使用独立网络与只读根文件系统。
|
||||
- 版本与依赖:后端与前端均采用稳定版本,升级前请进行兼容性测试。
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:11-38](file://frontend/package.json#L11-L38)
|
||||
|
|
@ -0,0 +1,480 @@
|
|||
# 故障排除与FAQ
|
||||
|
||||
<cite>
|
||||
**本文引用的文件**
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/services/auth.py](file://backend/app/services/auth.py)
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
- [backend/app/schemas/auth.py](file://backend/app/schemas/auth.py)
|
||||
- [backend/app/api/deps.py](file://backend/app/api/deps.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [frontend/app/api/auth/[...nextauth]/route.ts](file://frontend/app/api/auth/[...nextauth]/route.ts)
|
||||
- [frontend/lib/auth.ts](file://frontend/lib/auth.ts)
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排除指南](#故障排除指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件面向GEO平台的运维与开发人员,提供系统化的故障排除与常见问题解答(FAQ)。内容覆盖认证问题、数据库连接问题、API调用问题、性能问题的定位与修复;同时给出系统监控与诊断方法(健康检查、错误日志分析、性能指标监控)、调试技巧与工具使用(开发/生产环境),以及常见错误码含义与预防性维护最佳实践,并提供紧急故障恢复流程与应急处理方案。
|
||||
|
||||
## 项目结构
|
||||
- 后端采用FastAPI + SQLAlchemy异步ORM + Redis/APScheduler任务调度,通过Docker Compose编排PostgreSQL、Redis、后端与前端服务。
|
||||
- 前端使用Next.js + NextAuth进行会话管理,通过NextAuth路由对接后端认证接口。
|
||||
- 关键路径:前端登录请求 → NextAuth → 后端认证接口 → 数据库用户校验 → JWT签发 → 前端会话存储。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
FE["前端应用<br/>Next.js + NextAuth"] --> NA["NextAuth路由<br/>/api/auth/[...nextauth]"]
|
||||
NA --> BA["后端认证接口<br/>/api/v1/auth/*"]
|
||||
BA --> DB["数据库<br/>PostgreSQL"]
|
||||
BA --> CFG["配置中心<br/>settings/DATABASE_URL/JWT等"]
|
||||
BA --> SCH["任务调度器<br/>APScheduler"]
|
||||
BE["后端服务<br/>FastAPI"] --> DB
|
||||
BE --> REDIS["缓存/队列<br/>Redis"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:24-47](file://backend/app/main.py#L24-L47)
|
||||
- [frontend/app/api/auth/[...nextauth]/route.ts](file://frontend/app/api/auth/[...nextauth]/route.ts#L1-L7)
|
||||
- [backend/app/config.py:7-13](file://backend/app/config.py#L7-L13)
|
||||
- [backend/app/database.py:6-18](file://backend/app/database.py#L6-L18)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
## 核心组件
|
||||
- 应用入口与生命周期:定义FastAPI应用、CORS中间件、路由注册与健康检查端点。
|
||||
- 配置管理:集中读取DATABASE_URL、REDIS_URL、JWT密钥、API密钥等。
|
||||
- 数据库层:异步引擎与会话工厂,提供依赖注入式数据库会话。
|
||||
- 认证模块:注册/登录/当前用户信息,JWT生成与校验,OAuth2 Bearer令牌解析。
|
||||
- 查询模块:查询任务的增删改查,权限校验与分页参数控制。
|
||||
- 任务调度:基于APScheduler的异步调度器,周期性扫描并执行到期查询任务。
|
||||
- 前端认证:NextAuth凭据提供者对接后端登录接口,JWT会话策略。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:13-47](file://backend/app/main.py#L13-L47)
|
||||
- [backend/app/config.py:4-16](file://backend/app/config.py#L4-L16)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [frontend/lib/auth.ts:1-56](file://frontend/lib/auth.ts#L1-L56)
|
||||
|
||||
## 架构总览
|
||||
下图展示从浏览器到后端、数据库与外部平台的关键交互路径,以及认证与任务调度的运行时关系。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as "用户浏览器"
|
||||
participant F as "前端NextAuth"
|
||||
participant A as "后端认证接口"
|
||||
participant D as "数据库"
|
||||
participant J as "JWT令牌"
|
||||
participant S as "任务调度器"
|
||||
U->>F : "提交登录表单"
|
||||
F->>A : "POST /api/v1/auth/login"
|
||||
A->>D : "按邮箱查询用户"
|
||||
D-->>A : "返回用户记录"
|
||||
A->>A : "校验密码"
|
||||
A->>J : "签发访问令牌"
|
||||
A-->>F : "返回令牌与用户信息"
|
||||
F-->>U : "建立会话并跳转"
|
||||
Note over S,D : "后台定时扫描到期查询并执行"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/lib/auth.ts:13-31](file://frontend/lib/auth.ts#L13-L31)
|
||||
- [backend/app/api/auth.py:22-37](file://backend/app/api/auth.py#L22-L37)
|
||||
- [backend/app/services/auth.py:55-69](file://backend/app/services/auth.py#L55-L69)
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 认证组件分析
|
||||
- 登录流程要点
|
||||
- 前端NextAuth凭据提供者收集邮箱/密码,调用后端登录接口。
|
||||
- 后端根据邮箱查询用户,校验密码,成功则签发JWT。
|
||||
- 前端以JWT策略存储会话,后续受保护路由自动携带Bearer令牌。
|
||||
- 常见问题与定位
|
||||
- 邮箱或密码错误:后端返回未授权错误,需检查输入格式与用户是否存在。
|
||||
- JWT过期或密钥不一致:令牌解析失败导致“凭据无效”,需核对JWT_SECRET与过期时间。
|
||||
- CORS跨域:若前端端口变更但后端允许源未更新,会出现跨域错误。
|
||||
- 安全建议
|
||||
- 生产环境必须更换默认JWT密钥,设置合理过期时间。
|
||||
- 密码使用哈希存储,传输层启用HTTPS。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as "客户端"
|
||||
participant N as "NextAuth"
|
||||
participant R as "认证路由"
|
||||
participant M as "模型/服务"
|
||||
participant T as "JWT"
|
||||
C->>N : "凭据提交"
|
||||
N->>R : "POST /api/v1/auth/login"
|
||||
R->>M : "查询用户并校验密码"
|
||||
M-->>R : "返回用户或空"
|
||||
alt "用户存在且密码正确"
|
||||
R->>T : "生成访问令牌"
|
||||
R-->>N : "返回令牌与用户"
|
||||
N-->>C : "会话建立"
|
||||
else "用户不存在或密码错误"
|
||||
R-->>N : "401 未授权"
|
||||
N-->>C : "登录失败"
|
||||
end
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/lib/auth.ts:13-31](file://frontend/lib/auth.ts#L13-L31)
|
||||
- [backend/app/api/auth.py:22-37](file://backend/app/api/auth.py#L22-L37)
|
||||
- [backend/app/services/auth.py:37-69](file://backend/app/services/auth.py#L37-L69)
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
|
||||
章节来源
|
||||
- [frontend/app/api/auth/[...nextauth]/route.ts](file://frontend/app/api/auth/[...nextauth]/route.ts#L1-L7)
|
||||
- [frontend/lib/auth.ts:1-56](file://frontend/lib/auth.ts#L1-L56)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/schemas/auth.py:1-34](file://backend/app/schemas/auth.py#L1-L34)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
|
||||
### 数据库与会话组件分析
|
||||
- 连接与依赖
|
||||
- 异步引擎与会话工厂统一由配置驱动,依赖注入在每个请求内创建会话并自动关闭。
|
||||
- 数据库URL来自环境变量,容器化部署通过compose的env_file注入。
|
||||
- 常见问题
|
||||
- 连接字符串错误:确认主机、端口、数据库名、用户名与密码。
|
||||
- 权限不足:确保用户具备数据库访问权限。
|
||||
- 连接池耗尽:检查并发量与超时设置,必要时调整连接数。
|
||||
- 诊断步骤
|
||||
- 在后端容器内使用psql连接数据库验证连通性。
|
||||
- 查看数据库慢查询日志与锁等待情况。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["请求进入"]) --> GetCfg["读取配置<br/>DATABASE_URL"]
|
||||
GetCfg --> BuildEngine["创建异步引擎"]
|
||||
BuildEngine --> Session["创建会话工厂"]
|
||||
Session --> UseDB["依赖注入获取会话"]
|
||||
UseDB --> Exec["执行SQL操作"]
|
||||
Exec --> Close["关闭会话"]
|
||||
Close --> End(["请求结束"])
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/config.py:7-13](file://backend/app/config.py#L7-L13)
|
||||
- [backend/app/database.py:6-29](file://backend/app/database.py#L6-L29)
|
||||
|
||||
章节来源
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [docker-compose.yml:4-21](file://docker-compose.yml#L4-L21)
|
||||
|
||||
### 查询与任务调度组件分析
|
||||
- 查询管理
|
||||
- 支持分页列表、创建、查询、更新、删除;创建时进行权限校验与配额检查。
|
||||
- 查询状态与下次执行时间字段用于调度决策。
|
||||
- 任务调度
|
||||
- 每小时扫描一次到期查询,逐条执行并记录日志;异常不影响整体调度。
|
||||
- 调度器随应用生命周期启动与关闭。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Tick["每小时触发"] --> Scan["扫描到期查询"]
|
||||
Scan --> Found{"找到待执行项?"}
|
||||
Found -- "否" --> Sleep["等待下次触发"]
|
||||
Found -- "是" --> Exec["逐条执行查询"]
|
||||
Exec --> Log["记录执行结果/异常"]
|
||||
Log --> Sleep
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/scheduler.py:30-90](file://backend/app/workers/scheduler.py#L30-L90)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
|
||||
## 依赖分析
|
||||
- 组件耦合
|
||||
- 认证路由依赖数据库与JWT服务;依赖注入贯穿API层与服务层。
|
||||
- 查询API依赖当前用户上下文与数据库会话。
|
||||
- 调度器依赖数据库会话与引用引擎,日志输出用于可观测性。
|
||||
- 外部依赖
|
||||
- 数据库:PostgreSQL(异步驱动)
|
||||
- 缓存/队列:Redis
|
||||
- 任务调度:APScheduler
|
||||
- 浏览器自动化:Playwright
|
||||
- 认证:JWT、BCrypt
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
AUTH["认证路由"] --> DEPS["依赖注入"]
|
||||
AUTH --> JWT["JWT服务"]
|
||||
AUTH --> DB["数据库"]
|
||||
QUERIES["查询路由"] --> DEPS
|
||||
QUERIES --> DB
|
||||
SCHED["调度器"] --> DB
|
||||
SCHED --> CE["引用引擎"]
|
||||
MAIN["应用入口"] --> AUTH
|
||||
MAIN --> QUERIES
|
||||
MAIN --> SCHED
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:6-42](file://backend/app/main.py#L6-L42)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/workers/scheduler.py:18-95](file://backend/app/workers/scheduler.py#L18-L95)
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
|
||||
## 性能考虑
|
||||
- 数据库层
|
||||
- 使用异步ORM减少阻塞;为高频查询字段建立索引(如查询表的用户ID、状态、下次执行时间)。
|
||||
- 控制事务范围,避免长事务;批量写入时合并提交。
|
||||
- API层
|
||||
- 合理设置分页参数上限,避免一次性返回过多数据。
|
||||
- 对热点接口增加缓存(如用户信息、平台配置)。
|
||||
- 调度与并发
|
||||
- 调度间隔与任务粒度平衡;单次任务失败不应阻塞其他任务。
|
||||
- 监控任务执行时长与积压情况,必要时拆分任务或引入队列。
|
||||
- 前端
|
||||
- 减少不必要的重渲染,使用稳定的数据结构;对长列表使用虚拟滚动。
|
||||
- 观测性
|
||||
- 记录关键链路耗时与错误率;结合日志与指标进行根因分析。
|
||||
|
||||
[本节为通用指导,无需列出章节来源]
|
||||
|
||||
## 故障排除指南
|
||||
|
||||
### 认证相关问题
|
||||
- 症状:登录返回401未授权
|
||||
- 可能原因
|
||||
- 邮箱不存在或密码错误
|
||||
- JWT密钥不匹配或过期
|
||||
- CORS限制导致预检失败
|
||||
- 排查步骤
|
||||
- 确认用户是否已注册且密码正确
|
||||
- 检查后端JWT_SECRET与前端会话策略
|
||||
- 核对后端CORS允许源是否包含前端地址
|
||||
- 解决方案
|
||||
- 修正凭据或重置密码
|
||||
- 更新JWT密钥并重启服务
|
||||
- 调整CORS配置
|
||||
|
||||
- 症状:受保护接口返回401凭据无效
|
||||
- 可能原因
|
||||
- 令牌缺失或格式错误
|
||||
- 令牌解析失败(算法或密钥不一致)
|
||||
- 用户被禁用或不存在
|
||||
- 排查步骤
|
||||
- 检查请求头是否包含Bearer令牌
|
||||
- 校验JWT_SECRET与过期时间
|
||||
- 确认用户仍存在于数据库且处于激活状态
|
||||
- 解决方案
|
||||
- 重新登录获取新令牌
|
||||
- 修复配置并刷新令牌
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:22-37](file://backend/app/api/auth.py#L22-L37)
|
||||
- [backend/app/services/auth.py:24-34](file://backend/app/services/auth.py#L24-L34)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/main.py:30-36](file://backend/app/main.py#L30-L36)
|
||||
|
||||
### 数据库连接问题
|
||||
- 症状:应用启动时报数据库连接失败
|
||||
- 可能原因
|
||||
- DATABASE_URL配置错误
|
||||
- PostgreSQL未就绪或端口未映射
|
||||
- 用户/密码/数据库名不正确
|
||||
- 排查步骤
|
||||
- 在后端容器内使用psql连接验证
|
||||
- 检查compose健康检查与依赖顺序
|
||||
- 确认防火墙与网络策略放行
|
||||
- 解决方案
|
||||
- 修正.env中的DATABASE_URL
|
||||
- 等待数据库健康检查通过后再启动后端
|
||||
- 修正凭据与数据库名称
|
||||
|
||||
- 症状:运行中频繁出现连接超时或连接池耗尽
|
||||
- 可能原因
|
||||
- 并发过高或事务过长
|
||||
- 连接池参数过小
|
||||
- 排查步骤
|
||||
- 查看数据库连接数与慢查询
|
||||
- 检查后端连接池配置
|
||||
- 解决方案
|
||||
- 优化查询与事务边界
|
||||
- 调整连接池大小与超时参数
|
||||
|
||||
章节来源
|
||||
- [backend/app/config.py:7-13](file://backend/app/config.py#L7-L13)
|
||||
- [backend/app/database.py:6-18](file://backend/app/database.py#L6-L18)
|
||||
- [docker-compose.yml:46-51](file://docker-compose.yml#L46-L51)
|
||||
|
||||
### API调用问题
|
||||
- 症状:查询列表/详情/创建/更新/删除返回404
|
||||
- 可能原因
|
||||
- 资源不存在或ID格式错误
|
||||
- 权限不足(非本人资源)
|
||||
- 排查步骤
|
||||
- 校验UUID格式与资源归属
|
||||
- 检查当前用户上下文
|
||||
- 解决方案
|
||||
- 使用正确的用户与ID
|
||||
- 确保仅操作本人资源
|
||||
|
||||
- 症状:创建查询返回403
|
||||
- 可能原因
|
||||
- 配额不足或计划限制
|
||||
- 排查步骤
|
||||
- 检查用户计划与最大查询数
|
||||
- 解决方案
|
||||
- 升级计划或清理历史任务释放额度
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/queries.py:26-85](file://backend/app/api/queries.py#L26-L85)
|
||||
|
||||
### 性能问题
|
||||
- 症状:查询列表加载缓慢
|
||||
- 可能原因
|
||||
- 分页参数过大
|
||||
- 缺少索引导致排序/过滤慢
|
||||
- 排查步骤
|
||||
- 降低limit并观察响应时间
|
||||
- 分析数据库执行计划
|
||||
- 解决方案
|
||||
- 合理设置分页上限
|
||||
- 为常用查询字段添加索引
|
||||
|
||||
- 症状:定时任务执行延迟或堆积
|
||||
- 可能原因
|
||||
- 单次任务耗时过长
|
||||
- 调度器未正确启动或关闭
|
||||
- 排查步骤
|
||||
- 查看调度器日志与任务耗时
|
||||
- 检查后端生命周期钩子
|
||||
- 解决方案
|
||||
- 将长任务拆分为多个短任务
|
||||
- 确保调度器随应用启动/停止
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:30-90](file://backend/app/workers/scheduler.py#L30-L90)
|
||||
- [backend/app/main.py:13-21](file://backend/app/main.py#L13-L21)
|
||||
|
||||
### 健康检查与日志分析
|
||||
- 健康检查
|
||||
- 后端健康端点:GET /health
|
||||
- compose健康检查:PostgreSQL与Redis分别提供ping/ready检测
|
||||
- 日志分析
|
||||
- 后端日志:关注认证失败、数据库异常、任务执行错误
|
||||
- 前端日志:关注NextAuth回调与网络错误
|
||||
- 性能指标
|
||||
- 记录关键API耗时、数据库查询耗时、任务执行耗时
|
||||
- 监控连接池使用率与队列长度
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:45-47](file://backend/app/main.py#L45-L47)
|
||||
- [docker-compose.yml:16-34](file://docker-compose.yml#L16-L34)
|
||||
- [backend/app/workers/scheduler.py:22-90](file://backend/app/workers/scheduler.py#L22-L90)
|
||||
|
||||
### 调试技巧与工具使用
|
||||
- 开发环境调试
|
||||
- 启用后端reload与详细日志
|
||||
- 使用curl或Postman测试认证与查询接口
|
||||
- 在前端浏览器开发者工具查看Network与Console
|
||||
- 生产环境排查
|
||||
- 通过日志聚合与告警系统定位异常
|
||||
- 快速回滚至上一版本以隔离变更
|
||||
- 使用只读副本进行慢查询分析
|
||||
- 性能分析
|
||||
- 使用数据库性能分析工具识别慢查询
|
||||
- 对热点接口进行压力测试与容量评估
|
||||
|
||||
[本节为通用指导,无需列出章节来源]
|
||||
|
||||
### 常见错误码与含义
|
||||
- 400 Bad Request
|
||||
- 场景:注册时邮箱已存在、密码长度不足
|
||||
- 处理:修正输入并重试
|
||||
- 401 Unauthorized
|
||||
- 场景:登录失败、令牌无效、凭据无法验证
|
||||
- 处理:重新登录或检查JWT配置
|
||||
- 403 Forbidden
|
||||
- 场景:创建查询时配额不足
|
||||
- 处理:升级计划或清理历史任务
|
||||
- 404 Not Found
|
||||
- 场景:查询资源不存在或非本人资源
|
||||
- 处理:确认ID与权限
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:17-19](file://backend/app/api/auth.py#L17-L19)
|
||||
- [backend/app/api/queries.py:34-38](file://backend/app/api/queries.py#L34-L38)
|
||||
- [backend/app/api/queries.py:49-53](file://backend/app/api/queries.py#L49-L53)
|
||||
|
||||
### 预防性维护最佳实践
|
||||
- 定期备份数据库与Redis
|
||||
- 保持依赖版本更新与安全补丁
|
||||
- 设置合理的日志保留与轮转策略
|
||||
- 对关键接口与任务设置告警阈值
|
||||
- 制定发布前的回归测试清单
|
||||
|
||||
[本节为通用指导,无需列出章节来源]
|
||||
|
||||
### 紧急故障恢复流程与应急处理
|
||||
- 立即行动
|
||||
- 降级非关键功能,优先保障认证与核心查询
|
||||
- 回滚最近一次变更
|
||||
- 诊断与修复
|
||||
- 快速定位日志与指标异常点
|
||||
- 修复配置错误、扩容资源或修复慢查询
|
||||
- 验证与复盘
|
||||
- 发布后持续监控关键指标
|
||||
- 形成故障报告与改进措施
|
||||
|
||||
[本节为通用指导,无需列出章节来源]
|
||||
|
||||
## 结论
|
||||
通过明确的组件职责、完善的健康检查与日志体系、规范的调试与性能分析流程,以及针对认证、数据库、API与调度的专项排障清单,可以显著提升GEO平台的稳定性与可维护性。建议将本指南纳入团队知识库,并定期演练应急流程以提升响应效率。
|
||||
|
||||
## 附录
|
||||
- 快速检查清单
|
||||
- 数据库连通性与权限
|
||||
- JWT密钥与过期时间
|
||||
- CORS允许源配置
|
||||
- 调度器状态与日志
|
||||
- 健康端点与compose健康检查
|
||||
- 参考文件
|
||||
- 后端入口与路由:[backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- 配置与数据库:[backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)、[backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- 认证与用户模型:[backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)、[backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)、[backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- 查询与调度:[backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)、[backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)、[backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- 前端认证:[frontend/lib/auth.ts:1-56](file://frontend/lib/auth.ts#L1-L56)、[frontend/app/api/auth/[...nextauth]/route.ts](file://frontend/app/api/auth/[...nextauth]/route.ts#L1-L7)
|
||||
- 编排与依赖:[docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)、[backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
|
|
@ -0,0 +1,436 @@
|
|||
# 数据库架构
|
||||
|
||||
<cite>
|
||||
**本文引用的文件**
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/alembic/env.py](file://backend/alembic/env.py)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [backend/app/models/query_task.py](file://backend/app/models/query_task.py)
|
||||
- [backend/app/models/subscription.py](file://backend/app/models/subscription.py)
|
||||
- [backend/app/models/__init__.py](file://backend/app/models/__init__.py)
|
||||
- [backend/app/api/deps.py](file://backend/app/api/deps.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考量](#性能考量)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件系统性梳理 GEO 平台的数据库架构,重点覆盖以下方面:
|
||||
- 基于 PostgreSQL 的异步数据库连接配置:SQLAlchemy 异步引擎、连接池与会话生命周期管理
|
||||
- 连接字符串配置、环境变量管理与安全注意事项
|
||||
- 异步数据库操作的优势、性能特征与最佳实践
|
||||
- 连接监控、错误处理与故障恢复机制
|
||||
- 生产环境配置建议与性能调优指南
|
||||
|
||||
## 项目结构
|
||||
后端采用 FastAPI + SQLAlchemy 2.x 异步 ORM 架构,数据库层由异步引擎与会话工厂构成,配合 Alembic 进行迁移管理;模型定义位于 models 子包中,并通过 API 层注入依赖进行使用。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "后端应用"
|
||||
CFG["配置模块<br/>app/config.py"]
|
||||
DB["数据库引擎与会话<br/>app/database.py"]
|
||||
MODELS["ORM 模型<br/>app/models/*"]
|
||||
ALEMBIC["迁移环境<br/>alembic/env.py"]
|
||||
MIGRATE["初始迁移脚本<br/>alembic/versions/*"]
|
||||
API_DEPS["API 依赖注入<br/>app/api/deps.py"]
|
||||
API_AUTH["认证路由<br/>app/api/auth.py"]
|
||||
MAIN["应用入口与生命周期<br/>app/main.py"]
|
||||
end
|
||||
subgraph "外部服务"
|
||||
PG["PostgreSQL 实例"]
|
||||
REDIS["Redis 实例"]
|
||||
end
|
||||
CFG --> DB
|
||||
DB --> MODELS
|
||||
ALEMBIC --> MIGRATE
|
||||
API_DEPS --> DB
|
||||
API_AUTH --> DB
|
||||
MAIN --> API_AUTH
|
||||
MAIN --> API_DEPS
|
||||
DB --> PG
|
||||
REDIS -. 缓存/任务队列 .-> MAIN
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:1-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L1-L128)
|
||||
- [backend/app/api/deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
|
||||
章节来源
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:1-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L1-L128)
|
||||
- [backend/app/api/deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
|
||||
## 核心组件
|
||||
- 异步引擎与会话工厂
|
||||
- 使用 SQLAlchemy 异步引擎创建数据库连接,关闭 echo,启用 future 模式
|
||||
- 会话工厂配置为非自动提交、非自动刷新、关闭提交时过期,确保事务边界清晰
|
||||
- 提供异步上下文管理的会话生成器,保证在请求结束时正确关闭会话
|
||||
- 配置与连接字符串
|
||||
- 通过 Pydantic Settings 从 .env 文件加载配置,包含 DATABASE_URL、REDIS_URL、JWT_SECRET 等
|
||||
- 默认 DATABASE_URL 指向本地或容器内 PostgreSQL 实例
|
||||
- 模型与索引
|
||||
- 用户、查询、引用记录、查询任务、订阅等核心实体,均采用 PostgreSQL UUID 主键;其中查询与引用记录使用 JSONB 字段存储列表类与半结构化数据
|
||||
- 在高频查询字段上建立复合索引,提升读取性能
|
||||
- 迁移与初始化
|
||||
- Alembic 环境支持离线与在线迁移,使用异步引擎执行迁移
|
||||
- 初始迁移脚本创建表结构与索引,定义外键约束与默认值
|
||||
|
||||
章节来源
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- [backend/app/models/subscription.py:1-37](file://backend/app/models/subscription.py#L1-L37)
|
||||
- [backend/alembic/env.py:64-88](file://backend/alembic/env.py#L64-L88)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:21-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L21-L128)
|
||||
|
||||
## 架构总览
|
||||
下图展示异步数据库连接在应用中的整体流转:FastAPI 路由通过依赖注入获取 AsyncSession,执行数据库操作后自动释放;迁移通过 Alembic 异步引擎执行;配置从 .env 加载 DATABASE_URL。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant API as "FastAPI 路由"
|
||||
participant Deps as "依赖注入(get_db)"
|
||||
participant Session as "AsyncSession"
|
||||
participant Engine as "异步引擎"
|
||||
participant PG as "PostgreSQL"
|
||||
Client->>API : "HTTP 请求"
|
||||
API->>Deps : "解析依赖(获取会话)"
|
||||
Deps->>Session : "创建/获取会话"
|
||||
Session->>Engine : "执行 SQL"
|
||||
Engine->>PG : "发送查询"
|
||||
PG-->>Engine : "返回结果"
|
||||
Engine-->>Session : "映射为模型"
|
||||
Session-->>API : "业务结果"
|
||||
API-->>Client : "响应"
|
||||
API->>Deps : "请求结束"
|
||||
Deps->>Session : "关闭会话"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/auth.py:13-42](file://backend/app/api/auth.py#L13-L42)
|
||||
- [backend/app/api/deps.py:16-42](file://backend/app/api/deps.py#L16-L42)
|
||||
- [backend/app/database.py:23-28](file://backend/app/database.py#L23-L28)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 异步引擎与会话生命周期
|
||||
- 引擎创建
|
||||
- 使用异步引擎,echo 关闭,future 启用,避免冗余日志与兼容性问题
|
||||
- 会话工厂
|
||||
- 非自动提交、非自动刷新、关闭提交时过期,便于显式控制事务
|
||||
- 会话生成器
|
||||
- 通过上下文管理器确保异常时也能正确关闭会话,避免连接泄漏
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["进入 get_db"]) --> NewSession["创建 AsyncSessionLocal()"]
|
||||
NewSession --> TryBlock["进入 try 块并 yield 会话"]
|
||||
TryBlock --> FinallyBlock["finally 块中关闭会话"]
|
||||
FinallyBlock --> End(["退出 get_db"])
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/database.py:23-28](file://backend/app/database.py#L23-L28)
|
||||
|
||||
章节来源
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
|
||||
### 连接字符串与环境变量管理
|
||||
- 连接字符串
|
||||
- 默认 DATABASE_URL 指向 PostgreSQL 实例,可在 .env 中覆盖
|
||||
- 环境变量加载
|
||||
- 通过 Pydantic SettingsConfigDict 从 .env 加载,忽略未知字段
|
||||
- 安全注意事项
|
||||
- 生产环境务必使用强密钥与加密传输
|
||||
- 不要在代码中硬编码敏感信息,优先使用环境变量与密钥管理服务
|
||||
|
||||
章节来源
|
||||
- [backend/app/config.py:4-16](file://backend/app/config.py#L4-L16)
|
||||
|
||||
### 数据模型与索引策略
|
||||
- 用户模型
|
||||
- UUID 主键、邮箱唯一、密码哈希、计划与配额字段
|
||||
- 查询模型
|
||||
- 外键关联用户、JSONB 存储平台与别名、多字段索引优化
|
||||
- 引用记录模型
|
||||
- JSONB 存储竞品品牌与原始响应,按查询与时间、平台建立索引
|
||||
- 查询任务模型
|
||||
- 状态字段索引,便于任务调度与状态统计
|
||||
- 订阅模型
|
||||
- 金额与支付信息存储,日期范围与状态管理
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
USERS {
|
||||
uuid id PK
|
||||
string email UK
|
||||
string password_hash
|
||||
string name
|
||||
string plan
|
||||
int max_queries
|
||||
bool is_active
|
||||
timestamptz created_at
|
||||
timestamptz updated_at
|
||||
}
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamptz last_queried_at
|
||||
timestamptz next_query_at
|
||||
timestamptz created_at
|
||||
timestamptz updated_at
|
||||
}
|
||||
CITATION_RECORDS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
bool cited
|
||||
int citation_position
|
||||
text citation_text
|
||||
jsonb competitor_brands
|
||||
text raw_response
|
||||
timestamptz queried_at
|
||||
}
|
||||
QUERY_TASKS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
string status
|
||||
text error_message
|
||||
timestamptz scheduled_at
|
||||
timestamptz started_at
|
||||
timestamptz completed_at
|
||||
}
|
||||
SUBSCRIPTIONS {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string plan
|
||||
string status
|
||||
date start_date
|
||||
date end_date
|
||||
numeric amount
|
||||
string payment_method
|
||||
string payment_id
|
||||
timestamptz created_at
|
||||
}
|
||||
USERS ||--o{ QUERIES : "拥有"
|
||||
QUERIES ||--o{ CITATION_RECORDS : "产生"
|
||||
QUERIES ||--o{ QUERY_TASKS : "驱动"
|
||||
USERS ||--o{ SUBSCRIPTIONS : "订阅"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/subscription.py:11-37](file://backend/app/models/subscription.py#L11-L37)
|
||||
|
||||
章节来源
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- [backend/app/models/subscription.py:1-37](file://backend/app/models/subscription.py#L1-L37)
|
||||
|
||||
### 迁移与初始化
|
||||
- 离线/在线迁移
|
||||
- 离线模式直接使用 DATABASE_URL;在线模式通过异步引擎建立连接并执行迁移
|
||||
- 使用 NullPool 避免额外连接池开销
|
||||
- 初始迁移
|
||||
- 创建 users、queries、citation_records、query_tasks、subscriptions 表
|
||||
- 添加必要的外键与索引,确保查询性能与数据一致性
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant CLI as "Alembic CLI"
|
||||
participant Env as "env.py"
|
||||
participant Engine as "异步引擎"
|
||||
participant Conn as "连接"
|
||||
participant DB as "PostgreSQL"
|
||||
CLI->>Env : "run_migrations_online()"
|
||||
Env->>Engine : "create_async_engine(DATABASE_URL, poolclass=NullPool)"
|
||||
Engine->>Conn : "connect()"
|
||||
Conn->>DB : "执行迁移"
|
||||
DB-->>Conn : "返回迁移结果"
|
||||
Conn-->>Engine : "完成迁移"
|
||||
Engine-->>Env : "dispose()"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/alembic/env.py:64-88](file://backend/alembic/env.py#L64-L88)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:21-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L21-L128)
|
||||
|
||||
章节来源
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:1-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L1-L128)
|
||||
|
||||
### API 依赖注入与会话使用
|
||||
- 依赖注入
|
||||
- OAuth2 密码流(Bearer 令牌)用于令牌校验;通过 get_db 获取 AsyncSession
|
||||
- 典型流程
|
||||
- 注册/登录接口使用 AsyncSession 执行写入与查询
|
||||
- 当前用户接口通过令牌解析与数据库查询返回用户信息
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant Auth as "认证路由"
|
||||
participant Deps as "get_current_user"
|
||||
participant DB as "AsyncSession"
|
||||
participant Model as "User 模型"
|
||||
Client->>Auth : "POST /api/v1/auth/login"
|
||||
Auth->>DB : "执行认证查询"
|
||||
DB-->>Auth : "返回用户对象"
|
||||
Auth-->>Client : "返回访问令牌与用户信息"
|
||||
Client->>Auth : "GET /api/v1/auth/me"
|
||||
Auth->>Deps : "校验令牌并解析用户ID"
|
||||
Deps->>DB : "查询用户"
|
||||
DB-->>Deps : "返回用户"
|
||||
Deps-->>Client : "返回当前用户"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/auth.py:13-42](file://backend/app/api/auth.py#L13-L42)
|
||||
- [backend/app/api/deps.py:16-42](file://backend/app/api/deps.py#L16-L42)
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
|
||||
## 依赖分析
|
||||
- 组件耦合
|
||||
- API 层通过依赖注入使用数据库会话,降低耦合度
|
||||
- 模型与数据库层解耦,仅依赖 Base 与类型注解
|
||||
- 外部依赖
|
||||
- SQLAlchemy 2.x 异步 ORM、asyncpg、Alembic
|
||||
- FastAPI、Pydantic Settings、Redis(缓存/任务)
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
API_AUTH["app/api/auth.py"] --> DEPS["app/api/deps.py"]
|
||||
DEPS --> DB["app/database.py"]
|
||||
API_AUTH --> DB
|
||||
DB --> CONFIG["app/config.py"]
|
||||
DB --> MODELS["app/models/*"]
|
||||
ALEMBIC_ENV["alembic/env.py"] --> CONFIG
|
||||
ALEMBIC_ENV --> MODELS
|
||||
ALEMBIC_MIG["alembic/versions/*"] --> MODELS
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:1-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L1-L128)
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
|
||||
## 性能考量
|
||||
- 异步优势
|
||||
- 高并发场景下减少阻塞,提升吞吐量
|
||||
- 与 FastAPI 协同,充分利用事件循环
|
||||
- 连接池与会话
|
||||
- 默认未显式配置连接池参数,建议在生产中根据 QPS 与实例规格调整最大连接数、空闲连接数与超时
|
||||
- 索引与查询
|
||||
- 已在高频查询字段建立索引,避免全表扫描
|
||||
- 对 JSONB 字段的查询建议结合 GIN 索引(如需)
|
||||
- 写入优化
|
||||
- 批量写入与事务合并可显著降低写入延迟
|
||||
- 监控与观测
|
||||
- 建议集成数据库性能视图与慢查询日志
|
||||
- 结合应用指标(请求耗时、错误率)定位瓶颈
|
||||
|
||||
## 故障排查指南
|
||||
- 连接失败
|
||||
- 检查 DATABASE_URL 是否正确,网络连通性与防火墙
|
||||
- 确认 PostgreSQL 实例健康状态与认证凭据
|
||||
- 迁移失败
|
||||
- 离线/在线模式分别检查配置与权限
|
||||
- 查看 Alembic 输出与 PostgreSQL 日志
|
||||
- 会话泄漏
|
||||
- 确保所有分支均能到达 finally 块,避免异常中断导致会话未关闭
|
||||
- 错误处理
|
||||
- API 层对认证失败与数据库异常进行标准化响应
|
||||
- 建议增加重试与熔断策略(如适用)
|
||||
|
||||
章节来源
|
||||
- [backend/app/database.py:23-28](file://backend/app/database.py#L23-L28)
|
||||
- [backend/alembic/env.py:64-88](file://backend/alembic/env.py#L64-L88)
|
||||
- [backend/app/api/auth.py:13-42](file://backend/app/api/auth.py#L13-L42)
|
||||
- [backend/app/api/deps.py:16-42](file://backend/app/api/deps.py#L16-L42)
|
||||
|
||||
## 结论
|
||||
GEO 项目采用 SQLAlchemy 异步 ORM 与 FastAPI 构建了高并发、可维护的数据库层。通过合理的模型设计、索引策略与依赖注入,实现了清晰的职责分离与良好的扩展性。建议在生产环境中完善连接池参数、监控体系与安全策略,持续优化查询与写入路径。
|
||||
|
||||
## 附录
|
||||
|
||||
### 生产环境配置建议
|
||||
- 连接字符串与环境变量
|
||||
- 使用独立 .env 文件管理 DATABASE_URL,避免硬编码
|
||||
- 在容器编排中通过环境注入,确保不同环境隔离
|
||||
- 连接池与会话
|
||||
- 显式配置最大连接数、空闲连接数、连接超时与回收策略
|
||||
- 控制会话生命周期,避免长事务与长时间持有连接
|
||||
- 安全
|
||||
- 使用强口令与 TLS 加密传输
|
||||
- 限制数据库用户权限,最小化授权
|
||||
- 监控与可观测性
|
||||
- 集成数据库性能视图与慢查询日志
|
||||
- 应用层记录关键指标(QPS、P95/P99、错误率)
|
||||
- 备份与恢复
|
||||
- 定期备份策略与演练,确保可快速恢复
|
||||
|
||||
### 性能调优清单
|
||||
- 索引优化
|
||||
- 对高频过滤与排序字段建立合适索引
|
||||
- 对 JSONB 字段查询考虑 GIN 索引
|
||||
- 查询优化
|
||||
- 减少 N+1 查询,使用 selectinload 或 joinedload
|
||||
- 合理分页与投影,避免 SELECT *
|
||||
- 写入优化
|
||||
- 批量插入与事务合并
|
||||
- 写入热点拆分与读写分离(如需要)
|
||||
- 连接与资源
|
||||
- 合理设置连接池大小,避免过度占用
|
||||
- 及时关闭会话与释放资源
|
||||
|
|
@ -0,0 +1,603 @@
|
|||
# 数据库设计
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [backend/app/models/query_task.py](file://backend/app/models/query_task.py)
|
||||
- [backend/app/models/subscription.py](file://backend/app/models/subscription.py)
|
||||
- [backend/app/models/__init__.py](file://backend/app/models/__init__.py)
|
||||
- [backend/alembic/env.py](file://backend/alembic/env.py)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py)
|
||||
- [backend/alembic/versions/b2c4d6e8fa10_add_confidence_match_type_to_citation_records.py](file://backend/alembic/versions/b2c4d6e8fa10_add_confidence_match_type_to_citation_records.py)
|
||||
- [backend/alembic.ini](file://backend/alembic.ini)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [backend/app/services/query.py](file://backend/app/services/query.py)
|
||||
- [backend/app/services/citation.py](file://backend/app/services/citation.py)
|
||||
- [backend/app/schemas/citation.py](file://backend/app/schemas/citation.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/api/reports.py](file://backend/app/api/reports.py)
|
||||
</cite>
|
||||
|
||||
## 更新摘要
|
||||
**变更内容**
|
||||
- 新增数据库迁移版本,添加confidence和match_type字段到citation_records表
|
||||
- 更新CitationRecord模型以支持新的字段定义
|
||||
- 增强报告功能,支持匹配置信度和匹配类型的统计分析
|
||||
- 更新CSV导出功能,包含新的字段输出
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件为 GEO 智能检索与引用监测平台的数据库设计文档,聚焦于基于 PostgreSQL 的关系型数据库架构,涵盖:
|
||||
- 表结构设计与实体关系映射(ER)
|
||||
- 索引策略与查询优化
|
||||
- SQLAlchemy ORM 模型实现(模型定义、关系配置、查询封装)
|
||||
- 数据库迁移管理、版本控制与部署策略
|
||||
- 数据完整性约束、事务处理与并发控制
|
||||
- 性能优化、查询分析与缓存策略
|
||||
- 备份、恢复与维护最佳实践
|
||||
|
||||
## 项目结构
|
||||
后端采用 FastAPI + SQLAlchemy Async + Alembic 迁移的典型分层架构:
|
||||
- 配置层:读取环境变量,提供数据库连接字符串
|
||||
- 数据库引擎与会话:异步连接池与会话工厂
|
||||
- ORM 模型层:用户、查询、引用记录、任务、订阅
|
||||
- 迁移层:Alembic 初始化迁移脚本
|
||||
- 服务层:业务查询封装,统一事务边界
|
||||
- 容器编排:Docker Compose 启动 Postgres 与 Redis
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "应用层"
|
||||
SvcQ["服务: 查询<br/>backend/app/services/query.py"]
|
||||
SvcC["服务: 引用<br/>backend/app/services/citation.py"]
|
||||
API["API: 引用<br/>backend/app/api/citations.py"]
|
||||
Reports["API: 报告<br/>backend/app/api/reports.py"]
|
||||
end
|
||||
subgraph "ORM 层"
|
||||
MUser["模型: 用户<br/>backend/app/models/user.py"]
|
||||
MQuery["模型: 查询<br/>backend/app/models/query.py"]
|
||||
MCit["模型: 引用记录<br/>backend/app/models/citation_record.py"]
|
||||
MTask["模型: 查询任务<br/>backend/app/models/query_task.py"]
|
||||
MSub["模型: 订阅<br/>backend/app/models/subscription.py"]
|
||||
end
|
||||
subgraph "基础设施"
|
||||
DB["PostgreSQL 数据库"]
|
||||
RD["Redis 缓存"]
|
||||
end
|
||||
Cfg["配置: DATABASE_URL<br/>backend/app/config.py"]
|
||||
Eng["引擎与会话<br/>backend/app/database.py"]
|
||||
Alembic["迁移: Alembic<br/>backend/alembic/*"]
|
||||
Cfg --> Eng
|
||||
Eng --> DB
|
||||
SvcQ --> Eng
|
||||
SvcC --> Eng
|
||||
API --> SvcC
|
||||
Reports --> SvcC
|
||||
MUser --> Eng
|
||||
MQuery --> Eng
|
||||
MCit --> Eng
|
||||
MTask --> Eng
|
||||
MSub --> Eng
|
||||
Alembic --> DB
|
||||
SvcQ --> RD
|
||||
SvcC --> RD
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-44](file://backend/app/models/citation_record.py#L1-L44)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- [backend/app/models/subscription.py:1-37](file://backend/app/models/subscription.py#L1-L37)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
章节来源
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
## 核心组件
|
||||
- 数据库引擎与会话
|
||||
- 使用异步引擎与会话工厂;会话工厂关闭自动提交、自动刷新与提交时过期(expire_on_commit),由调用方显式控制事务边界,确保事务一致性与资源释放。
|
||||
- 提供依赖注入式 get_db 生成器,保证每个请求生命周期内复用同一会话。
|
||||
- ORM 模型
|
||||
- 所有模型继承自统一的 Base,表名与字段类型均在模型中显式声明。
|
||||
- 关系通过 relationship 显式配置,支持级联删除与孤儿对象清理。
|
||||
- 迁移与版本控制
|
||||
- Alembic 环境集成 SQLAlchemy Base 元数据,支持离线/在线迁移。
|
||||
- 初始迁移脚本定义了用户、查询、引用记录、查询任务、订阅五张表及必要索引。
|
||||
- 新增迁移版本支持confidence和match_type字段,增强报告功能。
|
||||
- 服务层封装
|
||||
- 查询与引用统计、导出等业务逻辑封装在服务层,统一执行 SQL 并返回结果。
|
||||
- 对外暴露清晰的查询接口,内部进行权限校验与计数限制。
|
||||
|
||||
章节来源
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/models/__init__.py:1-14](file://backend/app/models/__init__.py#L1-L14)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:1-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L1-L128)
|
||||
- [backend/alembic/versions/b2c4d6e8fa10_add_confidence_match_type_to_citation_records.py:1-37](file://backend/alembic/versions/b2c4d6e8fa10_add_confidence_match_type_to_citation_records.py#L1-L37)
|
||||
- [backend/app/services/query.py:1-130](file://backend/app/services/query.py#L1-L130)
|
||||
- [backend/app/services/citation.py:1-429](file://backend/app/services/citation.py#L1-L429)
|
||||
|
||||
## 架构总览
|
||||
下图展示数据库层与应用层的交互关系,以及迁移与容器编排对数据库的影响。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant API as "FastAPI 应用"
|
||||
participant SvcC as "引用服务"
|
||||
participant Reports as "报告服务"
|
||||
participant DB as "PostgreSQL"
|
||||
participant Alembic as "迁移工具"
|
||||
Client->>API : 请求 /citations 或 /reports
|
||||
API->>SvcC : 调用引用服务
|
||||
SvcC->>DB : 执行查询/统计/插入
|
||||
API->>Reports : 调用报告服务
|
||||
Reports->>DB : 执行导出查询
|
||||
DB-->>Reports : 返回统计数据
|
||||
DB-->>SvcC : 返回引用记录
|
||||
Reports-->>API : 返回CSV内容
|
||||
SvcC-->>API : 返回查询列表/详情
|
||||
API-->>Client : 响应数据
|
||||
Note over Alembic,DB : 首次启动或升级时执行迁移
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/services/citation.py:1-429](file://backend/app/services/citation.py#L1-L429)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 实体关系映射(ER)
|
||||
- 用户(users):主键 id,唯一邮箱,计划与配额字段,活跃状态,时间戳。
|
||||
- 查询(queries):外键 user_id,关键词、目标品牌、别名列表、平台列表、频率、状态、上次查询时间、下次查询时间,时间戳。
|
||||
- 引用记录(citation_records):外键 query_id,平台、是否引用、引用位置、引用文本、竞争品牌列表、原始响应、匹配置信度、匹配类型、查询时间。
|
||||
- 查询任务(query_tasks):外键 query_id,平台、状态、错误信息、调度/开始/完成时间。
|
||||
- 订阅(subscriptions):外键 user_id,计划、状态、起止日期、金额、支付方式与流水号,时间戳。
|
||||
|
||||
**更新** 新增confidence和match_type字段到引用记录表,支持增强的报告功能。
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
USERS {
|
||||
uuid id PK
|
||||
string email UK
|
||||
string password_hash
|
||||
string name
|
||||
string plan
|
||||
integer max_queries
|
||||
boolean is_active
|
||||
timestamptz created_at
|
||||
timestamptz updated_at
|
||||
}
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamptz last_queried_at
|
||||
timestamptz next_query_at
|
||||
timestamptz created_at
|
||||
timestamptz updated_at
|
||||
}
|
||||
CITATION_RECORDS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
boolean cited
|
||||
integer citation_position
|
||||
text citation_text
|
||||
jsonb competitor_brands
|
||||
text raw_response
|
||||
float confidence
|
||||
string match_type
|
||||
timestamptz queried_at
|
||||
}
|
||||
QUERY_TASKS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
string status
|
||||
text error_message
|
||||
timestamptz scheduled_at
|
||||
timestamptz started_at
|
||||
timestamptz completed_at
|
||||
}
|
||||
SUBSCRIPTIONS {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string plan
|
||||
string status
|
||||
date start_date
|
||||
date end_date
|
||||
numeric amount
|
||||
string payment_method
|
||||
string payment_id
|
||||
timestamptz created_at
|
||||
}
|
||||
USERS ||--o{ QUERIES : "拥有"
|
||||
QUERIES ||--o{ CITATION_RECORDS : "产生"
|
||||
QUERIES ||--o{ QUERY_TASKS : "触发"
|
||||
USERS ||--o{ SUBSCRIPTIONS : "订阅"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-44](file://backend/app/models/citation_record.py#L1-L44)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- [backend/app/models/subscription.py:1-37](file://backend/app/models/subscription.py#L1-L37)
|
||||
|
||||
章节来源
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-44](file://backend/app/models/citation_record.py#L1-L44)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- [backend/app/models/subscription.py:1-37](file://backend/app/models/subscription.py#L1-L37)
|
||||
|
||||
### 索引策略
|
||||
- 查询表(queries)
|
||||
- idx_queries_user_id:按用户过滤
|
||||
- idx_queries_status:按状态过滤
|
||||
- idx_queries_next_query_at:按下次查询时间调度
|
||||
- 引用记录表(citation_records)
|
||||
- idx_citation_records_query_id:按查询聚合
|
||||
- idx_citation_records_queried_at:按时间排序/范围
|
||||
- idx_citation_records_platform:按平台统计
|
||||
- **新增** idx_citation_records_confidence:按匹配置信度过滤(建议)
|
||||
- **新增** idx_citation_records_match_type:按匹配类型过滤(建议)
|
||||
|
||||
**更新** 新增针对confidence和match_type字段的索引建议,支持更精细的查询过滤。
|
||||
|
||||
这些索引覆盖了常见查询路径与统计场景,有助于提升分页、过滤、排序与聚合的性能。
|
||||
|
||||
章节来源
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:57-94](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L57-L94)
|
||||
- [backend/alembic/versions/b2c4d6e8fa10_add_confidence_match_type_to_citation_records.py:21-37](file://backend/alembic/versions/b2c4d6e8fa10_add_confidence_match_type_to_citation_records.py#L21-L37)
|
||||
- [backend/app/models/query.py:50-54](file://backend/app/models/query.py#L50-L54)
|
||||
- [backend/app/models/citation_record.py:37-44](file://backend/app/models/citation_record.py#L37-L44)
|
||||
- [backend/app/models/query_task.py:36-38](file://backend/app/models/query_task.py#L36-L38)
|
||||
|
||||
### SQLAlchemy ORM 模型实现
|
||||
- 字段类型与默认值
|
||||
- UUID 主键与外键,JSONB 存储数组/字典,布尔、整数、文本、数值、时间戳等。
|
||||
- 默认值通过 server_default/onupdate 设置,减少应用层重复逻辑。
|
||||
- **新增** confidence字段(Float类型,nullable=True)用于存储匹配的可信度评分。
|
||||
- **新增** match_type字段(String类型,长度20,nullable=True)用于标识匹配类型。
|
||||
- 关系配置
|
||||
- 用户与查询、订阅为一对多;查询与引用记录、任务为一对多。
|
||||
- 级联删除与孤儿对象清理(delete-orphan),避免悬挂数据。
|
||||
- 查询封装
|
||||
- 服务层使用 select + func + join 封装复杂查询,统一处理分页与计数。
|
||||
- 权限校验:仅允许访问当前用户的资源,防止越权。
|
||||
|
||||
**更新** CitationRecord模型新增confidence和match_type字段定义,支持增强的报告功能。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class User {
|
||||
+id : uuid
|
||||
+email : string
|
||||
+password_hash : string
|
||||
+name : string
|
||||
+plan : string
|
||||
+max_queries : int
|
||||
+is_active : bool
|
||||
+created_at : datetime
|
||||
+updated_at : datetime
|
||||
+queries
|
||||
+subscriptions
|
||||
}
|
||||
class Query {
|
||||
+id : uuid
|
||||
+user_id : uuid
|
||||
+keyword : string
|
||||
+target_brand : string
|
||||
+brand_aliases : list
|
||||
+platforms : list
|
||||
+frequency : string
|
||||
+status : string
|
||||
+last_queried_at : datetime
|
||||
+next_query_at : datetime
|
||||
+created_at : datetime
|
||||
+updated_at : datetime
|
||||
+user
|
||||
+citation_records
|
||||
+query_tasks
|
||||
}
|
||||
class CitationRecord {
|
||||
+id : uuid
|
||||
+query_id : uuid
|
||||
+platform : string
|
||||
+cited : bool
|
||||
+citation_position : int
|
||||
+citation_text : text
|
||||
+competitor_brands : list
|
||||
+raw_response : text
|
||||
+confidence : float
|
||||
+match_type : string
|
||||
+queried_at : datetime
|
||||
+query
|
||||
}
|
||||
class QueryTask {
|
||||
+id : uuid
|
||||
+query_id : uuid
|
||||
+platform : string
|
||||
+status : string
|
||||
+error_message : text
|
||||
+scheduled_at : datetime
|
||||
+started_at : datetime
|
||||
+completed_at : datetime
|
||||
+query
|
||||
}
|
||||
class Subscription {
|
||||
+id : uuid
|
||||
+user_id : uuid
|
||||
+plan : string
|
||||
+status : string
|
||||
+start_date : date
|
||||
+end_date : date
|
||||
+amount : float
|
||||
+payment_method : string
|
||||
+payment_id : string
|
||||
+created_at : datetime
|
||||
+user
|
||||
}
|
||||
User "1" <-- "many" Query : "拥有"
|
||||
Query "1" <-- "many" CitationRecord : "产生"
|
||||
Query "1" <-- "many" QueryTask : "触发"
|
||||
User "1" <-- "many" Subscription : "订阅"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/models/query.py:11-48](file://backend/app/models/query.py#L11-L48)
|
||||
- [backend/app/models/citation_record.py:11-44](file://backend/app/models/citation_record.py#L11-L44)
|
||||
- [backend/app/models/query_task.py:11-34](file://backend/app/models/query_task.py#L11-L34)
|
||||
- [backend/app/models/subscription.py:11-36](file://backend/app/models/subscription.py#L11-L36)
|
||||
|
||||
章节来源
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-44](file://backend/app/models/citation_record.py#L1-L44)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- [backend/app/models/subscription.py:1-37](file://backend/app/models/subscription.py#L1-L37)
|
||||
|
||||
### 数据库迁移管理、版本控制与部署策略
|
||||
- 迁移入口
|
||||
- Alembic 环境加载 Base 元数据,支持离线/在线迁移。
|
||||
- 在线迁移通过异步引擎连接数据库,避免阻塞。
|
||||
- 迁移版本
|
||||
- **初始版本** (488d0bd5ab01):创建 users、queries、citation_records、query_tasks、subscriptions 表,并建立必要索引。
|
||||
- **新增版本** (b2c4d6e8fa10):向 citation_records 表添加 confidence 和 match_type 字段,支持增强的报告功能。
|
||||
- 外键约束与级联删除策略明确,确保数据一致性。
|
||||
- 部署策略
|
||||
- Docker Compose 启动 PostgreSQL 与 Redis,应用容器依赖数据库健康检查。
|
||||
- 生产环境建议将数据库与缓存分离,使用独立卷持久化数据。
|
||||
|
||||
**更新** 新增b2c4d6e8fa10迁移版本,支持confidence和match_type字段的添加。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["开始"]) --> CheckEnv["检查 DATABASE_URL"]
|
||||
CheckEnv --> LoadBase["加载 Base 元数据"]
|
||||
LoadBase --> Mode{"运行模式"}
|
||||
Mode --> |离线| Offline["配置 URL 与元数据"]
|
||||
Mode --> |在线| Online["创建异步引擎并连接"]
|
||||
Offline --> RunMigs["执行迁移"]
|
||||
Online --> RunMigs
|
||||
RunMigs --> Version{"检查版本"}
|
||||
Version --> |488d0bd5ab01| InitMigration["初始迁移"]
|
||||
Version --> |b2c4d6e8fa10| AddFields["添加新字段"]
|
||||
InitMigration --> Done(["完成"])
|
||||
AddFields --> Done
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/alembic/env.py:33-88](file://backend/alembic/env.py#L33-L88)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:21-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L21-L128)
|
||||
- [backend/alembic/versions/b2c4d6e8fa10_add_confidence_match_type_to_citation_records.py:21-37](file://backend/alembic/versions/b2c4d6e8fa10_add_confidence_match_type_to_citation_records.py#L21-L37)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
章节来源
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:1-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L1-L128)
|
||||
- [backend/alembic/versions/b2c4d6e8fa10_add_confidence_match_type_to_citation_records.py:1-37](file://backend/alembic/versions/b2c4d6e8fa10_add_confidence_match_type_to_citation_records.py#L1-L37)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
### 数据完整性约束、事务处理与并发控制
|
||||
- 完整性约束
|
||||
- 唯一约束:用户邮箱唯一
|
||||
- 外键约束:查询、订阅通过 user_id 对用户级联删除;引用记录、任务通过 query_id 对查询级联删除
|
||||
- JSONB 字段默认值:空数组/字典,避免 NULL 导致的条件判断复杂化
|
||||
- **新增** confidence字段允许NULL值,match_type字段限制长度为20字符
|
||||
- 事务处理
|
||||
- 服务层方法在单个事务内执行插入/更新/删除,提交后刷新对象状态
|
||||
- 会话工厂设置 expire_on_commit=False,减少后续查询的额外开销
|
||||
- 并发控制
|
||||
- 异步连接池与会话隔离,避免阻塞
|
||||
- 服务层在执行前进行权限校验与配额检查,降低并发冲突概率
|
||||
|
||||
**更新** 新增对confidence和match_type字段的完整性约束说明。
|
||||
|
||||
章节来源
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:36-111](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L36-L111)
|
||||
- [backend/alembic/versions/b2c4d6e8fa10_add_confidence_match_type_to_citation_records.py:21-37](file://backend/alembic/versions/b2c4d6e8fa10_add_confidence_match_type_to_citation_records.py#L21-L37)
|
||||
- [backend/app/services/query.py:45-81](file://backend/app/services/query.py#L45-L81)
|
||||
- [backend/app/database.py:12-18](file://backend/app/database.py#L12-L18)
|
||||
|
||||
### 查询流程与优化要点
|
||||
- 查询列表与计数
|
||||
- 分页查询与计数分离,避免重复扫描全表
|
||||
- 使用索引覆盖 user_id 与 created_at 排序
|
||||
- **新增** 支持按confidence和match_type过滤查询
|
||||
- 引用统计
|
||||
- 使用 JOIN 查询限定到用户所属的查询
|
||||
- 按平台分组统计,利用索引加速 queried_at 与 platform
|
||||
- **新增** 支持按匹配置信度和匹配类型进行统计分析
|
||||
- 导出 CSV
|
||||
- 以查询为维度导出,先验证所有权再执行导出
|
||||
- **新增** 输出confidence和match_type字段到CSV文件
|
||||
|
||||
**更新** 增强查询流程,支持新的字段过滤和统计功能。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant API as "API"
|
||||
participant Svc as "服务"
|
||||
participant DB as "数据库"
|
||||
API->>Svc : get_citations(user_id, query_id?, platform?, dates?, confidence?, match_type?)
|
||||
Svc->>DB : SELECT ... FROM citation_records JOIN queries WHERE ...
|
||||
DB-->>Svc : 记录集
|
||||
Svc->>DB : COUNT ... FROM citation_records JOIN queries WHERE ...
|
||||
DB-->>Svc : 总数
|
||||
Svc-->>API : 结果与总数
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/services/citation.py:30-79](file://backend/app/services/citation.py#L30-L79)
|
||||
|
||||
章节来源
|
||||
- [backend/app/services/query.py:12-32](file://backend/app/services/query.py#L12-L32)
|
||||
- [backend/app/services/citation.py:30-79](file://backend/app/services/citation.py#L30-L79)
|
||||
|
||||
### 报告功能增强
|
||||
- **匹配置信度分析**
|
||||
- confidence字段用于存储匹配的可信度评分(0.0-1.0)
|
||||
- 支持按置信度区间进行统计分析
|
||||
- 在CSV导出中显示详细的置信度信息
|
||||
- **匹配类型分类**
|
||||
- match_type字段标识匹配类型:exact(精确匹配)、alias(别名匹配)、fuzzy(模糊匹配)
|
||||
- 支持按匹配类型进行分组统计
|
||||
- 在报告中提供中文显示(精确匹配、别名匹配、模糊匹配)
|
||||
- **增强的统计指标**
|
||||
- 支持按置信度和匹配类型的组合进行交叉分析
|
||||
- 提供更精细的引用质量评估
|
||||
|
||||
**新增** 报告功能章节,详细介绍新增的confidence和match_type字段的应用。
|
||||
|
||||
章节来源
|
||||
- [backend/app/services/citation.py:298-308](file://backend/app/services/citation.py#L298-L308)
|
||||
- [backend/app/services/citation.py:342-429](file://backend/app/services/citation.py#L342-L429)
|
||||
- [backend/app/schemas/citation.py:7-18](file://backend/app/schemas/citation.py#L7-L18)
|
||||
|
||||
## 依赖分析
|
||||
- 模块耦合
|
||||
- 模型层仅依赖 Base 与 SQLAlchemy 类型,低耦合
|
||||
- 服务层依赖模型与会话,职责清晰
|
||||
- Alembic 依赖 Base 与配置,迁移脚本与模型同步演进
|
||||
- **新增** API层依赖服务层,提供RESTful接口
|
||||
- 外部依赖
|
||||
- PostgreSQL 异步驱动(asyncpg)
|
||||
- Redis(用于缓存,如需要)
|
||||
|
||||
**更新** 新增API层依赖关系,支持新的报告功能接口。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
Cfg["配置<br/>config.py"] --> DB["数据库引擎<br/>database.py"]
|
||||
DB --> Models["ORM 模型<br/>models/*"]
|
||||
Models --> Services["服务层<br/>services/*"]
|
||||
Services --> API["API层<br/>api/*"]
|
||||
Alembic["迁移<br/>alembic/*"] --> DB
|
||||
Docker["编排<br/>docker-compose.yml"] --> DB
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/models/__init__.py:1-14](file://backend/app/models/__init__.py#L1-L14)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
章节来源
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/models/__init__.py:1-14](file://backend/app/models/__init__.py#L1-L14)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
## 性能考虑
|
||||
- 索引优化
|
||||
- 为高频过滤字段建立单列索引(user_id/status/next_query_at/platform/queried_at 已建立;confidence/match_type 为建议新增)
|
||||
- 对 JSONB 字段可考虑 GIN 索引(如需复杂查询),当前迁移脚本未启用
|
||||
- **新增** 建议为confidence和match_type字段建立单独索引,支持高效过滤
|
||||
- 查询优化
|
||||
- 分页与计数分离,避免重复扫描
|
||||
- 使用 JOIN 限定用户范围,减少全表扫描
|
||||
- 时间范围查询使用索引覆盖
|
||||
- **新增** 支持按confidence范围和match_type进行高效过滤
|
||||
- 缓存策略
|
||||
- 引用统计与趋势数据可缓存至 Redis,设置合理过期时间
|
||||
- 导出 CSV 可缓存热点查询结果,降低数据库压力
|
||||
- **新增** 报告统计数据可缓存,提高频繁访问的响应速度
|
||||
- 连接与并发
|
||||
- 使用异步连接池,避免阻塞
|
||||
- 控制并发度,避免大量写入导致锁争用
|
||||
|
||||
**更新** 新增针对confidence和match_type字段的性能优化建议。
|
||||
|
||||
## 故障排查指南
|
||||
- 迁移失败
|
||||
- 检查 DATABASE_URL 是否正确,Alembic 配置与环境变量一致
|
||||
- 确认数据库已初始化且用户具备权限
|
||||
- **新增** 检查新字段的默认值和约束条件
|
||||
- 查询异常
|
||||
- 核对服务层权限校验逻辑,确认 user_id 与查询归属一致
|
||||
- 检查索引是否存在,必要时重建索引
|
||||
- **新增** 验证confidence和match_type字段的数据类型和取值范围
|
||||
- 导出失败
|
||||
- 确认查询所有权校验通过
|
||||
- 检查 CSV 写入逻辑与字符编码
|
||||
- **新增** 验证新字段在导出过程中的处理逻辑
|
||||
|
||||
**更新** 新增针对新字段的故障排查指导。
|
||||
|
||||
章节来源
|
||||
- [backend/alembic/env.py:33-88](file://backend/alembic/env.py#L33-L88)
|
||||
- [backend/app/services/citation.py:14-22](file://backend/app/services/citation.py#L14-L22)
|
||||
|
||||
## 结论
|
||||
本数据库设计围绕用户、查询、引用记录、任务与订阅五大实体展开,采用 PostgreSQL + SQLAlchemy Async + Alembic 的成熟技术栈,具备良好的扩展性与可维护性。通过合理的索引策略、事务边界与服务层封装,能够满足日常查询、统计与导出需求。
|
||||
|
||||
**更新** 新增的confidence和match_type字段显著增强了报告功能,提供了更精细的引用质量分析能力。建议在生产环境中进一步引入缓存与监控,持续优化查询路径与索引覆盖。
|
||||
|
||||
## 附录
|
||||
- 部署与运维
|
||||
- 使用 Docker Compose 启动数据库与应用,确保数据库健康检查通过后再启动应用
|
||||
- 生产环境建议使用独立数据库实例与只读副本,配合连接池与慢查询日志
|
||||
- **新增** 升级时确保迁移脚本按顺序执行,从初始版本到最新版本
|
||||
- 备份与恢复
|
||||
- 使用 pg_dump/pg_restore 进行逻辑备份与恢复
|
||||
- 对关键表定期增量备份,结合 WAL 归档实现时间点恢复(PITR,Point-in-Time Recovery)
|
||||
- **新增** 新字段变更需要纳入备份策略,确保数据完整性
|
||||
- 监控与告警
|
||||
- 监控连接数、查询延迟、索引命中率与慢查询
|
||||
- 对迁移脚本变更进行版本化管理与回滚演练
|
||||
- **新增** 监控新字段的使用情况和性能影响
|
||||
|
||||
**更新** 新增针对新字段的部署和运维指导。
|
||||
|
|
@ -0,0 +1,319 @@
|
|||
# 数据库迁移
|
||||
|
||||
<cite>
|
||||
**本文引用的文件**
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py)
|
||||
- [backend/alembic/env.py](file://backend/alembic/env.py)
|
||||
- [backend/alembic.ini](file://backend/alembic.ini)
|
||||
- [backend/alembic/script.py.mako](file://backend/alembic/script.py.mako)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/models/__init__.py](file://backend/app/models/__init__.py)
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [backend/app/models/query_task.py](file://backend/app/models/query_task.py)
|
||||
- [backend/app/models/subscription.py](file://backend/app/models/subscription.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件面向GEO项目的数据库迁移与版本管理,系统性说明基于Alembic的迁移框架配置与使用方式,覆盖迁移脚本生成、版本管理、数据库升级策略、初始迁移实现(含表创建、索引与约束)、迁移命令(upgrade、downgrade、autogenerate)以及生产环境最佳实践与风险控制。读者无需深入Python或SQLAlchemy即可理解并安全地执行迁移。
|
||||
|
||||
## 项目结构
|
||||
GEO后端采用异步SQLAlchemy与Alembic进行数据库迁移管理,迁移相关代码集中在backend/alembic目录中,并通过env.py与应用模型(models)建立连接。数据库连接由应用配置提供,迁移脚本位于versions子目录中,按版本顺序命名。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "迁移配置"
|
||||
A["alembic.ini<br/>脚本位置、日志、URL等"]
|
||||
B["env.py<br/>在线/离线迁移入口"]
|
||||
C["script.py.mako<br/>迁移模板"]
|
||||
end
|
||||
subgraph "模型定义"
|
||||
D["models/__init__.py<br/>导出所有模型"]
|
||||
E["models/user.py"]
|
||||
F["models/query.py"]
|
||||
G["models/citation_record.py"]
|
||||
H["models/query_task.py"]
|
||||
I["models/subscription.py"]
|
||||
end
|
||||
subgraph "数据库"
|
||||
J["PostgreSQL 实例"]
|
||||
end
|
||||
A --> B
|
||||
B --> D
|
||||
D --> E
|
||||
D --> F
|
||||
D --> G
|
||||
D --> H
|
||||
D --> I
|
||||
B --> J
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/alembic.ini:1-150](file://backend/alembic.ini#L1-L150)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [backend/alembic/script.py.mako:1-29](file://backend/alembic/script.py.mako#L1-L29)
|
||||
- [backend/app/models/__init__.py:1-14](file://backend/app/models/__init__.py#L1-L14)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- [backend/app/models/subscription.py:1-37](file://backend/app/models/subscription.py#L1-L37)
|
||||
|
||||
**章节来源**
|
||||
- [backend/alembic.ini:1-150](file://backend/alembic.ini#L1-L150)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [backend/app/models/__init__.py:1-14](file://backend/app/models/__init__.py#L1-L14)
|
||||
|
||||
## 核心组件
|
||||
- 迁移配置与入口
|
||||
- alembic.ini:定义脚本位置、日志级别、数据库URL等全局设置。
|
||||
- env.py:根据运行模式(在线/离线)加载目标元数据并执行迁移。
|
||||
- script.py.mako:迁移脚本模板,定义upgrade/downgrade骨架。
|
||||
- 模型与元数据
|
||||
- models/__init__.py:聚合导出所有ORM模型,供Alembic扫描。
|
||||
- 各模型文件:定义表结构、字段类型、索引与外键关系。
|
||||
- 数据库引擎
|
||||
- app/database.py:创建异步引擎与Base,为迁移提供元数据基础。
|
||||
- app/config.py:提供DATABASE_URL,由env.py读取使用;alembic.ini中另保留一个默认示例URL。
|
||||
|
||||
**章节来源**
|
||||
- [backend/alembic.ini:1-150](file://backend/alembic.ini#L1-L150)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [backend/alembic/script.py.mako:1-29](file://backend/alembic/script.py.mako#L1-L29)
|
||||
- [backend/app/models/__init__.py:1-14](file://backend/app/models/__init__.py#L1-L14)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
- [backend/app/models/query_task.py:1-39](file://backend/app/models/query_task.py#L1-L39)
|
||||
- [backend/app/models/subscription.py:1-37](file://backend/app/models/subscription.py#L1-L37)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
## 架构总览
|
||||
下图展示从命令到数据库的迁移执行链路,包括在线与离线两种模式,以及模型元数据对迁移的影响。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant CLI as "命令行"
|
||||
participant Alembic as "Alembic CLI"
|
||||
participant Env as "env.py"
|
||||
participant DB as "PostgreSQL"
|
||||
CLI->>Alembic : 执行迁移命令
|
||||
Alembic->>Env : 加载配置与元数据
|
||||
Env->>Env : 判断在线/离线模式
|
||||
alt 在线模式
|
||||
Env->>DB : 异步连接并执行事务
|
||||
else 离线模式
|
||||
Env->>Env : 仅使用URL生成SQL脚本(不连接数据库)
|
||||
end
|
||||
Env-->>Alembic : 返回迁移结果
|
||||
Alembic-->>CLI : 输出状态与日志
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/alembic/env.py:33-89](file://backend/alembic/env.py#L33-L89)
|
||||
- [backend/alembic.ini:86-89](file://backend/alembic.ini#L86-L89)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 初始迁移脚本(488d0bd5ab01)
|
||||
该脚本实现首次数据库初始化,包含以下要点:
|
||||
- 表创建
|
||||
- users:用户主表,包含唯一邮箱、计划等级、配额、激活状态等字段。
|
||||
- queries:查询表,关联users,包含关键词、品牌、平台列表、频率、状态及时间戳。
|
||||
- citation_records:引用记录表,关联queries,包含平台、是否引用、竞品品牌、原始响应等。
|
||||
- query_tasks:查询任务执行表,关联queries,包含状态、错误信息与调度/开始/完成时间。
|
||||
- subscriptions:订阅表,关联users,包含计划、状态、起止日期、金额与支付信息。
|
||||
- 索引建立
|
||||
- queries:按user_id、status、next_query_at建立索引。
|
||||
- citation_records:按query_id、queried_at、platform建立索引。
|
||||
- query_tasks:按status建立索引。
|
||||
- 约束与默认值
|
||||
- 外键约束:各子表均对父表执行级联删除。
|
||||
- 默认值:广泛使用服务器默认值(如NOW()、JSONB空数组、字符串默认值)。
|
||||
- 回滚策略
|
||||
- downgrade按逆序删除表与索引,确保幂等与可恢复。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["执行初始迁移"]) --> CreateUsers["创建 users 表"]
|
||||
CreateUsers --> CreateQueries["创建 queries 表<br/>建立索引"]
|
||||
CreateQueries --> CreateRecords["创建 citation_records 表<br/>建立索引"]
|
||||
CreateRecords --> CreateTasks["创建 query_tasks 表<br/>建立索引"]
|
||||
CreateTasks --> CreateSubs["创建 subscriptions 表"]
|
||||
CreateSubs --> End(["完成"])
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:21-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L21-L128)
|
||||
|
||||
**章节来源**
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:1-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L1-L128)
|
||||
|
||||
### 迁移环境与元数据
|
||||
- 元数据来源
|
||||
- env.py通过app.database.Base.metadata提供目标元数据,确保Alembic能识别应用模型的变更。
|
||||
- models/__init__.py统一导出所有模型,避免遗漏。
|
||||
- 在线/离线模式
|
||||
- 在线模式:使用异步引擎连接数据库,适合生产与容器环境。
|
||||
- 离线模式:仅使用URL配置迁移上下文,不连接数据库,而是将迁移输出为SQL脚本(如 alembic upgrade head --sql),便于审查后再手动执行。
|
||||
- 配置来源
|
||||
- DATABASE_URL来自app/config.py,同时在alembic.ini中也有默认示例URL。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
A["env.py"] --> B["Base.metadata"]
|
||||
B --> C["models/*"]
|
||||
A --> D["在线/离线执行器"]
|
||||
D --> E["PostgreSQL"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/alembic/env.py:10-25](file://backend/alembic/env.py#L10-L25)
|
||||
- [backend/app/models/__init__.py:1-14](file://backend/app/models/__init__.py#L1-L14)
|
||||
- [backend/app/database.py](file://backend/app/database.py#L20)
|
||||
- [backend/app/config.py](file://backend/app/config.py#L7)
|
||||
|
||||
**章节来源**
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [backend/app/models/__init__.py:1-14](file://backend/app/models/__init__.py#L1-L14)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
### 迁移命令与工作流
|
||||
- 常用命令
|
||||
- 升级:将数据库迁移到最新版本。
|
||||
- 降级:将数据库回退到指定版本。
|
||||
- 自动生成(autogenerate):基于模型变更自动生成迁移脚本,生成后需人工审查。
|
||||
- 工作流建议
|
||||
- 开发阶段:先在本地dev环境验证,再合并到测试环境。
|
||||
- 测试阶段:在隔离数据库上执行,验证数据完整性与索引有效性。
|
||||
- 生产阶段:严格遵循“只读优先”原则,先备份,再执行,最后验证。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Dev["开发环境"] --> Test["测试环境"]
|
||||
Test --> Prod["生产环境"]
|
||||
Dev --> |dry-run/验证| Test
|
||||
Test --> |备份/演练| Prod
|
||||
```
|
||||
|
||||
[本节为通用流程说明,不直接分析具体文件,故无“章节来源”]
|
||||
|
||||
### 数据保护、回滚与版本控制
|
||||
- 数据保护
|
||||
- 在生产环境执行前务必备份数据库。
|
||||
- 对大表操作(如重建索引)选择维护窗口,避免高峰时段。
|
||||
- 回滚机制
|
||||
- 初始迁移提供完整的downgrade路径,确保可逆。
|
||||
- 建议每次迁移仅包含单一逻辑变更,降低回滚复杂度。
|
||||
- 版本控制
|
||||
- 迁移脚本以版本号命名,保持顺序与可追溯性。
|
||||
- 将迁移脚本纳入版本控制系统,配合提交信息描述变更意图。
|
||||
|
||||
**章节来源**
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:114-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L114-L128)
|
||||
|
||||
### 生产环境最佳实践与风险控制
|
||||
- 风险控制
|
||||
- 限制迁移窗口,避免业务高峰期。
|
||||
- 使用只读副本或独立测试库先行验证。
|
||||
- 对DDL操作进行分批执行,逐步替换索引与约束。
|
||||
- 可观测性
|
||||
- 记录迁移日志,监控执行时长与失败原因。
|
||||
- 在应用侧增加迁移状态检查,防止未完成迁移导致的数据不一致。
|
||||
- 安全与合规
|
||||
- 严格管理DATABASE_URL与访问权限。
|
||||
- 对敏感字段(如密码哈希)迁移需遵循最小暴露原则。
|
||||
|
||||
**章节来源**
|
||||
- [backend/alembic.ini:115-150](file://backend/alembic.ini#L115-L150)
|
||||
- [backend/alembic/env.py:33-89](file://backend/alembic/env.py#L33-L89)
|
||||
|
||||
## 依赖分析
|
||||
- 组件耦合
|
||||
- env.py依赖app/config.py提供的DATABASE_URL与app/database.py的Base元数据。
|
||||
- models/__init__.py集中导出所有模型,提升扫描效率与一致性。
|
||||
- 外部依赖
|
||||
- PostgreSQL作为目标数据库,UUID、JSONB、时间戳等类型在迁移脚本中被广泛使用。
|
||||
- 潜在问题
|
||||
- 若models/__init__.py遗漏导出,可能导致autogenerate无法识别新模型。
|
||||
- 在线迁移依赖异步引擎,需确保网络与连接池配置正确。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
Env["env.py"] --> Cfg["app/config.py"]
|
||||
Env --> DB["app/database.py"]
|
||||
DB --> Meta["Base.metadata"]
|
||||
Meta --> Models["models/*"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/alembic/env.py:10-25](file://backend/alembic/env.py#L10-L25)
|
||||
- [backend/app/config.py](file://backend/app/config.py#L7)
|
||||
- [backend/app/database.py](file://backend/app/database.py#L20)
|
||||
- [backend/app/models/__init__.py:1-14](file://backend/app/models/__init__.py#L1-L14)
|
||||
|
||||
**章节来源**
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/models/__init__.py:1-14](file://backend/app/models/__init__.py#L1-L14)
|
||||
|
||||
## 性能考虑
|
||||
- 索引设计
|
||||
- 初始迁移已为高频查询字段建立索引,有助于提升查询性能。
|
||||
- 对于大表,建议在维护窗口内重建索引,避免阻塞。
|
||||
- 迁移窗口
|
||||
- 将DDL密集型操作安排在低峰时段,减少对线上服务的影响。
|
||||
- 幂等性
|
||||
- 迁移脚本应具备幂等特性,避免重复执行造成资源浪费。
|
||||
|
||||
**章节来源**
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:57-94](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L57-L94)
|
||||
|
||||
## 故障排查指南
|
||||
- 常见问题
|
||||
- 迁移失败:检查DATABASE_URL是否正确,确认数据库可达。
|
||||
- 模型未识别:确保models/__init__.py导出了新增模型。
|
||||
- 在线/离线模式混淆:确认运行环境与配置文件一致。
|
||||
- 排查步骤
|
||||
- 查看日志:调整alembic.ini中的日志级别以获取更详细输出。
|
||||
- 回滚验证:使用downgrade验证回滚路径是否完整。
|
||||
- 数据核对:迁移完成后执行简单查询,核对关键索引与约束是否生效。
|
||||
|
||||
**章节来源**
|
||||
- [backend/alembic.ini:115-150](file://backend/alembic.ini#L115-L150)
|
||||
- [backend/alembic/env.py:33-89](file://backend/alembic/env.py#L33-L89)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:114-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L114-L128)
|
||||
|
||||
## 结论
|
||||
GEO项目已建立完善的Alembic迁移体系:清晰的配置、可靠的元数据扫描、完备的初始迁移脚本与回滚路径。遵循本文的命令使用方法、最佳实践与风险控制措施,可在保障数据安全的前提下高效推进数据库演进。
|
||||
|
||||
## 附录
|
||||
- 迁移命令速查
|
||||
- 升级到最新:alembic upgrade head
|
||||
- 降级到上一版本:alembic downgrade -1
|
||||
- 降级到指定版本:alembic downgrade <版本号>
|
||||
- 自动生成迁移:alembic revision --autogenerate -m "<描述>"
|
||||
- 关键文件定位
|
||||
- 配置:backend/alembic.ini
|
||||
- 环境:backend/alembic/env.py
|
||||
- 模板:backend/alembic/script.py.mako
|
||||
- 初始迁移:backend/alembic/versions/488d0bd5ab01_initial_migration.py
|
||||
- 模型聚合:backend/app/models/__init__.py
|
||||
- 数据库配置:backend/app/config.py
|
||||
|
|
@ -0,0 +1,443 @@
|
|||
# 数据模型
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/models/__init__.py](file://backend/app/models/__init__.py)
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/query_task.py](file://backend/app/models/query_task.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [backend/app/models/subscription.py](file://backend/app/models/subscription.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py)
|
||||
- [backend/app/schemas/query.py](file://backend/app/schemas/query.py)
|
||||
- [backend/app/schemas/citation.py](file://backend/app/schemas/citation.py)
|
||||
- [backend/app/services/query.py](file://backend/app/services/query.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/services/citation.py](file://backend/app/services/citation.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/api/deps.py](file://backend/app/api/deps.py)
|
||||
</cite>
|
||||
|
||||
## 更新摘要
|
||||
**所做更改**
|
||||
- 完善了用户模型的字段映射和关系配置说明
|
||||
- 详细补充了查询模型的索引策略和生命周期管理
|
||||
- 新增了查询任务模型的状态机和任务调度机制
|
||||
- 完善了引用记录模型的统计分析功能说明
|
||||
- 补充了订阅模型的支付信息字段和状态管理
|
||||
- 增强了模型间关系映射和级联策略的技术细节
|
||||
- 完善了序列化、反序列化与数据验证机制
|
||||
- 新增了使用示例和最佳实践指南
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖关系分析](#依赖关系分析)
|
||||
7. [性能考量](#性能考量)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件系统性梳理 GEO 平台的 SQLAlchemy ORM 数据模型,覆盖模型类定义、字段映射与关系配置,解释模型间的关系映射(含级联与外键约束)、序列化/反序列化与数据验证机制、生命周期管理与事件钩子、以及使用示例与最佳实践。目标是帮助开发者快速理解并正确使用数据模型,确保在查询、任务调度、引用记录与订阅管理等场景中保持一致性与可维护性。
|
||||
|
||||
## 项目结构
|
||||
后端采用异步 SQLAlchemy(SQLAlchemy 2.x + asyncpg)与 Pydantic 模式进行数据建模与 API 层交互。数据库初始化通过 declarative_base 创建基类,迁移脚本由 Alembic 管理,模型导出统一在 models 包的 __init__.py 中聚合。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "模型层"
|
||||
U["User<br/>用户"]
|
||||
Q["Query<br/>查询任务"]
|
||||
QT["QueryTask<br/>查询任务执行单元"]
|
||||
CR["CitationRecord<br/>引用记录"]
|
||||
S["Subscription<br/>订阅"]
|
||||
end
|
||||
subgraph "数据库"
|
||||
DB["PostgreSQL"]
|
||||
end
|
||||
subgraph "服务层"
|
||||
SVCQ["Query 服务"]
|
||||
SVCC["Citation 服务"]
|
||||
end
|
||||
subgraph "API 层"
|
||||
APIQ["Queries API"]
|
||||
APIC["Citations API"]
|
||||
end
|
||||
U --> Q
|
||||
Q --> CR
|
||||
Q --> QT
|
||||
U --> S
|
||||
SVCQ --> Q
|
||||
SVCC --> CR
|
||||
APIQ --> SVCQ
|
||||
APIC --> SVCC
|
||||
Q --- DB
|
||||
CR --- DB
|
||||
QT --- DB
|
||||
S --- DB
|
||||
U --- DB
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/models/subscription.py:11-37](file://backend/app/models/subscription.py#L11-L37)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/services/query.py:1-123](file://backend/app/services/query.py#L1-L123)
|
||||
- [backend/app/services/citation.py:1-359](file://backend/app/services/citation.py#L1-L359)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/__init__.py:1-14](file://backend/app/models/__init__.py#L1-L14)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
|
||||
## 核心组件
|
||||
本节概述各模型的职责、关键字段与关系,为后续深入分析打基础。
|
||||
|
||||
- 用户(User)
|
||||
- 负责平台用户信息与配额控制,支持计划类型与最大查询数限制。
|
||||
- 关系:一对多到 Query、Subscription,删除时级联删除孤儿对象。
|
||||
- 查询(Query)
|
||||
- 描述关键词、目标品牌、别名、平台集合、频率、状态及下次查询时间等。
|
||||
- 关系:多对一到 User;一对多到 CitationRecord、QueryTask;删除时级联删除孤儿对象。
|
||||
- 查询任务(QueryTask)
|
||||
- 记录单次平台查询任务的状态、错误信息与时间戳。
|
||||
- 关系:多对一到 Query;删除时级联删除。
|
||||
- 引用记录(CitationRecord)
|
||||
- 记录某次查询在特定平台上的引用情况、竞品品牌列表与原始响应摘要。
|
||||
- 关系:多对一到 Query;删除时级联删除。
|
||||
- 订阅(Subscription)
|
||||
- 记录用户的订阅计划、有效期、支付信息与状态。
|
||||
- 关系:多对一到 User;删除时级联删除。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/models/subscription.py:11-37](file://backend/app/models/subscription.py#L11-L37)
|
||||
|
||||
## 架构总览
|
||||
下图展示模型与数据库表、索引、外键约束之间的对应关系,以及服务层与 API 层如何通过模型进行数据访问。
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
USERS {
|
||||
uuid id PK
|
||||
string email UK
|
||||
string password_hash
|
||||
string name
|
||||
string plan
|
||||
integer max_queries
|
||||
boolean is_active
|
||||
timestamptz created_at
|
||||
timestamptz updated_at
|
||||
}
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamptz last_queried_at
|
||||
timestamptz next_query_at
|
||||
timestamptz created_at
|
||||
timestamptz updated_at
|
||||
}
|
||||
CITATION_RECORDS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
boolean cited
|
||||
integer citation_position
|
||||
text citation_text
|
||||
jsonb competitor_brands
|
||||
text raw_response
|
||||
timestamptz queried_at
|
||||
}
|
||||
QUERY_TASKS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
string status
|
||||
text error_message
|
||||
timestamptz scheduled_at
|
||||
timestamptz started_at
|
||||
timestamptz completed_at
|
||||
}
|
||||
SUBSCRIPTIONS {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string plan
|
||||
string status
|
||||
date start_date
|
||||
date end_date
|
||||
numeric amount
|
||||
string payment_method
|
||||
string payment_id
|
||||
timestamptz created_at
|
||||
}
|
||||
USERS ||--o{ QUERIES : "拥有"
|
||||
USERS ||--o{ SUBSCRIPTIONS : "拥有"
|
||||
QUERIES ||--o{ CITATION_RECORDS : "产生"
|
||||
QUERIES ||--o{ QUERY_TASKS : "拆分执行"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:21-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L21-L128)
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/app/models/subscription.py:11-37](file://backend/app/models/subscription.py#L11-L37)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 用户模型(User)
|
||||
- 表名与主键
|
||||
- 表名:users;主键:UUID 类型的 id。
|
||||
- 字段映射
|
||||
- 邮箱唯一且必填;密码哈希必填;名称可空;计划默认 free;最大查询数默认 5;激活状态默认 true;创建/更新时间自动填充。
|
||||
- 关系
|
||||
- 一对多到 Query、Subscription,删除用户时级联删除孤儿对象。
|
||||
- 生命周期与事件
|
||||
- created_at/updated_at 使用 server_default/onupdate 注入数据库侧时间戳。
|
||||
- 序列化/反序列化
|
||||
- 通过 Pydantic 模型(如查询响应)在 API 层进行序列化;ORM 对象可直接用于 FastAPI 响应模型(from_attributes)。
|
||||
- 最佳实践
|
||||
- 在创建/更新用户时避免直接修改计划或配额,建议通过专门的服务接口进行校验与审计。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:23-37](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L23-L37)
|
||||
|
||||
### 查询模型(Query)
|
||||
- 表名与主键
|
||||
- 表名:queries;主键:UUID 类型的 id。
|
||||
- 字段映射
|
||||
- 外键 user_id 指向 users.id,删除时级联;关键词与目标品牌必填;品牌别名与平台集合默认值合理;频率默认 weekly;状态默认 active;下次查询时间可空;创建/更新时间自动填充。
|
||||
- 关系
|
||||
- 多对一到 User;一对多到 CitationRecord、QueryTask,删除查询时级联删除孤儿对象。
|
||||
- 索引
|
||||
- 为 user_id、status、next_query_at 建立索引以优化查询。
|
||||
- 生命周期与事件
|
||||
- created_at/updated_at 使用 server_default/onupdate 注入数据库侧时间戳。
|
||||
- 序列化/反序列化
|
||||
- 响应模型 QueryResponse 支持 from_attributes,便于 ORM 对象直接转为 API 响应。
|
||||
- 最佳实践
|
||||
- 更新频率时同步更新 next_query_at;在创建查询前检查用户配额。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:39-59](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L39-L59)
|
||||
|
||||
### 查询任务模型(QueryTask)
|
||||
- 表名与主键
|
||||
- 表名:query_tasks;主键:UUID 类型的 id。
|
||||
- 字段映射
|
||||
- 外键 query_id 指向 queries.id,删除时级联;平台必填;状态默认 pending;错误信息可空;调度/开始/完成时间可空。
|
||||
- 关系
|
||||
- 多对一到 Query;删除所属查询(Query)时,其查询任务被级联删除。
|
||||
- 索引
|
||||
- 为 status 建立索引以支持任务调度筛选。
|
||||
- 生命周期与事件
|
||||
- scheduled_at 默认当前时间;其他时间戳按需更新。
|
||||
- 序列化/反序列化
|
||||
- 通过 Pydantic 模型进行 API 层序列化。
|
||||
- 最佳实践
|
||||
- 任务状态机:pending -> started -> completed 或 failed;失败时记录 error_message。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:80-94](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L80-L94)
|
||||
|
||||
### 引用记录模型(CitationRecord)
|
||||
- 表名与主键
|
||||
- 表名:citation_records;主键:UUID 类型的 id。
|
||||
- 字段映射
|
||||
- 外键 query_id 指向 queries.id,删除时级联;平台必填;是否引用默认 false;引用位置可空;引用文本可空;竞品品牌列表默认空数组;原始响应可空;查询时间默认当前时间。
|
||||
- 关系
|
||||
- 多对一到 Query;删除查询时级联删除。
|
||||
- 索引
|
||||
- 为 query_id、queried_at、platform 建立索引以优化统计与检索。
|
||||
- 生命周期与事件
|
||||
- queried_at 默认当前时间。
|
||||
- 序列化/反序列化
|
||||
- 通过 CitationResponse 进行 API 层序列化。
|
||||
- 最佳实践
|
||||
- 统计时按平台与日期聚合,结合索引提升性能。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:61-78](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L61-L78)
|
||||
|
||||
### 订阅模型(Subscription)
|
||||
- 表名与主键
|
||||
- 表名:subscriptions;主键:UUID 类型的 id。
|
||||
- 字段映射
|
||||
- 外键 user_id 指向 users.id,删除时级联;计划必填;状态默认 active;起止日期必填;金额可空;支付方式与支付 ID 可空;创建时间默认当前时间。
|
||||
- 关系
|
||||
- 多对一到 User;删除用户时级联删除。
|
||||
- 生命周期与事件
|
||||
- created_at 默认当前时间。
|
||||
- 序列化/反序列化
|
||||
- 通过 Pydantic 模型进行 API 层序列化。
|
||||
- 最佳实践
|
||||
- 订阅到期后应自动调整用户配额与功能权限。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/subscription.py:11-37](file://backend/app/models/subscription.py#L11-L37)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:96-111](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L96-L111)
|
||||
|
||||
### 模型关系与级联策略
|
||||
- 外键约束
|
||||
- 所有子表均设置外键指向父表主键,并在删除时采用 CASCADE,确保数据一致性。
|
||||
- 级联删除孤儿对象
|
||||
- User 的 queries、subscriptions;Query 的 citation_records、query_tasks 均配置了"all, delete-orphan",保证删除父对象时自动清理其子对象。
|
||||
- 索引策略
|
||||
- 查询高频字段(如 user_id、status、next_query_at、queried_at、platform)建立索引,提升查询性能。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/user.py:35-40](file://backend/app/models/user.py#L35-L40)
|
||||
- [backend/app/models/query.py:43-48](file://backend/app/models/query.py#L43-L48)
|
||||
- [backend/app/models/query_task.py:36-38](file://backend/app/models/query_task.py#L36-L38)
|
||||
- [backend/app/models/citation_record.py:37-41](file://backend/app/models/citation_record.py#L37-L41)
|
||||
|
||||
### 序列化、反序列化与数据验证
|
||||
- Pydantic 验证
|
||||
- 查询创建/更新请求体包含平台集合、频率、状态等字段的严格校验,非法值会抛出异常。
|
||||
- ORM 到 API 的转换
|
||||
- 响应模型启用 from_attributes,允许直接将 ORM 对象转为 JSON 响应。
|
||||
- API 层集成
|
||||
- Queries API 将请求体绑定到 Pydantic 模型,调用服务层进行业务处理,再返回 ORM 对象或 Pydantic 响应模型。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/schemas/query.py:11-94](file://backend/app/schemas/query.py#L11-L94)
|
||||
- [backend/app/schemas/citation.py:7-50](file://backend/app/schemas/citation.py#L7-L50)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
|
||||
### 生命周期管理与事件钩子
|
||||
- 时间戳管理
|
||||
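- User 与 Query 的 created_at/updated_at 使用 server_default/onupdate 注入数据库侧时间戳,减少应用层负担;Subscription 仅有 created_at,CitationRecord 与 QueryTask 没有 created_at/updated_at,分别以 queried_at、scheduled_at 的默认值记录时间。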
|
||||
- 任务状态流转
|
||||
- QueryTask 的状态从 pending 到 started 再到 completed 或 failed,配合 scheduled_at/started_at/completed_at 字段记录生命周期节点。
|
||||
- 查询调度
|
||||
- 服务层根据频率计算 next_query_at,便于定时任务调度。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/user.py:25-33](file://backend/app/models/user.py#L25-L33)
|
||||
- [backend/app/models/query.py:32-40](file://backend/app/models/query.py#L32-L40)
|
||||
- [backend/app/models/query_task.py:27-32](file://backend/app/models/query_task.py#L27-L32)
|
||||
- [backend/app/services/query.py:62-77](file://backend/app/services/query.py#L62-L77)
|
||||
|
||||
### 使用示例与最佳实践
|
||||
- 创建查询
|
||||
- 步骤:校验用户配额 -> 计算 next_query_at -> 构造 Query -> 提交事务 -> 刷新对象。
|
||||
- 参考路径:[创建查询服务:45-81](file://backend/app/services/query.py#L45-L81)
|
||||
- 更新查询
|
||||
- 步骤:读取查询 -> 排除未设置字段 -> 若更新频率则重算 next_query_at -> 提交事务 -> 刷新对象。
|
||||
- 参考路径:[更新查询服务:84-113](file://backend/app/services/query.py#L84-L113)
|
||||
- 删除查询
|
||||
- 步骤:按用户与查询 ID 定位 -> 删除 -> 提交事务。
|
||||
- 参考路径:[删除查询服务:116-129](file://backend/app/services/query.py#L116-L129)
|
||||
- API 调用
|
||||
- GET /queries -> 返回 QueryListResponse
|
||||
- POST /queries -> 返回 QueryResponse
|
||||
- GET /queries/{query_id} -> 返回 QueryResponse
|
||||
- PUT /queries/{query_id} -> 返回 QueryResponse
|
||||
- DELETE /queries/{query_id} -> 204 No Content
|
||||
- 参考路径:[查询 API:15-86](file://backend/app/api/queries.py#L15-L86)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/services/query.py:45-129](file://backend/app/services/query.py#L45-L129)
|
||||
- [backend/app/api/queries.py:15-86](file://backend/app/api/queries.py#L15-L86)
|
||||
|
||||
## 依赖关系分析
|
||||
- 数据库引擎与会话
|
||||
- 使用异步引擎与 async_sessionmaker,Base 作为 declarative_base 基类。
|
||||
- 模型导出
|
||||
- models/__init__.py 统一导出所有模型,便于上层模块按需导入。
|
||||
- 迁移脚本
|
||||
- Alembic 初始迁移脚本定义了表结构、索引与外键约束,与模型定义保持一致。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
CFG["配置<br/>DATABASE_URL"] --> ENG["异步引擎"]
|
||||
ENG --> SESS["AsyncSessionLocal"]
|
||||
SESS --> BASE["declarative_base"]
|
||||
BASE --> MODELS["模型类"]
|
||||
MODELS --> DB["PostgreSQL"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py](file://backend/app/config.py#L7)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-23](file://backend/app/config.py#L1-L23)
|
||||
|
||||
## 性能考量
|
||||
- 索引设计
|
||||
- queries:user_id、status、next_query_at
|
||||
- citation_records:query_id、queried_at、platform
|
||||
- query_tasks:status
|
||||
- 建议:基于实际查询模式持续评估与补充索引。
|
||||
- 查询优化
|
||||
- 使用 select + order_by + offset + limit 实现分页与排序。
|
||||
- 使用 func.count 统计总数,避免不必要的全量加载。
|
||||
- 异步 I/O
|
||||
- 使用 asyncpg 与 SQLAlchemy 异步引擎,降低并发场景下的阻塞风险。
|
||||
- 缓存策略
|
||||
- 对热点查询结果(如用户配额、订阅状态)可引入 Redis 缓存,减少数据库压力。
|
||||
|
||||
## 故障排查指南
|
||||
- 查询配额超限
|
||||
- 现象:创建查询时报错"PermissionError: Query limit exceeded"
|
||||
- 处理:检查用户 max_queries 与当前查询数量,必要时升级计划或清理历史查询。
|
||||
- 参考路径:[创建查询服务:45-81](file://backend/app/services/query.py#L45-L81)
|
||||
- 查询不存在
|
||||
- 现象:GET/PUT/DELETE 查询返回 404
|
||||
- 处理:确认 query_id 存在且属于当前用户;检查该查询是否已被删除(包括因删除用户而被级联删除的情况)。
|
||||
- 参考路径:[查询 API:42-85](file://backend/app/api/queries.py#L42-L85)
|
||||
- 平台/频率参数非法
|
||||
- 现象:Pydantic 校验失败
|
||||
- 处理:核对平台集合是否在允许集合内,频率是否为 daily/weekly。
|
||||
- 参考路径:[查询请求体校验:18-33](file://backend/app/schemas/query.py#L18-L33)
|
||||
- 任务状态异常
|
||||
- 现象:任务长时间 pending 或失败
|
||||
- 处理:检查 error_message 字段;核对平台可用性与 API 密钥配置。
|
||||
- 参考路径:[查询任务模型:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/services/query.py:45-129](file://backend/app/services/query.py#L45-L129)
|
||||
- [backend/app/api/queries.py:42-85](file://backend/app/api/queries.py#L42-L85)
|
||||
- [backend/app/schemas/query.py:18-33](file://backend/app/schemas/query.py#L18-L33)
|
||||
- [backend/app/models/query_task.py:11-39](file://backend/app/models/query_task.py#L11-L39)
|
||||
|
||||
## 结论
|
||||
GEO 项目的数据模型围绕用户、查询、任务、引用记录与订阅五大实体构建,采用异步 SQLAlchemy ORM 与 Alembic 迁移管理,配合 Pydantic 的输入输出验证,形成清晰的领域模型与 API 边界。通过合理的外键约束、级联策略与索引设计,既保证了数据一致性,也兼顾了查询性能。建议在生产环境中持续监控查询性能与缓存命中率,并完善事件钩子与审计日志以增强可观测性。
|
||||
|
||||
## 附录
|
||||
- 数据库连接配置
|
||||
- DATABASE_URL:PostgreSQL 异步连接字符串
|
||||
- 参考路径:[配置](file://backend/app/config.py#L12)
|
||||
- 模型导出入口
|
||||
- models/__init__.py 统一导出所有模型
|
||||
- 参考路径:[模型导出:1-14](file://backend/app/models/__init__.py#L1-L14)
|
||||
- JWT 认证配置
|
||||
- JWT_SECRET:JWT 密钥
|
||||
- JWT_EXPIRE_HOURS:JWT 过期时间(小时)
|
||||
- 参考路径:[认证依赖:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
|
|
@ -0,0 +1,501 @@
|
|||
# 表结构设计
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [user.py](file://backend/app/models/user.py)
|
||||
- [query.py](file://backend/app/models/query.py)
|
||||
- [citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [query_task.py](file://backend/app/models/query_task.py)
|
||||
- [subscription.py](file://backend/app/models/subscription.py)
|
||||
- [initial_migration.py](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py)
|
||||
- [database.py](file://backend/app/database.py)
|
||||
- [config.py](file://backend/app/config.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构概览](#架构概览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排除指南](#故障排除指南)
|
||||
9. [结论](#结论)
|
||||
|
||||
## 简介
|
||||
|
||||
本文件详细描述GEO项目的数据库表结构设计,涵盖用户查询监控平台的核心数据模型。该系统采用PostgreSQL作为主要数据库,使用SQLAlchemy ORM进行对象关系映射,支持异步数据库操作。系统包含五个核心表:users(用户表)、queries(查询表)、citation_records(引用记录表)、query_tasks(查询任务表)和subscriptions(订阅表),这些表通过外键关系相互关联,形成完整的业务数据模型。
|
||||
|
||||
## 项目结构
|
||||
|
||||
GEO项目的数据库层采用分层架构设计,主要包含以下组件:
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "数据库层"
|
||||
DB[(PostgreSQL数据库)]
|
||||
Engine[异步引擎]
|
||||
Session[会话管理]
|
||||
end
|
||||
subgraph "ORM模型层"
|
||||
User[User模型]
|
||||
Query[Query模型]
|
||||
Citation[CitationRecord模型]
|
||||
Task[QueryTask模型]
|
||||
Subscription[Subscription模型]
|
||||
end
|
||||
subgraph "配置层"
|
||||
Config[配置管理]
|
||||
Alembic[迁移管理]
|
||||
end
|
||||
Config --> Engine
|
||||
Engine --> DB
|
||||
Session --> DB
|
||||
User --> DB
|
||||
Query --> DB
|
||||
Citation --> DB
|
||||
Task --> DB
|
||||
Subscription --> DB
|
||||
Alembic --> DB
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [config.py:4-17](file://backend/app/config.py#L4-L17)
|
||||
|
||||
**章节来源**
|
||||
- [database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [config.py:4-17](file://backend/app/config.py#L4-L17)
|
||||
|
||||
## 核心组件
|
||||
|
||||
### 数据库连接配置
|
||||
|
||||
系统使用异步PostgreSQL连接,主要配置项的默认值如下(均为开发环境默认值;其中数据库口令与JWT密钥在生产环境必须通过环境变量覆盖):
|
||||
- 数据库URL: `postgresql+asyncpg://postgres:postgres123@db:5432/geo_platform`
|
||||
- Redis连接: `redis://redis:6379/0`
|
||||
- JWT密钥: `your-secret-key-change-in-production`
|
||||
- Playwright浏览器路径: `/ms-playwright`
|
||||
|
||||
### 异步数据库引擎
|
||||
|
||||
使用SQLAlchemy异步引擎创建数据库连接池,支持:
|
||||
- 异步会话管理
|
||||
- 连接池配置
|
||||
- 自动事务处理
|
||||
- 连接生命周期管理
|
||||
|
||||
**章节来源**
|
||||
- [config.py:7](file://backend/app/config.py#L7)
|
||||
- [database.py:6-18](file://backend/app/database.py#L6-L18)
|
||||
|
||||
## 架构概览
|
||||
|
||||
GEO系统的数据库架构采用标准的三层设计模式:
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
USERS {
|
||||
uuid id PK
|
||||
string email UK
|
||||
string password_hash
|
||||
string name
|
||||
string plan
|
||||
integer max_queries
|
||||
boolean is_active
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamp last_queried_at
|
||||
timestamp next_query_at
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
CITATION_RECORDS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
boolean cited
|
||||
integer citation_position
|
||||
text citation_text
|
||||
jsonb competitor_brands
|
||||
text raw_response
|
||||
timestamp queried_at
|
||||
}
|
||||
QUERY_TASKS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
string status
|
||||
text error_message
|
||||
timestamp scheduled_at
|
||||
timestamp started_at
|
||||
timestamp completed_at
|
||||
}
|
||||
SUBSCRIPTIONS {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string plan
|
||||
string status
|
||||
date start_date
|
||||
date end_date
|
||||
numeric amount
|
||||
string payment_method
|
||||
string payment_id
|
||||
timestamp created_at
|
||||
}
|
||||
USERS ||--o{ QUERIES : "拥有"
|
||||
QUERIES ||--o{ CITATION_RECORDS : "包含"
|
||||
QUERIES ||--o{ QUERY_TASKS : "包含"
|
||||
USERS ||--o{ SUBSCRIPTIONS : "拥有"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [initial_migration.py:24-37](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L24-L37)
|
||||
- [initial_migration.py:40-56](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L40-L56)
|
||||
- [initial_migration.py:62-78](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L62-L78)
|
||||
- [initial_migration.py:81-94](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L81-L94)
|
||||
- [initial_migration.py:97-111](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L97-L111)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 用户表 (users)
|
||||
|
||||
用户表是整个系统的核心实体,存储平台用户的基本信息和账户状态。
|
||||
|
||||
#### 字段定义
|
||||
|
||||
| 字段名 | 数据类型 | 约束条件 | 描述 |
|
||||
|--------|----------|----------|------|
|
||||
| id | UUID | 主键, 非空, 默认值 | 用户唯一标识符 |
|
||||
| email | String(255) | 唯一, 非空 | 用户邮箱地址 |
|
||||
| password_hash | String(255) | 非空 | 用户密码哈希值 |
|
||||
| name | String(100) | 可空 | 用户姓名 |
|
||||
| plan | String(20) | 非空, 默认值: "free" | 用户套餐类型 |
|
||||
| max_queries | Integer | 非空, 默认值: 5 | 用户可创建的查询数量上限 |
|
||||
| is_active | Boolean | 非空, 默认值: true | 账户激活状态 |
|
||||
| created_at | Timestamp | 非空, 默认值: NOW() | 创建时间 |
|
||||
| updated_at | Timestamp | 非空, 默认值: NOW() | 更新时间 |
|
||||
|
||||
#### 约束和索引
|
||||
|
||||
- 主键: id (UUID)
|
||||
- 唯一约束: email
|
||||
- 外键: 无直接外键关系
|
||||
- 关系: 与查询表(一对多)、订阅表(一对多)
|
||||
|
||||
#### 业务规则
|
||||
|
||||
- 用户必须提供唯一的邮箱地址
|
||||
- 默认免费套餐,最多可创建5个查询
|
||||
- 账户默认激活状态
|
||||
- 时间戳自动管理
|
||||
|
||||
**章节来源**
|
||||
- [user.py:14-33](file://backend/app/models/user.py#L14-L33)
|
||||
- [initial_migration.py:24-37](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L24-L37)
|
||||
|
||||
### 查询表 (queries)
|
||||
|
||||
查询表记录用户的搜索关键词和品牌监控配置。
|
||||
|
||||
#### 字段定义
|
||||
|
||||
| 字段名 | 数据类型 | 约束条件 | 描述 |
|
||||
|--------|----------|----------|------|
|
||||
| id | UUID | 主键, 非空, 默认值 | 查询记录唯一标识符 |
|
||||
| user_id | UUID | 外键, 非空 | 关联用户ID |
|
||||
| keyword | String(200) | 非空 | 搜索关键词 |
|
||||
| target_brand | String(100) | 非空 | 目标品牌名称 |
|
||||
| brand_aliases | JSONB | 非空, 默认值: [] | 品牌别名列表 |
|
||||
| platforms | JSONB | 非空, 默认值: ["wenxin","kimi"] | 监控平台列表 |
|
||||
| frequency | String(20) | 非空, 默认值: "weekly" | 查询频率 |
|
||||
| status | String(20) | 非空, 默认值: "active" | 查询状态 |
|
||||
| last_queried_at | Timestamp | 可空 | 最后查询时间 |
|
||||
| next_query_at | Timestamp | 可空 | 下次查询时间 |
|
||||
| created_at | Timestamp | 非空, 默认值: NOW() | 创建时间 |
|
||||
| updated_at | Timestamp | 非空, 默认值: NOW() | 更新时间 |
|
||||
|
||||
#### 约束和索引
|
||||
|
||||
- 主键: id (UUID)
|
||||
- 外键: user_id → users.id (级联删除)
|
||||
- 索引: idx_queries_user_id, idx_queries_status, idx_queries_next_query_at
|
||||
- 关系: 与用户表(多对一)、引用记录表(一对多)、查询任务表(一对多)
|
||||
|
||||
#### 业务规则
|
||||
|
||||
- 必须关联有效用户
|
||||
- 平台默认监控"wenxin"和"kimi"
|
||||
- 状态默认为"active"
|
||||
- 支持定时查询调度
|
||||
- 级联删除确保数据一致性
|
||||
|
||||
**章节来源**
|
||||
- [query.py:14-48](file://backend/app/models/query.py#L14-L48)
|
||||
- [initial_migration.py:40-56](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L40-L56)
|
||||
|
||||
### 引用记录表 (citation_records)
|
||||
|
||||
引用记录表存储每次查询的具体结果和竞争情报。
|
||||
|
||||
#### 字段定义
|
||||
|
||||
| 字段名 | 数据类型 | 约束条件 | 描述 |
|
||||
|--------|----------|----------|------|
|
||||
| id | UUID | 主键, 非空, 默认值 | 引用记录唯一标识符 |
|
||||
| query_id | UUID | 外键, 非空 | 关联查询ID |
|
||||
| platform | String(50) | 非空 | 查询平台名称 |
|
||||
| cited | Boolean | 非空, 默认值: false | 是否被引用 |
|
||||
| citation_position | Integer | 可空 | 引用位置 |
|
||||
| citation_text | Text | 可空 | 引用文本内容 |
|
||||
| competitor_brands | JSONB | 非空, 默认值: [] | 竞争品牌列表 |
|
||||
| raw_response | Text | 可空 | 原始响应内容 |
|
||||
| queried_at | Timestamp | 非空, 默认值: NOW() | 查询时间 |
|
||||
|
||||
#### 约束和索引
|
||||
|
||||
- 主键: id (UUID)
|
||||
- 外键: query_id → queries.id (级联删除)
|
||||
- 索引: idx_citation_records_query_id, idx_citation_records_queried_at, idx_citation_records_platform
|
||||
- 关系: 与查询表(多对一)
|
||||
|
||||
#### 业务规则
|
||||
|
||||
- 必须关联有效查询
|
||||
- 引用状态默认未引用
|
||||
- 支持JSON格式的竞争品牌数据
|
||||
- 按平台和查询时间建立索引优化查询性能
|
||||
- 级联删除确保查询历史完整清理
|
||||
|
||||
**章节来源**
|
||||
- [citation_record.py:14-35](file://backend/app/models/citation_record.py#L14-L35)
|
||||
- [initial_migration.py:62-78](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L62-L78)
|
||||
|
||||
### 查询任务表 (query_tasks)
|
||||
|
||||
查询任务表管理异步查询任务的状态和执行信息。
|
||||
|
||||
#### 字段定义
|
||||
|
||||
| 字段名 | 数据类型 | 约束条件 | 描述 |
|
||||
|--------|----------|----------|------|
|
||||
| id | UUID | 主键, 非空, 默认值 | 任务唯一标识符 |
|
||||
| query_id | UUID | 外键, 非空 | 关联查询ID |
|
||||
| platform | String(50) | 非空 | 执行平台 |
|
||||
| status | String(20) | 非空, 默认值: "pending" | 任务状态 |
|
||||
| error_message | Text | 可空 | 错误信息 |
|
||||
| scheduled_at | Timestamp | 非空, 默认值: NOW() | 调度时间 |
|
||||
| started_at | Timestamp | 可空 | 开始执行时间 |
|
||||
| completed_at | Timestamp | 可空 | 完成时间 |
|
||||
|
||||
#### 约束和索引
|
||||
|
||||
- 主键: id (UUID)
|
||||
- 外键: query_id → queries.id (级联删除)
|
||||
- 索引: idx_query_tasks_status
|
||||
- 关系: 与查询表(多对一)
|
||||
|
||||
#### 业务规则
|
||||
|
||||
- 必须关联有效查询
|
||||
- 任务状态默认为"pending"
|
||||
- 支持任务执行跟踪
|
||||
- 状态变更自动记录时间戳
|
||||
- 级联删除确保任务历史清理
|
||||
|
||||
**章节来源**
|
||||
- [query_task.py:14-34](file://backend/app/models/query_task.py#L14-L34)
|
||||
- [initial_migration.py:81-94](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L81-L94)
|
||||
|
||||
### 订阅表 (subscriptions)
|
||||
|
||||
订阅表管理用户的付费订阅信息和状态。
|
||||
|
||||
#### 字段定义
|
||||
|
||||
| 字段名 | 数据类型 | 约束条件 | 描述 |
|
||||
|--------|----------|----------|------|
|
||||
| id | UUID | 主键, 非空, 默认值 | 订阅记录唯一标识符 |
|
||||
| user_id | UUID | 外键, 非空 | 关联用户ID |
|
||||
| plan | String(20) | 非空 | 套餐类型 |
|
||||
| status | String(20) | 非空, 默认值: "active" | 订阅状态 |
|
||||
| start_date | Date | 非空 | 订阅开始日期 |
|
||||
| end_date | Date | 非空 | 订阅结束日期 |
|
||||
| amount | Numeric(10,2) | 可空 | 支付金额 |
|
||||
| payment_method | String(50) | 可空 | 支付方式 |
|
||||
| payment_id | String(255) | 可空 | 支付ID |
|
||||
| created_at | Timestamp | 非空, 默认值: NOW() | 创建时间 |
|
||||
|
||||
#### 约束和索引
|
||||
|
||||
- 主键: id (UUID)
|
||||
- 外键: user_id → users.id (级联删除)
|
||||
- 关系: 与用户表(多对一)
|
||||
|
||||
#### 业务规则
|
||||
|
||||
- 必须关联有效用户
|
||||
- 状态默认为"active"
|
||||
- 支持多种支付方式
|
||||
- 日期范围确保订阅有效性
|
||||
- 级联删除确保订阅历史清理
|
||||
|
||||
**章节来源**
|
||||
- [subscription.py:14-36](file://backend/app/models/subscription.py#L14-L36)
|
||||
- [initial_migration.py:97-111](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L97-L111)
|
||||
|
||||
## 依赖分析
|
||||
|
||||
### 外键关系图
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
Users["Users表<br/>主键: id"] --> |外键: user_id| Queries["Queries表<br/>主键: id"]
|
||||
Queries --> |外键: query_id| CitationRecords["CitationRecords表<br/>主键: id"]
|
||||
Queries --> |外键: query_id| QueryTasks["QueryTasks表<br/>主键: id"]
|
||||
Users --> |外键: user_id| Subscriptions["Subscriptions表<br/>主键: id"]
|
||||
style Users fill:#e1f5fe
|
||||
style Queries fill:#f3e5f5
|
||||
style CitationRecords fill:#e8f5e8
|
||||
style QueryTasks fill:#fff3e0
|
||||
style Subscriptions fill:#fce4ec
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [initial_migration.py:55](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L55)
|
||||
- [initial_migration.py:74](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L74)
|
||||
- [initial_migration.py:92](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L92)
|
||||
- [initial_migration.py:110](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L110)
|
||||
|
||||
### 关系类型分析
|
||||
|
||||
#### 一对一关系
|
||||
- 当前设计中不存在数据库层面的一对一关系。users 与 subscriptions 在表结构上是一对多(一个用户可以有多条订阅记录);"一个用户同一时间只对应一条当前有效的订阅记录"属于业务约定,需由应用层结合 status 与起止日期保证。
|
||||
|
||||
#### 一对多关系
|
||||
- users → queries: 一个用户可以有多个查询记录
|
||||
- users → subscriptions: 一个用户可以有多个订阅记录
|
||||
- queries → citation_records: 一个查询可以有多个引用记录
|
||||
- queries → query_tasks: 一个查询可以有多个任务
|
||||
|
||||
#### 多对多关系
|
||||
- 当前设计中不存在多对多关系,因此没有中间(关联)表
|
||||
|
||||
### 级联删除策略
|
||||
|
||||
所有外键关系都设置了级联删除策略:
|
||||
- 删除用户时,自动删除其所有查询、订阅记录
|
||||
- 删除查询时,自动删除其所有引用记录和任务
|
||||
|
||||
**章节来源**
|
||||
- [initial_migration.py:55](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L55)
|
||||
- [initial_migration.py:74](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L74)
|
||||
- [initial_migration.py:92](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L92)
|
||||
- [initial_migration.py:110](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L110)
|
||||
|
||||
## 性能考虑
|
||||
|
||||
### 索引策略
|
||||
|
||||
系统为关键查询字段建立了专门的索引:
|
||||
|
||||
#### 查询表索引
|
||||
- idx_queries_user_id: 提升按用户查询的性能
|
||||
- idx_queries_status: 支持状态过滤查询
|
||||
- idx_queries_next_query_at: 优化定时任务调度
|
||||
|
||||
#### 引用记录表索引
|
||||
- idx_citation_records_query_id: 提升按查询获取记录的性能
|
||||
- idx_citation_records_queried_at: 支持时间序列查询
|
||||
- idx_citation_records_platform: 优化平台过滤
|
||||
|
||||
#### 查询任务表索引
|
||||
- idx_query_tasks_status: 支持任务状态统计和调度
|
||||
|
||||
### 数据类型优化
|
||||
|
||||
- 使用UUID作为主键,避免序列号暴露业务信息
|
||||
- JSONB类型存储动态配置,支持高效查询和更新
|
||||
- 数值类型使用Numeric精确存储金额数据
|
||||
- 时间戳使用带时区的DateTime确保时区一致性
|
||||
|
||||
### 查询优化建议
|
||||
|
||||
1. **批量操作**: 对于大量数据的插入和更新,使用批量操作减少数据库往返
|
||||
2. **分页查询**: 对于列表查询,实现分页机制避免一次性加载过多数据
|
||||
3. **缓存策略**: 结合Redis实现热点数据缓存
|
||||
4. **连接池管理**: 合理配置连接池大小以平衡性能和资源使用
|
||||
|
||||
## 故障排除指南
|
||||
|
||||
### 常见问题及解决方案
|
||||
|
||||
#### 数据库连接问题
|
||||
- **症状**: 应用启动时报数据库连接错误
|
||||
- **原因**: DATABASE_URL配置不正确或数据库服务未启动
|
||||
- **解决**: 检查.env文件中的DATABASE_URL配置,确认数据库服务正常运行
|
||||
|
||||
#### 外键约束冲突
|
||||
- **症状**: 删除用户时报外键约束错误
|
||||
- **原因**: 存在相关记录导致级联删除失败
|
||||
- **解决**: 确保级联删除策略正确配置,检查数据完整性
|
||||
|
||||
#### 索引性能问题
|
||||
- **症状**: 查询响应缓慢
|
||||
- **原因**: 缺少必要的索引或索引设计不当
|
||||
- **解决**: 分析查询计划,添加适当的索引
|
||||
|
||||
#### 数据类型转换错误
|
||||
- **症状**: JSONB字段操作报错
|
||||
- **原因**: 数据类型不匹配或格式错误
|
||||
- **解决**: 确保JSONB数据格式正确,使用适当的序列化方法
|
||||
|
||||
### 调试工具
|
||||
|
||||
1. **数据库监控**: 使用PostgreSQL内置监控工具查看查询性能
|
||||
2. **日志分析**: 启用SQLAlchemy日志输出分析查询执行情况
|
||||
3. **性能分析**: 使用EXPLAIN ANALYZE分析慢查询
|
||||
|
||||
**章节来源**
|
||||
- [config.py:7](file://backend/app/config.py#L7)
|
||||
- [initial_migration.py:114-127](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L114-L127)
|
||||
|
||||
## 结论
|
||||
|
||||
GEO项目的数据库表结构设计体现了现代Web应用的最佳实践:
|
||||
|
||||
### 设计优势
|
||||
|
||||
1. **清晰的数据模型**: 五个核心表覆盖了完整的业务场景
|
||||
2. **合理的外键关系**: 确保数据一致性和完整性
|
||||
3. **高效的索引策略**: 针对常见查询模式优化性能
|
||||
4. **异步数据库支持**: 提供良好的并发处理能力
|
||||
5. **灵活的数据类型**: JSONB支持动态配置需求
|
||||
|
||||
### 业务逻辑实现
|
||||
|
||||
- 用户权限控制通过用户表的激活状态实现
|
||||
- 查询配额控制通过用户表的max_queries字段实现
|
||||
- 订阅管理通过订阅表的日期范围和状态字段实现
|
||||
- 数据清理通过级联删除策略自动维护
|
||||
|
||||
### 扩展性考虑
|
||||
|
||||
系统设计具有良好的扩展性:
|
||||
- 新增表时可复用现有的UUID主键模式
|
||||
- JSONB字段支持未来功能的动态配置
|
||||
- 异步架构支持水平扩展
|
||||
- 清晰的关系设计便于添加新的业务实体
|
||||
|
||||
这个数据模型为GEO平台提供了坚实的基础,能够支持用户查询监控、竞争情报分析和订阅管理等核心业务功能。
|
||||
|
|
@ -0,0 +1,497 @@
|
|||
# 单元测试
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [tests/conftest.py](file://tests/conftest.py)
|
||||
- [tests/test_auth.py](file://tests/test_auth.py)
|
||||
- [tests/test_citation_engine.py](file://tests/test_citation_engine.py)
|
||||
- [tests/test_citations.py](file://tests/test_citations.py)
|
||||
- [tests/test_queries.py](file://tests/test_queries.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/api/deps.py](file://backend/app/api/deps.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/services/auth.py](file://backend/app/services/auth.py)
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件系统性梳理 GEO 项目的单元测试实现,覆盖认证模块、引用引擎、查询处理与引用数据四大模块的测试用例设计与最佳实践。文档重点说明测试夹具(fixture)的使用方法(如 mock_user、auth_token、auth_headers、override_get_current_user),断言编写方式与覆盖率建议,并提供异步函数、依赖注入与错误处理的测试示例路径与流程图。
|
||||
|
||||
## 项目结构
|
||||
测试目录位于仓库根目录 tests/,采用按功能分模块组织的方式,分别对应后端 API 层与核心业务组件:
|
||||
- 认证模块测试:tests/test_auth.py
|
||||
- 引用引擎测试:tests/test_citation_engine.py
|
||||
- 查询处理测试:tests/test_queries.py
|
||||
- 引用数据测试:tests/test_citations.py
|
||||
- 测试夹具与异步客户端:tests/conftest.py
|
||||
- 后端应用入口与路由:backend/app/main.py、backend/app/api/*.py
|
||||
- 核心业务组件:backend/app/workers/citation_engine.py
|
||||
- 服务层与模型:backend/app/services/*、backend/app/models/*
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "测试层"
|
||||
T1["tests/test_auth.py"]
|
||||
T2["tests/test_citation_engine.py"]
|
||||
T3["tests/test_queries.py"]
|
||||
T4["tests/test_citations.py"]
|
||||
C["tests/conftest.py"]
|
||||
end
|
||||
subgraph "后端应用"
|
||||
M["backend/app/main.py"]
|
||||
A1["backend/app/api/auth.py"]
|
||||
A2["backend/app/api/queries.py"]
|
||||
A3["backend/app/api/citations.py"]
|
||||
D["backend/app/api/deps.py"]
|
||||
S1["backend/app/services/auth.py"]
|
||||
W["backend/app/workers/citation_engine.py"]
|
||||
U["backend/app/models/user.py"]
|
||||
CR["backend/app/models/citation_record.py"]
|
||||
end
|
||||
T1 --> A1
|
||||
T2 --> W
|
||||
T3 --> A2
|
||||
T4 --> A3
|
||||
T1 --> D
|
||||
T2 --> W
|
||||
T3 --> D
|
||||
T4 --> D
|
||||
A1 --> S1
|
||||
A2 --> D
|
||||
A3 --> D
|
||||
D --> U
|
||||
W --> CR
|
||||
M --> A1
|
||||
M --> A2
|
||||
M --> A3
|
||||
C --> M
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [tests/conftest.py:1-71](file://tests/conftest.py#L1-L71)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/api/deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
|
||||
章节来源
|
||||
- [tests/conftest.py:1-71](file://tests/conftest.py#L1-L71)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
|
||||
## 核心组件
|
||||
本节概述各模块测试关注点与测试夹具的作用范围。
|
||||
|
||||
- 认证模块测试(tests/test_auth.py)
|
||||
- 注册成功/重复邮箱、登录成功/密码错误、用户信息获取(含未认证场景)
|
||||
- 使用夹具:mock_registered_user、async_client、patch 注入服务层行为
|
||||
- 关键断言:HTTP 状态码、响应体字段、错误详情消息
|
||||
|
||||
- 引用引擎测试(tests/test_citation_engine.py)
|
||||
- 品牌匹配器精确/别名/模糊/无匹配;竞争品牌检测;引用位置与上下文提取
|
||||
- 使用夹具:直接构造类实例进行纯函数式断言,无需外部依赖
|
||||
|
||||
- 查询处理测试(tests/test_queries.py)
|
||||
- 创建查询成功/超出限额、列出查询、更新查询、删除查询、查询不存在/属于其他用户
|
||||
- 使用夹具:mock_query、async_client、patch 服务层返回值或异常
|
||||
|
||||
- 引用数据测试(tests/test_citations.py)
|
||||
- 获取引用列表、统计信息、CSV 导出(MIME 类型、附件头、内容片段)
|
||||
|
||||
- 测试夹具(tests/conftest.py)
|
||||
- mock_scheduler:屏蔽后台调度器,避免真实任务影响测试
|
||||
- mock_user:模拟认证用户对象,包含 id、邮箱、计划、配额等
|
||||
- override_get_current_user:重写依赖 get_current_user,使路由自动获得认证用户
|
||||
- auth_token:基于 mock_user 生成 JWT
|
||||
- auth_headers:组装 Authorization Bearer 头
|
||||
- async_client:ASGI 异步 HTTP 客户端,用于端到端测试
|
||||
|
||||
章节来源
|
||||
- [tests/test_auth.py:1-104](file://tests/test_auth.py#L1-L104)
|
||||
- [tests/test_citation_engine.py:1-54](file://tests/test_citation_engine.py#L1-L54)
|
||||
- [tests/test_queries.py:1-154](file://tests/test_queries.py#L1-L154)
|
||||
- [tests/test_citations.py:1-93](file://tests/test_citations.py#L1-L93)
|
||||
- [tests/conftest.py:19-71](file://tests/conftest.py#L19-L71)
|
||||
|
||||
## 架构总览
|
||||
下图展示测试夹具与被测组件之间的交互关系,以及异步 HTTP 客户端如何驱动 FastAPI 路由与依赖注入。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Test as "测试用例"
|
||||
participant Fixture as "测试夹具(conftest.py)"
|
||||
participant Client as "AsyncClient"
|
||||
participant App as "FastAPI 应用(main.py)"
|
||||
participant Dep as "依赖注入(deps.py)"
|
||||
participant Service as "服务层(API 路由)"
|
||||
participant Model as "模型(models)"
|
||||
Test->>Fixture : 请求夹具(mock_user/override_get_current_user/...)
|
||||
Fixture-->>Test : 返回夹具对象
|
||||
Test->>Client : 发起 HTTP 请求
|
||||
Client->>App : ASGI 请求
|
||||
App->>Dep : 解析 OAuth2 Bearer Token
|
||||
Dep-->>App : 返回当前用户(get_current_user)
|
||||
App->>Service : 调用路由处理器
|
||||
Service->>Model : 数据库读写/查询
|
||||
Service-->>App : 返回响应数据
|
||||
App-->>Client : HTTP 响应
|
||||
Client-->>Test : 断言结果
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [tests/conftest.py:28-71](file://tests/conftest.py#L28-L71)
|
||||
- [backend/app/main.py:38-42](file://backend/app/main.py#L38-L42)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/api/auth.py:13-43](file://backend/app/api/auth.py#L13-L43)
|
||||
- [backend/app/api/queries.py:15-86](file://backend/app/api/queries.py#L15-L86)
|
||||
- [backend/app/api/citations.py:25-78](file://backend/app/api/citations.py#L25-L78)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 认证模块测试
|
||||
- 测试要点
|
||||
- 注册接口:成功返回 201,响应包含邮箱与姓名;重复邮箱抛出 400
|
||||
- 登录接口:成功返回 200,包含 access_token 与用户信息;错误凭据返回 401
|
||||
- 用户信息接口:已认证返回 200,未认证返回 401
|
||||
- 测试夹具
|
||||
- mock_registered_user:模拟注册成功的用户对象
|
||||
- override_get_current_user:通过依赖覆盖提供认证用户
|
||||
- auth_headers:携带 Bearer Token 的请求头
|
||||
- async_client:ASGI 异步客户端
|
||||
- 断言策略
|
||||
- 状态码断言:201/200/400/401
|
||||
- 响应体字段断言:access_token、token_type、user.email 等
|
||||
- 错误详情断言:detail 中包含特定提示
|
||||
- 异步与依赖注入示例路径
|
||||
- [tests/test_auth.py:25-40](file://tests/test_auth.py#L25-L40)
|
||||
- [tests/test_auth.py:62-73](file://tests/test_auth.py#L62-L73)
|
||||
- [tests/test_auth.py:88-104](file://tests/test_auth.py#L88-L104)
|
||||
- [tests/conftest.py:28-62](file://tests/conftest.py#L28-L62)
|
||||
- [backend/app/api/auth.py:13-43](file://backend/app/api/auth.py#L13-L43)
|
||||
- [backend/app/services/auth.py:37-69](file://backend/app/services/auth.py#L37-L69)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant T as "测试用例(test_auth.py)"
|
||||
participant F as "夹具(conftest.py)"
|
||||
participant AC as "AsyncClient"
|
||||
participant R as "路由(auth.py)"
|
||||
participant S as "服务(auth.py)"
|
||||
participant DB as "数据库"
|
||||
T->>F : 获取 mock_registered_user/override_get_current_user/auth_headers
|
||||
T->>AC : POST /api/v1/auth/register
|
||||
AC->>R : 路由处理器
|
||||
R->>S : register_user(...)
|
||||
S->>DB : 查询/插入用户
|
||||
S-->>R : 返回用户对象
|
||||
R-->>AC : 201 + 用户信息
|
||||
AC-->>T : 断言状态码与字段
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [tests/test_auth.py:25-40](file://tests/test_auth.py#L25-L40)
|
||||
- [tests/conftest.py:28-62](file://tests/conftest.py#L28-L62)
|
||||
- [backend/app/api/auth.py:13-19](file://backend/app/api/auth.py#L13-L19)
|
||||
- [backend/app/services/auth.py:37-52](file://backend/app/services/auth.py#L37-L52)
|
||||
|
||||
章节来源
|
||||
- [tests/test_auth.py:1-104](file://tests/test_auth.py#L1-L104)
|
||||
- [tests/conftest.py:28-62](file://tests/conftest.py#L28-L62)
|
||||
- [backend/app/api/auth.py:13-43](file://backend/app/api/auth.py#L13-L43)
|
||||
- [backend/app/services/auth.py:37-69](file://backend/app/services/auth.py#L37-L69)
|
||||
|
||||
### 引用引擎测试
|
||||
- 测试要点
|
||||
- BrandMatcher:精确匹配置信度 1.0、别名匹配置信度 0.9、模糊匹配阈值 >0.4、无匹配返回 False/None/0.0
|
||||
- CompetitorDetector:在文本中识别除目标品牌外的竞争品牌
|
||||
- 引用位置与上下文:按段落定位首次出现位置并截取上下文片段
|
||||
- 测试夹具
|
||||
- 直接构造 BrandMatcher/CompetitorDetector 实例,无需外部依赖
|
||||
- 断言策略
|
||||
- 字典键断言:cited、match_type、confidence、position、citation_text
|
||||
- 列表断言:competitor_brands 包含预期品牌且不含目标品牌
|
||||
- 示例路径
|
||||
- [tests/test_citation_engine.py:6-37](file://tests/test_citation_engine.py#L6-L37)
|
||||
- [tests/test_citation_engine.py:39-45](file://tests/test_citation_engine.py#L39-L45)
|
||||
- [tests/test_citation_engine.py:47-54](file://tests/test_citation_engine.py#L47-L54)
|
||||
- [backend/app/workers/citation_engine.py:19-120](file://backend/app/workers/citation_engine.py#L19-L120)
|
||||
- [backend/app/workers/citation_engine.py:122-146](file://backend/app/workers/citation_engine.py#L122-L146)
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BrandMatcher {
|
||||
+match(text) dict
|
||||
-_extract_candidates(text) list
|
||||
-_extract_position_and_context(text, keyword) tuple
|
||||
}
|
||||
class CompetitorDetector {
|
||||
+detect(text, target_brand) list
|
||||
}
|
||||
BrandMatcher --> CompetitorDetector : "在引擎中协作"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/citation_engine.py:19-146](file://backend/app/workers/citation_engine.py#L19-L146)
|
||||
|
||||
章节来源
|
||||
- [tests/test_citation_engine.py:1-54](file://tests/test_citation_engine.py#L1-L54)
|
||||
- [backend/app/workers/citation_engine.py:19-146](file://backend/app/workers/citation_engine.py#L19-L146)
|
||||
|
||||
### 查询处理测试
|
||||
- 测试要点
|
||||
- 创建查询:成功返回 201,响应包含关键词与目标品牌;超出配额抛出 403
|
||||
- 列出查询:返回 items 与 total
|
||||
- 更新查询:返回更新后的关键词与频率
|
||||
- 删除查询:返回 204
|
||||
- 查询不存在或属于其他用户:返回 404
|
||||
- 测试夹具
|
||||
- mock_query:模拟查询对象,包含 id、keyword、target_brand、platforms、frequency、status 等
|
||||
- override_get_current_user、auth_headers、async_client
|
||||
- 断言策略
|
||||
- 状态码断言:201/200/204/403/404
|
||||
- 响应体字段断言:keyword、frequency、detail 等
|
||||
- 示例路径
|
||||
- [tests/test_queries.py:30-48](file://tests/test_queries.py#L30-L48)
|
||||
- [tests/test_queries.py:51-71](file://tests/test_queries.py#L51-L71)
|
||||
- [tests/test_queries.py:74-84](file://tests/test_queries.py#L74-L84)
|
||||
- [tests/test_queries.py:87-113](file://tests/test_queries.py#L87-L113)
|
||||
- [tests/test_queries.py:116-125](file://tests/test_queries.py#L116-L125)
|
||||
- [tests/test_queries.py:128-139](file://tests/test_queries.py#L128-L139)
|
||||
- [tests/test_queries.py:143-154](file://tests/test_queries.py#L143-L154)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant T as "测试用例(test_queries.py)"
|
||||
participant F as "夹具(conftest.py)"
|
||||
participant AC as "AsyncClient"
|
||||
participant R as "路由(queries.py)"
|
||||
participant S as "服务层"
|
||||
participant DB as "数据库"
|
||||
T->>F : 获取 mock_query/override_get_current_user/auth_headers
|
||||
T->>AC : POST /api/v1/queries/
|
||||
AC->>R : 路由处理器
|
||||
R->>S : create_query(...)
|
||||
S->>DB : 插入查询记录
|
||||
S-->>R : 返回查询对象
|
||||
R-->>AC : 201 + 查询信息
|
||||
AC-->>T : 断言状态码与字段
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [tests/test_queries.py:30-48](file://tests/test_queries.py#L30-L48)
|
||||
- [backend/app/api/queries.py:26-39](file://backend/app/api/queries.py#L26-L39)
|
||||
|
||||
章节来源
|
||||
- [tests/test_queries.py:1-154](file://tests/test_queries.py#L1-L154)
|
||||
- [backend/app/api/queries.py:15-86](file://backend/app/api/queries.py#L15-L86)
|
||||
|
||||
### 引用数据测试
|
||||
- 测试要点
|
||||
- 获取引用列表:返回 items 与 total,包含 platform 与 cited 等字段
|
||||
- 统计信息:返回总量、引用率、按平台分布与趋势
|
||||
- CSV 导出:返回 text/csv 内容类型与附件头,内容包含平台信息
|
||||
- 测试夹具
|
||||
- mock_citation_record:模拟引用记录对象
|
||||
- override_get_current_user、auth_headers、async_client
|
||||
- 断言策略
|
||||
- 状态码断言:200
|
||||
- 响应体断言:total、items、by_platform、trend 等
|
||||
- 响应头断言:Content-Type 以 text/csv 开头、Content-Disposition 含 attachment
|
||||
- 示例路径
|
||||
- [tests/test_citations.py:24-41](file://tests/test_citations.py#L24-L41)
|
||||
- [tests/test_citations.py:44-73](file://tests/test_citations.py#L44-L73)
|
||||
- [tests/test_citations.py:76-93](file://tests/test_citations.py#L76-L93)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant T as "测试用例(test_citations.py)"
|
||||
participant F as "夹具(conftest.py)"
|
||||
participant AC as "AsyncClient"
|
||||
participant R as "路由(citations.py)"
|
||||
participant S as "服务层"
|
||||
participant DB as "数据库"
|
||||
T->>F : 获取 mock_citation_record/override_get_current_user/auth_headers
|
||||
T->>AC : GET /api/v1/citations/stats
|
||||
AC->>R : 路由处理器
|
||||
R->>S : get_citation_stats(...)
|
||||
S->>DB : 聚合统计
|
||||
S-->>R : 返回统计结果
|
||||
R-->>AC : 200 + 统计信息
|
||||
AC-->>T : 断言字段与头
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [tests/test_citations.py:44-73](file://tests/test_citations.py#L44-L73)
|
||||
- [backend/app/api/citations.py:49-56](file://backend/app/api/citations.py#L49-L56)
|
||||
|
||||
章节来源
|
||||
- [tests/test_citations.py:1-93](file://tests/test_citations.py#L1-L93)
|
||||
- [backend/app/api/citations.py:25-78](file://backend/app/api/citations.py#L25-L78)
|
||||
|
||||
### 测试夹具详解与最佳实践
|
||||
- mock_scheduler
|
||||
- 作用:屏蔽后台调度器,避免真实任务启动影响测试稳定性
|
||||
- 使用:在会话级 autouse fixture 中替换 app.main.query_scheduler 的 start/shutdown
|
||||
- 参考路径:[tests/conftest.py:19-26](file://tests/conftest.py#L19-L26)
|
||||
- mock_user
|
||||
- 作用:提供认证用户对象,包含 id、email、name、plan、max_queries、is_active 等
|
||||
- 使用:作为 override_get_current_user 的数据源
|
||||
- 参考路径:[tests/conftest.py:29-39](file://tests/conftest.py#L29-L39)
|
||||
- override_get_current_user
|
||||
- 作用:重写依赖 get_current_user,使路由自动解析到 mock_user
|
||||
- 使用:在测试函数参数中注入,结束后清理依赖覆盖
|
||||
- 参考路径:[tests/conftest.py:42-50](file://tests/conftest.py#L42-L50)
|
||||
- auth_token
|
||||
- 作用:基于 mock_user 的 id 生成 JWT
|
||||
- 使用:配合 auth_headers 进行认证请求
|
||||
- 参考路径:[tests/conftest.py:54-56](file://tests/conftest.py#L54-L56)
|
||||
- auth_headers
|
||||
- 作用:组装 `Authorization: Bearer <token>` 请求头
|
||||
- 使用:GET/POST/PUT/DELETE 等请求统一携带
|
||||
- 参考路径:[tests/conftest.py:60-62](file://tests/conftest.py#L60-L62)
|
||||
- async_client
|
||||
- 作用:ASGI 异步 HTTP 客户端,用于端到端测试
|
||||
- 使用:通过 async with 上下文管理器确保客户端在生命周期内正确创建与释放
|
||||
- 参考路径:[tests/conftest.py:65-71](file://tests/conftest.py#L65-L71)
|
||||
|
||||
最佳实践
|
||||
- 尽量使用 patch 替换服务层或外部依赖,避免真实网络或数据库调用
|
||||
- 对于需要认证的路由,优先使用 override_get_current_user 与 auth_headers
|
||||
- 对于后台任务,使用 mock_scheduler 屏蔽真实调度器
|
||||
- 对于异步函数,使用 pytest.mark.asyncio 并通过 AsyncClient 发起请求
|
||||
- 清理:在依赖覆盖后及时 pop,避免影响其他测试
|
||||
|
||||
章节来源
|
||||
- [tests/conftest.py:19-71](file://tests/conftest.py#L19-L71)
|
||||
|
||||
### 异步函数、依赖注入与错误处理测试示例
|
||||
- 异步函数
|
||||
- 使用 pytest.mark.asyncio 标记测试函数
|
||||
- 使用 ASGI AsyncClient 发起请求
|
||||
- 参考路径:[tests/test_auth.py:25-40](file://tests/test_auth.py#L25-L40)、[tests/test_queries.py:30-48](file://tests/test_queries.py#L30-L48)
|
||||
- 依赖注入
|
||||
- 通过 app.dependency_overrides 临时替换 get_current_user
|
||||
- 参考路径:[tests/conftest.py:42-50](file://tests/conftest.py#L42-L50)、[backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- 错误处理
|
||||
- 通过 patch 抛出异常或返回 None,验证路由的 HTTP 异常与错误详情
|
||||
- 参考路径:[tests/test_auth.py:43-58](file://tests/test_auth.py#L43-L58)、[tests/test_queries.py:51-71](file://tests/test_queries.py#L51-L71)
|
||||
|
||||
章节来源
|
||||
- [tests/test_auth.py:25-58](file://tests/test_auth.py#L25-L58)
|
||||
- [tests/test_queries.py:51-71](file://tests/test_queries.py#L51-L71)
|
||||
- [tests/conftest.py:42-50](file://tests/conftest.py#L42-L50)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
|
||||
### 测试数据准备与清理
|
||||
- 准备
|
||||
- 使用 AsyncMock 构造模型对象(用户、查询、引用记录),设置必要字段
|
||||
- 使用 patch 模拟服务层返回值或异常
|
||||
- 使用 override_get_current_user 提供认证上下文
|
||||
- 清理
|
||||
- 测试结束后 pop 依赖覆盖,避免污染其他测试
|
||||
- 使用 async with 上下文管理 async_client 生命周期
|
||||
- 参考路径
|
||||
- [tests/conftest.py:42-50](file://tests/conftest.py#L42-L50)、[tests/conftest.py:65-71](file://tests/conftest.py#L65-L71)
|
||||
- [tests/test_citations.py:8-21](file://tests/test_citations.py#L8-L21)、[tests/test_queries.py:10-27](file://tests/test_queries.py#L10-L27)
|
||||
|
||||
章节来源
|
||||
- [tests/conftest.py:42-50](file://tests/conftest.py#L42-L50)
|
||||
- [tests/conftest.py:65-71](file://tests/conftest.py#L65-L71)
|
||||
- [tests/test_citations.py:8-21](file://tests/test_citations.py#L8-L21)
|
||||
- [tests/test_queries.py:10-27](file://tests/test_queries.py#L10-L27)
|
||||
|
||||
## 依赖分析
|
||||
- 测试对应用层的依赖
|
||||
- 测试通过 ASGI AsyncClient 直接调用 FastAPI 路由,路由再依赖 get_current_user 解析 JWT
|
||||
- get_current_user 依赖 OAuth2PasswordBearer 与 verify_token,最终查询数据库 User 表
|
||||
- 测试对服务层的依赖
|
||||
- 通过 patch 替换服务层函数(如 register_user、authenticate_user、create_query 等)
|
||||
- 测试对业务组件的依赖
|
||||
- 引擎测试直接构造 BrandMatcher/CompetitorDetector,不依赖外部服务
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
T["测试用例"] --> C["conftest.py 夹具"]
|
||||
C --> A["FastAPI 路由"]
|
||||
A --> D["依赖注入(get_current_user)"]
|
||||
D --> S["服务层"]
|
||||
S --> DB["数据库"]
|
||||
T --> E["引用引擎(直接类实例)"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [tests/conftest.py:28-71](file://tests/conftest.py#L28-L71)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/api/auth.py:13-43](file://backend/app/api/auth.py#L13-L43)
|
||||
- [backend/app/api/queries.py:26-39](file://backend/app/api/queries.py#L26-L39)
|
||||
- [backend/app/workers/citation_engine.py:19-146](file://backend/app/workers/citation_engine.py#L19-L146)
|
||||
|
||||
章节来源
|
||||
- [tests/conftest.py:28-71](file://tests/conftest.py#L28-L71)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/api/auth.py:13-43](file://backend/app/api/auth.py#L13-L43)
|
||||
- [backend/app/api/queries.py:26-39](file://backend/app/api/queries.py#L26-L39)
|
||||
- [backend/app/workers/citation_engine.py:19-146](file://backend/app/workers/citation_engine.py#L19-L146)
|
||||
|
||||
## 性能考虑
|
||||
- 使用 ASGI AsyncClient 避免真实网络往返,提升测试速度
|
||||
- 通过 patch 替换外部服务调用,减少 IO 与等待时间
|
||||
- 使用 mock_scheduler 屏蔽后台任务,避免并发与定时任务干扰
|
||||
- 对纯函数(引擎类)直接实例化断言,避免数据库与网络依赖
|
||||
|
||||
## 故障排查指南
|
||||
- 401 未认证
|
||||
- 检查是否正确注入 override_get_current_user 与 auth_headers
|
||||
- 参考路径:[tests/test_auth.py:88-104](file://tests/test_auth.py#L88-L104)、[tests/conftest.py:42-62](file://tests/conftest.py#L42-L62)
|
||||
- 403 超出配额
|
||||
- 检查服务层是否抛出 PermissionError,测试中通过 patch 触发
|
||||
- 参考路径:[tests/test_queries.py:51-71](file://tests/test_queries.py#L51-L71)
|
||||
- 404 查询不存在或属于其他用户
|
||||
- 检查 get_query 返回 None 的分支,确认依赖覆盖与参数
|
||||
- 参考路径:[tests/test_queries.py:128-139](file://tests/test_queries.py#L128-L139)、[tests/test_queries.py:143-154](file://tests/test_queries.py#L143-L154)
|
||||
- CSV 导出问题
|
||||
- 检查 Content-Type 与 Content-Disposition 头,以及响应体是否包含平台信息
|
||||
- 参考路径:[tests/test_citations.py:76-93](file://tests/test_citations.py#L76-L93)
|
||||
|
||||
章节来源
|
||||
- [tests/test_auth.py:88-104](file://tests/test_auth.py#L88-L104)
|
||||
- [tests/test_queries.py:51-71](file://tests/test_queries.py#L51-L71)
|
||||
- [tests/test_queries.py:128-154](file://tests/test_queries.py#L128-L154)
|
||||
- [tests/test_citations.py:76-93](file://tests/test_citations.py#L76-L93)
|
||||
- [tests/conftest.py:42-62](file://tests/conftest.py#L42-L62)
|
||||
|
||||
## 结论
|
||||
本测试体系通过夹具与 patch 有效隔离外部依赖,结合 ASGI 异步客户端完成端到端验证。认证、查询、引用数据三大模块均覆盖成功与错误路径,引擎模块以纯函数形式保证可测试性与高内聚。建议持续补充边界条件与并发场景测试,逐步提升整体覆盖率。
|
||||
|
||||
## 附录
|
||||
- 测试覆盖率建议
|
||||
- 路由层:100%(含错误分支)
|
||||
- 服务层:100%(含异常路径)
|
||||
- 引擎类:100%(精确/别名/模糊/无匹配、位置与上下文)
|
||||
- 数据模型:100%(字段与索引覆盖)
|
||||
- 参考文件
|
||||
- [backend/app/models/user.py:1-41](file://backend/app/models/user.py#L1-L41)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/api/deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
|
|
@ -0,0 +1,384 @@
|
|||
# 测试最佳实践
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [tests/conftest.py](file://tests/conftest.py)
|
||||
- [tests/test_auth.py](file://tests/test_auth.py)
|
||||
- [tests/test_citations.py](file://tests/test_citations.py)
|
||||
- [tests/test_queries.py](file://tests/test_queries.py)
|
||||
- [tests/test_citation_engine.py](file://tests/test_citation_engine.py)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/api/deps.py](file://backend/app/api/deps.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/alembic.ini](file://backend/alembic.ini)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本指南面向GEO项目的测试工作,系统性地给出测试编写规范、覆盖率要求与测量方法、持续集成配置建议、调试技巧以及测试环境与数据管理的最佳实践。文档以仓库现有测试代码为基础,结合后端FastAPI应用、数据库与任务调度器的实际实现,帮助团队建立一致、可维护、高覆盖度的测试体系。
|
||||
|
||||
## 项目结构
|
||||
测试相关的核心位置与职责如下:
|
||||
- tests 目录:集中存放pytest测试用例与通用fixture配置
|
||||
- backend/app:后端应用入口、API路由、依赖注入、模型与服务
|
||||
- backend/app/workers:引用检测引擎与任务调度器
|
||||
- docker-compose.yml:本地开发与测试所需的数据库与缓存服务编排
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "测试层"
|
||||
T1["tests/conftest.py"]
|
||||
T2["tests/test_auth.py"]
|
||||
T3["tests/test_citations.py"]
|
||||
T4["tests/test_queries.py"]
|
||||
T5["tests/test_citation_engine.py"]
|
||||
end
|
||||
subgraph "后端应用"
|
||||
A1["app/main.py"]
|
||||
A2["app/api/deps.py"]
|
||||
W1["workers/citation_engine.py"]
|
||||
W2["workers/scheduler.py"]
|
||||
end
|
||||
subgraph "基础设施"
|
||||
D1["docker-compose.yml"]
|
||||
DB["PostgreSQL"]
|
||||
R["Redis"]
|
||||
end
|
||||
T1 --> A1
|
||||
T2 --> A1
|
||||
T3 --> A1
|
||||
T4 --> A1
|
||||
T5 --> W1
|
||||
A1 --> W2
|
||||
W1 --> DB
|
||||
W2 --> DB
|
||||
D1 --> DB
|
||||
D1 --> R
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [tests/conftest.py:1-71](file://tests/conftest.py#L1-L71)
|
||||
- [tests/test_auth.py:1-104](file://tests/test_auth.py#L1-L104)
|
||||
- [tests/test_citations.py:1-93](file://tests/test_citations.py#L1-L93)
|
||||
- [tests/test_queries.py:1-154](file://tests/test_queries.py#L1-L154)
|
||||
- [tests/test_citation_engine.py:1-54](file://tests/test_citation_engine.py#L1-L54)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/api/deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
章节来源
|
||||
- [tests/conftest.py:1-71](file://tests/conftest.py#L1-L71)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
## 核心组件
|
||||
- 测试夹具与环境
|
||||
- 会话级mock:屏蔽真实任务调度器,避免后台作业影响测试稳定性
|
||||
- 认证夹具:生成模拟用户、JWT令牌与请求头,统一处理鉴权依赖
|
||||
- 异步HTTP客户端:基于ASGI传输,便于对FastAPI路由进行端到端测试
|
||||
- API测试范围
|
||||
- 认证模块:注册、登录、个人信息读取
|
||||
- 查询模块:创建、列表、更新、删除、详情与权限边界
|
||||
- 引用数据模块:查询、统计、导出CSV
|
||||
- 单元测试范围
|
||||
- 引用检测引擎:品牌匹配、竞争品牌识别、引用位置与置信度
|
||||
- 测试依赖
|
||||
- pytest、pytest-asyncio、httpx、aiosqlite、unittest.mock
|
||||
|
||||
章节来源
|
||||
- [tests/conftest.py:19-71](file://tests/conftest.py#L19-L71)
|
||||
- [tests/test_auth.py:25-104](file://tests/test_auth.py#L25-L104)
|
||||
- [tests/test_queries.py:29-154](file://tests/test_queries.py#L29-L154)
|
||||
- [tests/test_citations.py:23-93](file://tests/test_citations.py#L23-L93)
|
||||
- [tests/test_citation_engine.py:6-54](file://tests/test_citation_engine.py#L6-L54)
|
||||
- [backend/requirements.txt:31-35](file://backend/requirements.txt#L31-L35)
|
||||
|
||||
## 架构总览
|
||||
下图展示测试执行路径与关键依赖交互,体现测试如何通过夹具注入、路由访问与服务mock实现可控的测试场景。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Py as "pytest"
|
||||
participant Fix as "conftest夹具"
|
||||
participant AC as "AsyncClient"
|
||||
participant APP as "FastAPI应用"
|
||||
participant DEP as "依赖注入(get_current_user)"
|
||||
participant SVC as "业务服务(被patch)"
|
||||
participant DB as "数据库/缓存"
|
||||
Py->>Fix : "加载会话级fixture"
|
||||
Fix->>APP : "注入依赖覆盖(get_current_user)"
|
||||
Py->>AC : "创建异步HTTP客户端"
|
||||
AC->>APP : "发送API请求"
|
||||
APP->>DEP : "解析JWT并获取当前用户"
|
||||
DEP-->>APP : "返回模拟用户"
|
||||
APP->>SVC : "调用业务逻辑(被patch)"
|
||||
SVC-->>APP : "返回测试数据/异常"
|
||||
APP-->>AC : "响应JSON/状态码"
|
||||
AC-->>Py : "断言结果"
|
||||
APP->>DB : "读写(测试中通常隔离)"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [tests/conftest.py:19-71](file://tests/conftest.py#L19-L71)
|
||||
- [tests/test_auth.py:25-104](file://tests/test_auth.py#L25-L104)
|
||||
- [tests/test_queries.py:29-154](file://tests/test_queries.py#L29-L154)
|
||||
- [tests/test_citations.py:23-93](file://tests/test_citations.py#L23-L93)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/main.py:13-48](file://backend/app/main.py#L13-L48)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 测试命名约定与结构
|
||||
- 文件命名
|
||||
- 使用 test_xxx.py,如 test_auth.py、test_queries.py、test_citations.py、test_citation_engine.py
|
||||
- 函数命名
|
||||
- 使用 `test_<被测对象>_<场景>` 的命名模式,如 test_register_success、test_create_query_exceeds_limit
|
||||
- 夹具命名
|
||||
- 使用 fixture_xxx 或 mock_xxx,如 mock_user、auth_headers、override_get_current_user
|
||||
- 断言风格
|
||||
- 明确断言状态码、响应体字段与业务语义,如“包含特定错误信息”“返回CSV头”
|
||||
- 注释标准
|
||||
- 每个fixture与关键测试函数添加简要说明,解释用途与边界条件
|
||||
|
||||
章节来源
|
||||
- [tests/test_auth.py:11-22](file://tests/test_auth.py#L11-L22)
|
||||
- [tests/test_queries.py:10-26](file://tests/test_queries.py#L10-L26)
|
||||
- [tests/test_citations.py:8-20](file://tests/test_citations.py#L8-L20)
|
||||
- [tests/test_citation_engine.py:3-5](file://tests/test_citation_engine.py#L3-L5)
|
||||
|
||||
### 认证与授权测试
|
||||
- 关键点
|
||||
- 使用依赖覆盖模拟当前用户,确保受保护路由在已认证与未认证两种场景下的行为
|
||||
- 通过patch替换服务层方法,验证成功与失败路径
|
||||
- 推荐断言
|
||||
- 成功:200/201响应、返回必要字段、token类型与用户信息
|
||||
- 失败:4xx状态码、错误消息包含预期关键字
|
||||
- 安全性
|
||||
- 确保未覆盖依赖时,未认证访问返回401
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as "客户端"
|
||||
participant A as "认证路由"
|
||||
participant D as "get_current_user"
|
||||
participant S as "认证服务(被patch)"
|
||||
C->>A : "POST /api/v1/auth/login"
|
||||
A->>D : "解析JWT并获取用户"
|
||||
D-->>A : "返回模拟用户"
|
||||
A->>S : "authenticate_user(...)"
|
||||
S-->>A : "返回用户或None"
|
||||
A-->>C : "200/401 + JSON"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [tests/test_auth.py:62-84](file://tests/test_auth.py#L62-L84)
|
||||
- [tests/conftest.py:42-50](file://tests/conftest.py#L42-L50)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
|
||||
章节来源
|
||||
- [tests/test_auth.py:25-104](file://tests/test_auth.py#L25-L104)
|
||||
- [tests/conftest.py:28-62](file://tests/conftest.py#L28-L62)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
|
||||
### 查询管理测试
|
||||
- 关键点
|
||||
- 使用mock对象构造典型查询实体,覆盖创建、列表、更新、删除、不存在与越权访问
|
||||
- 通过patch触发权限限制与业务异常,验证403/404响应
|
||||
- 推荐断言
|
||||
- 字段一致性、分页总数、状态转换、权限边界
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["开始: 发送请求"]) --> Patch["patch 业务方法"]
|
||||
Patch --> CallAPI["调用 /queries/* 路由"]
|
||||
CallAPI --> Resp{"响应状态码"}
|
||||
Resp --> |201/200| AssertOK["断言返回字段与业务语义"]
|
||||
Resp --> |403/404| AssertErr["断言错误消息与权限边界"]
|
||||
AssertOK --> End(["结束"])
|
||||
AssertErr --> End
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [tests/test_queries.py:30-70](file://tests/test_queries.py#L30-L70)
|
||||
- [tests/test_queries.py:127-139](file://tests/test_queries.py#L127-L139)
|
||||
|
||||
章节来源
|
||||
- [tests/test_queries.py:29-154](file://tests/test_queries.py#L29-L154)
|
||||
|
||||
### 引用数据与报告测试
|
||||
- 关键点
|
||||
- mock服务返回结构化数据,验证列表、统计聚合与CSV导出的响应头与内容
|
||||
- 推荐断言
|
||||
- 统计字段存在性与合理性、CSV内容片段包含关键字段
|
||||
|
||||
章节来源
|
||||
- [tests/test_citations.py:23-93](file://tests/test_citations.py#L23-L93)
|
||||
|
||||
### 引用检测引擎单元测试
|
||||
- 关键点
|
||||
- 测试品牌匹配器的精确、别名、模糊匹配与无匹配场景,以及引用位置与置信度
|
||||
- 测试竞争品牌检测器对不同行业类别的识别
|
||||
- 推荐断言
|
||||
- 匹配结果布尔值、匹配类型、置信度范围、位置编号与上下文片段
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BrandMatcher {
|
||||
+match(text) dict
|
||||
-_extract_candidates(text) list
|
||||
-_extract_position_and_context(text, keyword) tuple
|
||||
}
|
||||
class CompetitorDetector {
|
||||
+detect(text, target_brand) list
|
||||
}
|
||||
BrandMatcher <.. CompetitorDetector : "组合使用"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [tests/test_citation_engine.py:6-54](file://tests/test_citation_engine.py#L6-L54)
|
||||
- [backend/app/workers/citation_engine.py:19-146](file://backend/app/workers/citation_engine.py#L19-L146)
|
||||
|
||||
章节来源
|
||||
- [tests/test_citation_engine.py:1-54](file://tests/test_citation_engine.py#L1-L54)
|
||||
- [backend/app/workers/citation_engine.py:19-146](file://backend/app/workers/citation_engine.py#L19-L146)
|
||||
|
||||
## 依赖分析
|
||||
- 测试与应用的耦合
|
||||
- 通过依赖覆盖与patch解耦具体实现细节,提升测试稳定性
|
||||
- 会话级mock调度器避免真实任务执行,保证测试幂等性
|
||||
- 外部依赖
|
||||
- PostgreSQL与Redis通过docker-compose提供,测试可选择独立数据库或内存数据库(如基于aiosqlite驱动的SQLite内存库)以加速
|
||||
- 日志与可观测性
|
||||
- 引擎与调度器使用标准日志,便于定位问题;Alembic日志级别较低,避免噪声
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
Py["pytest"] --> CF["conftest.py"]
|
||||
CF --> APP["app/main.py"]
|
||||
APP --> SCH["workers/scheduler.py"]
|
||||
SCH --> CE["workers/citation_engine.py"]
|
||||
CE --> DB["PostgreSQL/Redis"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [tests/conftest.py:19-25](file://tests/conftest.py#L19-L25)
|
||||
- [backend/app/main.py:10-21](file://backend/app/main.py#L10-L21)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
|
||||
章节来源
|
||||
- [tests/conftest.py:19-25](file://tests/conftest.py#L19-L25)
|
||||
- [backend/app/main.py:10-21](file://backend/app/main.py#L10-L21)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/alembic.ini:115-149](file://backend/alembic.ini#L115-L149)
|
||||
|
||||
## 性能考虑
|
||||
- 测试速度
|
||||
- 使用会话级fixture减少重复初始化成本
|
||||
- 将外部依赖(数据库、缓存)置于容器中,避免每次重启
|
||||
- 并发与异步
|
||||
- 使用pytest-asyncio与AsyncClient,确保异步测试稳定
|
||||
- 覆盖率
|
||||
- 建议优先达到以下目标:行覆盖率≥80%、分支覆盖率≥70%、功能覆盖率≥90%
|
||||
- 对关键路径(认证、查询、引用检测)进行重点覆盖
|
||||
|
||||
## 故障排查指南
|
||||
- 常见问题
|
||||
- 未覆盖依赖导致未认证访问失败:确认在测试中正确注入依赖覆盖
|
||||
- patch目标不匹配:核对被patch的服务方法签名与调用路径
|
||||
- 调度器干扰:确保会话级mock调度器生效
|
||||
- 调试技巧
|
||||
- 使用pytest调试选项:--pdb(失败时进入调试器)、-v(详细输出)、-s(关闭输出捕获,直接显示print与日志输出)
|
||||
- 启用更详细的日志:在测试前设置日志级别,关注引擎与调度器日志
|
||||
- 分离数据库:使用独立测试数据库或内存数据库(如基于aiosqlite驱动的SQLite内存库)以避免数据污染
|
||||
- 错误排查步骤
|
||||
- 逐个缩小patch范围,确认业务方法是否被正确替换
|
||||
- 检查JWT生成与解析流程,确保令牌有效
|
||||
- 校验路由前缀与路径参数,避免404
|
||||
|
||||
章节来源
|
||||
- [tests/conftest.py:19-25](file://tests/conftest.py#L19-L25)
|
||||
- [tests/test_auth.py:88-104](file://tests/test_auth.py#L88-L104)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
|
||||
## 结论
|
||||
通过统一的命名与结构规范、完善的夹具与mock策略、清晰的断言与覆盖率目标,以及合理的CI/CD与调试实践,GEO项目可以构建高质量、可维护的测试体系。建议在现有基础上逐步扩展覆盖率,并引入自动化CI流水线以保障质量。
|
||||
|
||||
## 附录
|
||||
|
||||
### 测试覆盖率要求与测量方法
|
||||
- 行覆盖率(Line Coverage)
|
||||
- 目标:≥80%
|
||||
- 方法:使用覆盖率工具(如pytest-cov)生成报告,关注未覆盖的分支与异常路径
|
||||
- 分支覆盖率(Branch Coverage)
|
||||
- 目标:≥70%
|
||||
- 方法:针对if/else、异常处理与权限判断路径分别设计用例
|
||||
- 功能覆盖率(Functional Coverage)
|
||||
- 目标:≥90%
|
||||
- 方法:以API端点与核心算法(品牌匹配、竞争品牌检测)为功能域,确保每个功能域至少有一个正向与一个反向用例
|
||||
|
||||
### 持续集成配置建议
|
||||
- 触发条件
|
||||
- push到主分支与拉取请求
|
||||
- 步骤建议
|
||||
- 安装依赖(后端requirements.txt)
|
||||
- 启动PostgreSQL与Redis(可复用docker-compose)
|
||||
- 运行pytest并生成覆盖率报告
|
||||
- 上传覆盖率与测试报告
|
||||
- 参考文件
|
||||
- 后端依赖清单:[backend/requirements.txt:31-35](file://backend/requirements.txt#L31-L35)
|
||||
- 服务编排:[docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:31-35](file://backend/requirements.txt#L31-L35)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
### 测试环境管理
|
||||
- 数据库
|
||||
- 开发:使用docker-compose提供的PostgreSQL
|
||||
- 测试:可选独立测试库或内存数据库(如基于aiosqlite驱动的SQLite内存库),避免跨测试污染
|
||||
- 缓存
|
||||
- Redis用于任务调度与缓存,测试中可直接使用容器版本
|
||||
- 依赖注入
|
||||
- 通过conftest中的依赖覆盖,确保测试中始终使用mock用户与禁用真实调度器
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [tests/conftest.py:19-50](file://tests/conftest.py#L19-L50)
|
||||
|
||||
### 测试数据管理
|
||||
- 建议
|
||||
- 使用fixture生成稳定的UUID与时间戳,确保可重复性
|
||||
- 对于复杂对象(查询、引用记录),在fixture中集中定义字段默认值
|
||||
- 对于CSV导出等场景,使用固定字符串片段进行断言,避免动态内容导致脆弱断言
|
||||
|
||||
章节来源
|
||||
- [tests/test_queries.py:10-26](file://tests/test_queries.py#L10-L26)
|
||||
- [tests/test_citations.py:8-20](file://tests/test_citations.py#L8-L20)
|
||||
- [tests/test_citations.py:76-93](file://tests/test_citations.py#L76-L93)
|
||||
|
||||
### 测试报告生成
|
||||
- 建议
|
||||
- 使用pytest-cov生成HTML或XML报告,便于CI集成
|
||||
- 在CI中保留报告产物,便于回溯历史趋势
|
||||
- 日志配置
|
||||
- Alembic默认日志输出已较为精简,可在测试中临时调高日志详细程度(例如将级别设为DEBUG)以辅助排查
|
||||
|
||||
章节来源
|
||||
- [backend/alembic.ini:115-149](file://backend/alembic.ini#L115-L149)
|
||||
|
|
@ -0,0 +1,585 @@
|
|||
# 测试策略
|
||||
|
||||
<cite>
|
||||
**本文引用的文件**
|
||||
- [tests/conftest.py](file://tests/conftest.py)
|
||||
- [tests/test_auth.py](file://tests/test_auth.py)
|
||||
- [tests/test_business_flow.py](file://tests/test_business_flow.py)
|
||||
- [tests/test_citation_engine.py](file://tests/test_citation_engine.py)
|
||||
- [tests/test_citations.py](file://tests/test_citations.py)
|
||||
- [tests/test_queries.py](file://tests/test_queries.py)
|
||||
- [tests/test_scheduler.py](file://tests/test_scheduler.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/api/deps.py](file://backend/app/api/deps.py)
|
||||
- [backend/app/services/auth.py](file://backend/app/services/auth.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
</cite>
|
||||
|
||||
## 更新摘要
|
||||
**变更内容**
|
||||
- 新增业务流程测试章节,涵盖端到端业务场景测试
|
||||
- 新增调度器测试章节,包括定时任务调度和频率计算测试
|
||||
- 完善测试最佳实践,增加业务流程测试和调度器测试的最佳实践指导
|
||||
- 更新测试策略以反映新增的测试覆盖范围
|
||||
|
||||
## 目录
|
||||
1. [引言](#引言)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [业务流程测试策略](#业务流程测试策略)
|
||||
7. [调度器测试策略](#调度器测试策略)
|
||||
8. [依赖分析](#依赖分析)
|
||||
9. [性能考虑](#性能考虑)
|
||||
10. [故障排查指南](#故障排查指南)
|
||||
11. [结论](#结论)
|
||||
12. [附录](#附录)
|
||||
|
||||
## 引言
|
||||
本测试策略文档面向GEO项目的Pytest测试体系,覆盖单元测试、集成测试和业务流程测试的设计与实施要点。内容包括:测试夹具与模拟对象的组织方式、测试数据管理策略、认证模块、引用引擎、查询处理、业务流程和调度器等关键功能的测试用例设计思路;同时给出测试最佳实践,包括覆盖率目标、持续集成配置建议以及测试环境管理方案,并提供调试技巧与性能测试方法。
|
||||
|
||||
## 项目结构
|
||||
测试目录位于仓库根目录下的tests,采用按功能模块划分的组织方式,配合Pytest的conftest集中式夹具与模拟对象,确保测试隔离与可重复性。后端应用以FastAPI为核心,API层通过依赖注入获取当前用户与数据库会话,服务层封装业务逻辑,工作器(worker)负责异步任务与平台适配。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "测试层"
|
||||
C["tests/conftest.py"]
|
||||
TA["tests/test_auth.py"]
|
||||
TQ["tests/test_queries.py"]
|
||||
TC["tests/test_citations.py"]
|
||||
TCE["tests/test_citation_engine.py"]
|
||||
TB["tests/test_business_flow.py"]
|
||||
TS["tests/test_scheduler.py"]
|
||||
end
|
||||
subgraph "后端应用"
|
||||
M["backend/app/main.py"]
|
||||
D["backend/app/api/deps.py"]
|
||||
DB["backend/app/database.py"]
|
||||
CFG["backend/app/config.py"]
|
||||
AUTH_API["backend/app/api/auth.py"]
|
||||
QUERIES_API["backend/app/api/queries.py"]
|
||||
CITATIONS_API["backend/app/api/citations.py"]
|
||||
CE["backend/app/workers/citation_engine.py"]
|
||||
QS["backend/app/workers/scheduler.py"]
|
||||
end
|
||||
C --> TA
|
||||
C --> TQ
|
||||
C --> TC
|
||||
C --> TCE
|
||||
C --> TB
|
||||
C --> TS
|
||||
TA --> AUTH_API
|
||||
TQ --> QUERIES_API
|
||||
TC --> CITATIONS_API
|
||||
TCE --> CE
|
||||
TS --> QS
|
||||
AUTH_API --> D
|
||||
QUERIES_API --> D
|
||||
CITATIONS_API --> D
|
||||
D --> DB
|
||||
DB --> CFG
|
||||
M --> AUTH_API
|
||||
M --> QUERIES_API
|
||||
M --> CITATIONS_API
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [tests/conftest.py:1-123](file://tests/conftest.py#L1-L123)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/api/deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/scheduler.py:1-182](file://backend/app/workers/scheduler.py#L1-L182)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
**章节来源**
|
||||
- [tests/conftest.py:1-123](file://tests/conftest.py#L1-L123)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
|
||||
## 核心组件
|
||||
- 测试夹具与模拟
|
||||
- 会话级调度器模拟:在测试启动时替换查询调度器,避免真实后台任务影响测试稳定性。
|
||||
- 用户与令牌:提供模拟用户对象、JWT访问令牌及请求头,便于认证相关接口测试。
|
||||
- 异步HTTP客户端:基于ASGI传输创建异步HTTP客户端,用于端到端API测试。
|
||||
- 依赖覆盖:通过依赖注入覆盖当前用户解析逻辑,简化认证流程。
|
||||
- 内存数据库:使用SQLite内存数据库进行集成测试,确保测试隔离性。
|
||||
- 测试数据管理
|
||||
- 使用pytest fixture生成模拟模型对象(如查询、引用记录),保证测试数据一致性与可读性。
|
||||
- 通过patch对服务层函数进行桩替,隔离外部依赖,提升测试确定性。
|
||||
- 直接操作数据库模型进行复杂场景测试,如权限隔离和统计计算。
|
||||
- 测试运行与并发
|
||||
- 使用pytest-asyncio标记异步测试,确保事件循环正确初始化与清理。
|
||||
- 支持并行执行多个测试文件,提高测试执行效率。
|
||||
|
||||
**章节来源**
|
||||
- [tests/conftest.py:19-123](file://tests/conftest.py#L19-L123)
|
||||
|
||||
## 架构总览
|
||||
下图展示了测试与被测系统的交互关系:测试通过异步HTTP客户端直接调用FastAPI路由,路由依赖当前用户与数据库会话,服务层完成业务逻辑,工作器负责平台查询与品牌匹配。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant T as "测试用例"
|
||||
participant AC as "异步HTTP客户端"
|
||||
participant APP as "FastAPI应用"
|
||||
participant R_AUTH as "认证路由"
|
||||
participant R_QUERIES as "查询路由"
|
||||
participant R_CIT as "引用路由"
|
||||
participant DEPS as "依赖注入(get_current_user)"
|
||||
participant SVC as "服务层"
|
||||
participant DB as "数据库"
|
||||
T->>AC : 发起HTTP请求
|
||||
AC->>APP : ASGI传输调用
|
||||
APP->>DEPS : 解析当前用户
|
||||
DEPS->>DB : 查询用户
|
||||
DB-->>DEPS : 返回用户
|
||||
APP->>R_AUTH : 认证相关端点
|
||||
APP->>R_QUERIES : 查询相关端点
|
||||
APP->>R_CIT : 引用相关端点
|
||||
R_AUTH->>SVC : 调用服务
|
||||
R_QUERIES->>SVC : 调用服务
|
||||
R_CIT->>SVC : 调用服务
|
||||
SVC->>DB : 数据持久化/查询
|
||||
DB-->>SVC : 返回结果
|
||||
SVC-->>APP : 返回响应
|
||||
APP-->>AC : 序列化响应
|
||||
AC-->>T : 断言结果
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [tests/conftest.py:117-123](file://tests/conftest.py#L117-L123)
|
||||
- [backend/app/main.py:38-42](file://backend/app/main.py#L38-L42)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/api/auth.py:13-43](file://backend/app/api/auth.py#L13-L43)
|
||||
- [backend/app/api/queries.py:15-86](file://backend/app/api/queries.py#L15-L86)
|
||||
- [backend/app/api/citations.py:25-78](file://backend/app/api/citations.py#L25-L78)
|
||||
- [backend/app/database.py:23-29](file://backend/app/database.py#L23-L29)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 认证模块测试策略
|
||||
- 测试目标
|
||||
- 注册成功与重复邮箱错误处理
|
||||
- 登录成功与密码错误场景
|
||||
- 当前用户信息获取(已认证与未认证)
|
||||
- 关键测试点
|
||||
- 使用patch对注册与登录服务函数进行桩替,控制返回值与异常
|
||||
- 通过依赖覆盖与令牌头验证路由鉴权中间件行为
|
||||
- 用例设计要点
|
||||
- 成功路径断言状态码与响应体字段
|
||||
- 失败路径断言HTTP状态码与错误详情
|
||||
- 未认证路径断言401并检查依赖覆盖清理
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant T as "测试用例"
|
||||
participant AC as "异步HTTP客户端"
|
||||
participant AUTH as "认证路由"
|
||||
participant SVC as "认证服务"
|
||||
participant DEPS as "依赖注入(get_current_user)"
|
||||
T->>AC : POST /api/v1/auth/register
|
||||
AC->>AUTH : 路由处理
|
||||
AUTH->>SVC : 注册服务
|
||||
SVC-->>AUTH : 返回用户或抛出异常
|
||||
AUTH-->>AC : 201/400
|
||||
AC-->>T : 断言
|
||||
T->>AC : POST /api/v1/auth/login
|
||||
AC->>AUTH : 路由处理
|
||||
AUTH->>SVC : 登录服务
|
||||
SVC-->>AUTH : 返回用户或None
|
||||
AUTH-->>AC : 200/401
|
||||
AC-->>T : 断言
|
||||
T->>AC : GET /api/v1/auth/me
|
||||
AC->>AUTH : 路由处理
|
||||
AUTH->>DEPS : 解析当前用户
|
||||
DEPS-->>AUTH : 返回用户或抛出401
|
||||
AUTH-->>AC : 200/401
|
||||
AC-->>T : 断言
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [tests/test_auth.py:25-104](file://tests/test_auth.py#L25-L104)
|
||||
- [backend/app/api/auth.py:13-43](file://backend/app/api/auth.py#L13-L43)
|
||||
- [backend/app/services/auth.py:37-69](file://backend/app/services/auth.py#L37-L69)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
|
||||
**章节来源**
|
||||
- [tests/test_auth.py:1-104](file://tests/test_auth.py#L1-L104)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/api/deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
|
||||
### 引擎与查询处理测试策略
|
||||
- 测试目标
|
||||
- 品牌匹配器:精确、别名、模糊匹配与无匹配场景
|
||||
- 竞争品牌检测器:在文本中识别除目标品牌外的竞争品牌
|
||||
- 引擎执行流程:单平台查询、任务状态更新、记录创建与异常处理
|
||||
- 关键测试点
|
||||
- 单元测试直接构造BrandMatcher与CompetitorDetector,断言匹配结果与置信度、位置等字段
|
||||
- 集成测试通过patch平台适配器返回AI响应,验证引擎整合后的综合输出
|
||||
- 用例设计要点
|
||||
- 文本分段与位置提取:验证引用出现在第几段及上下文截取
|
||||
- 平台适配器替换:确保引擎执行单平台流程时能捕获异常并写入失败记录
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BrandMatcher {
|
||||
+match(text) dict
|
||||
-_extract_candidates(text) list
|
||||
-_extract_position_and_context(text, keyword) tuple
|
||||
}
|
||||
class CompetitorDetector {
|
||||
+detect(text, target_brand) list
|
||||
}
|
||||
class CitationEngine {
|
||||
+execute_query(query, db) list
|
||||
+execute_single_platform(keyword, platform, target_brand, brand_aliases) dict
|
||||
-_get_or_create_task(db, query_id, platform) QueryTask
|
||||
-_calculate_next_query_at(frequency) datetime
|
||||
+close() void
|
||||
}
|
||||
BrandMatcher <.. CitationEngine : "使用"
|
||||
CompetitorDetector <.. CitationEngine : "使用"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/citation_engine.py:19-120](file://backend/app/workers/citation_engine.py#L19-L120)
|
||||
- [backend/app/workers/citation_engine.py:122-146](file://backend/app/workers/citation_engine.py#L122-L146)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
|
||||
**章节来源**
|
||||
- [tests/test_citation_engine.py:1-127](file://tests/test_citation_engine.py#L1-L127)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
|
||||
### 引用数据与报告测试策略
|
||||
- 测试目标
|
||||
- 引用列表查询与统计聚合
|
||||
- 立即运行查询任务
|
||||
- CSV导出格式与响应头校验
|
||||
- 关键测试点
|
||||
- 通过patch服务函数返回预设数据,断言分页、总数与字段
|
||||
- 校验CSV内容类型、附件头与关键字段存在性
|
||||
- 用例设计要点
|
||||
- 统计聚合断言整体指标与按平台细分
|
||||
- 导出接口断言响应头与正文内容
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant T as "测试用例"
|
||||
participant AC as "异步HTTP客户端"
|
||||
participant CIT as "引用路由"
|
||||
participant REP as "报告路由"
|
||||
participant SVC as "服务层"
|
||||
T->>AC : GET /api/v1/citations/?skip=&limit=
|
||||
AC->>CIT : 路由处理
|
||||
CIT->>SVC : 获取引用列表
|
||||
SVC-->>CIT : 返回(items,total)
|
||||
CIT-->>AC : 200 + JSON
|
||||
AC-->>T : 断言
|
||||
T->>AC : GET /api/v1/citations/stats
|
||||
AC->>CIT : 路由处理
|
||||
CIT->>SVC : 获取统计
|
||||
SVC-->>CIT : 返回统计聚合
|
||||
CIT-->>AC : 200 + JSON
|
||||
AC-->>T : 断言
|
||||
T->>AC : GET /api/v1/reports/export/csv?query_id=
|
||||
AC->>REP : 路由处理
|
||||
REP->>SVC : 导出CSV
|
||||
SVC-->>REP : 返回CSV字符串
|
||||
REP-->>AC : 200 + text/csv + attachment
|
||||
AC-->>T : 断言
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [tests/test_citations.py:23-93](file://tests/test_citations.py#L23-L93)
|
||||
- [backend/app/api/citations.py:25-78](file://backend/app/api/citations.py#L25-L78)
|
||||
|
||||
**章节来源**
|
||||
- [tests/test_citations.py:1-93](file://tests/test_citations.py#L1-L93)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
|
||||
### 查询管理测试策略
|
||||
- 测试目标
|
||||
- 创建查询(含配额限制场景)
|
||||
- 列表查询与分页
|
||||
- 更新查询(字段变更)
|
||||
- 删除查询
|
||||
- 查询不存在与跨用户访问控制
|
||||
- 关键测试点
|
||||
- 通过patch服务函数返回模拟查询对象或抛出权限异常
|
||||
- 断言HTTP状态码与响应体字段
|
||||
- 用例设计要点
|
||||
- 权限异常断言403与错误详情
|
||||
- 跨用户访问断言404(模拟查询归属判定)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant T as "测试用例"
|
||||
participant AC as "异步HTTP客户端"
|
||||
participant Q as "查询路由"
|
||||
participant SVC as "服务层"
|
||||
T->>AC : POST /api/v1/queries/
|
||||
AC->>Q : 路由处理
|
||||
Q->>SVC : 创建查询
|
||||
SVC-->>Q : 返回查询或抛出权限异常
|
||||
Q-->>AC : 201/403
|
||||
AC-->>T : 断言
|
||||
T->>AC : GET /api/v1/queries/?skip=&limit=
|
||||
AC->>Q : 路由处理
|
||||
Q->>SVC : 获取查询列表
|
||||
SVC-->>Q : 返回(items,total)
|
||||
Q-->>AC : 200 + JSON
|
||||
AC-->>T : 断言
|
||||
T->>AC : PUT /api/v1/queries/{id}
|
||||
AC->>Q : 路由处理
|
||||
Q->>SVC : 更新查询
|
||||
SVC-->>Q : 返回更新后的查询或None
|
||||
Q-->>AC : 200/404
|
||||
AC-->>T : 断言
|
||||
T->>AC : DELETE /api/v1/queries/{id}
|
||||
AC->>Q : 路由处理
|
||||
Q->>SVC : 删除查询
|
||||
SVC-->>Q : 返回True/False
|
||||
Q-->>AC : 204/404
|
||||
AC-->>T : 断言
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [tests/test_queries.py:29-154](file://tests/test_queries.py#L29-L154)
|
||||
- [backend/app/api/queries.py:15-86](file://backend/app/api/queries.py#L15-L86)
|
||||
|
||||
**章节来源**
|
||||
- [tests/test_queries.py:1-154](file://tests/test_queries.py#L1-L154)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
|
||||
## 业务流程测试策略
|
||||
|
||||
### 测试目标
|
||||
业务流程测试旨在验证GEO应用的核心业务场景,包括用户完整注册登录流程、查询词生命周期管理、权限隔离机制、配额限制控制、统计计算准确性以及CSV导出功能。
|
||||
|
||||
### 关键测试场景
|
||||
- **完整用户流程**:从注册到登录再到查询管理的端到端流程
|
||||
- **查询生命周期**:创建、更新、暂停、恢复、删除的完整生命周期
|
||||
- **权限隔离**:确保用户间数据完全隔离
|
||||
- **配额限制**:免费用户的查询数量限制验证
|
||||
- **统计准确性**:引用统计数据的正确性验证
|
||||
- **CSV导出**:导出功能的完整性测试
|
||||
|
||||
### 测试实现策略
|
||||
- **用户管理**:通过fixture创建真实用户账户,模拟完整的用户生命周期
|
||||
- **权限测试**:使用两个独立用户账户验证权限隔离机制
|
||||
- **数据验证**:直接操作数据库模型验证统计计算的准确性
|
||||
- **端到端验证**:通过异步HTTP客户端验证完整的业务流程
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant T as "业务流程测试"
|
||||
participant AC as "异步HTTP客户端"
|
||||
participant AUTH as "认证路由"
|
||||
participant QUERIES as "查询路由"
|
||||
participant CITATIONS as "引用路由"
|
||||
participant DB as "数据库"
|
||||
T->>AC : 注册用户
|
||||
AC->>AUTH : POST /api/v1/auth/register
|
||||
AUTH->>DB : 创建用户记录
|
||||
AUTH-->>AC : 201 Created
|
||||
T->>AC : 登录用户
|
||||
AC->>AUTH : POST /api/v1/auth/login
|
||||
AUTH-->>AC : 200 OK + Token
|
||||
T->>AC : 创建查询
|
||||
AC->>QUERIES : POST /api/v1/queries/
|
||||
QUERIES->>DB : 创建查询记录
|
||||
QUERIES-->>AC : 201 Created
|
||||
T->>AC : 验证统计
|
||||
AC->>CITATIONS : GET /api/v1/citations/stats
|
||||
CITATIONS->>DB : 查询引用记录
|
||||
CITATIONS-->>AC : 200 OK + 统计数据
|
||||
AC-->>T : 断言业务流程正确性
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [tests/test_business_flow.py:83-126](file://tests/test_business_flow.py#L83-L126)
|
||||
- [tests/test_business_flow.py:131-186](file://tests/test_business_flow.py#L131-L186)
|
||||
- [tests/test_business_flow.py:192-222](file://tests/test_business_flow.py#L192-L222)
|
||||
- [tests/test_business_flow.py:228-296](file://tests/test_business_flow.py#L228-L296)
|
||||
|
||||
### 测试用例设计要点
|
||||
- **用户隔离**:使用独立fixture创建多个用户,确保权限测试的准确性
|
||||
- **数据完整性**:通过直接操作数据库模型验证统计计算的正确性
|
||||
- **流程完整性**:覆盖业务流程的所有关键节点和异常场景
|
||||
- **边界条件**:测试配额限制、权限边界等关键边界条件
|
||||
|
||||
**章节来源**
|
||||
- [tests/test_business_flow.py:1-441](file://tests/test_business_flow.py#L1-L441)
|
||||
|
||||
## 调度器测试策略
|
||||
|
||||
### 测试目标
|
||||
调度器测试专注于验证查询调度器的定时任务执行能力,包括调度器的启动/关闭、查询任务筛选机制、频率计算逻辑以及遗留任务处理功能。
|
||||
|
||||
### 关键测试场景
|
||||
- **调度器生命周期**:启动、正常运行和优雅关闭
|
||||
- **查询筛选机制**:仅执行活跃且到期的查询任务
|
||||
- **频率计算**:daily和weekly频率的next_query_at计算
|
||||
- **遗留任务处理**:处理超过1分钟未执行的pending任务
|
||||
- **异常处理**:查询执行失败时的异常处理和日志记录
|
||||
|
||||
### 测试实现策略
|
||||
- **调度器控制**:通过patch替换真实的APScheduler,使用AsyncMock控制调度器行为
|
||||
- **数据库隔离**:使用独立的测试会话,确保调度器测试不影响其他测试
|
||||
- **时间控制**:通过精确的时间戳控制查询的到期状态
|
||||
- **频率验证**:使用datetime.utcnow()进行精确的时间计算验证
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class QueryScheduler {
|
||||
+start() void
|
||||
+check_and_execute_queries() void
|
||||
+check_and_execute_pending_tasks() void
|
||||
+shutdown() void
|
||||
-_run_check() void
|
||||
-_run_pending_tasks_check() void
|
||||
-_execute_single_query(query, db) void
|
||||
}
|
||||
class CitationEngine {
|
||||
+execute_query(query, db) list
|
||||
+execute_single_platform(keyword, platform, target_brand, brand_aliases) dict
|
||||
}
|
||||
class AsyncIOScheduler {
|
||||
+add_job(job, trigger, id, name) void
|
||||
+start() void
|
||||
+shutdown() void
|
||||
}
|
||||
QueryScheduler --> CitationEngine : "调用"
|
||||
QueryScheduler --> AsyncIOScheduler : "使用"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/scheduler.py:27-182](file://backend/app/workers/scheduler.py#L27-L182)
|
||||
|
||||
### 测试用例设计要点
|
||||
- **调度器生命周期**:验证调度器启动时添加的定时任务和名称
|
||||
- **查询筛选**:通过创建不同状态和到期时间的查询验证筛选逻辑
|
||||
- **频率计算**:使用绝对误差容差验证next_query_at的计算精度
|
||||
- **遗留任务处理**:验证pending任务的兜底处理机制
|
||||
- **异常处理**:确保查询执行失败时不会中断整个调度流程
|
||||
|
||||
**章节来源**
|
||||
- [tests/test_scheduler.py:1-123](file://tests/test_scheduler.py#L1-L123)
|
||||
- [backend/app/workers/scheduler.py:1-182](file://backend/app/workers/scheduler.py#L1-L182)
|
||||
|
||||
## 依赖分析
|
||||
- 测试与被测模块耦合
|
||||
- 测试通过ASGI传输直接调用路由,避免引入额外适配层
|
||||
- 通过依赖覆盖与patch解耦服务层与数据库、第三方平台
|
||||
- 业务流程测试直接操作数据库模型,确保测试数据的准确性
|
||||
- 外部依赖与集成点
|
||||
- 数据库:通过异步引擎与会话管理,测试中可使用内存数据库或独立测试库
|
||||
- JWT:通过服务层令牌生成与校验,测试中直接构造令牌头
|
||||
- 平台适配器:通过patch替换,避免真实网络请求
|
||||
- 调度器:通过patch替换真实的APScheduler,使用AsyncMock控制调度行为
|
||||
- 循环依赖与风险
|
||||
- 当前结构清晰,无明显循环依赖;注意在测试中避免对真实调度器的依赖
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
T_AUTH["测试: 认证"] --> A_AUTH["路由: 认证"]
|
||||
T_QUERIES["测试: 查询"] --> A_QUERIES["路由: 查询"]
|
||||
T_CIT["测试: 引用"] --> A_CIT["路由: 引用"]
|
||||
T_BUSINESS["测试: 业务流程"] --> A_QUERIES
|
||||
T_BUSINESS --> A_CIT
|
||||
T_SCHED["测试: 调度器"] --> QS["调度器: QueryScheduler"]
|
||||
A_AUTH --> S_AUTH["服务: 认证"]
|
||||
A_QUERIES --> S_QUERY["服务: 查询"]
|
||||
A_CIT --> S_CIT["服务: 引用"]
|
||||
S_AUTH --> DB["数据库"]
|
||||
S_QUERY --> DB
|
||||
S_CIT --> DB
|
||||
QS --> CE["引擎: CitationEngine"]
|
||||
QS --> DB
|
||||
DB --> CFG["配置"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [tests/test_auth.py:1-104](file://tests/test_auth.py#L1-L104)
|
||||
- [tests/test_business_flow.py:1-441](file://tests/test_business_flow.py#L1-L441)
|
||||
- [tests/test_scheduler.py:1-123](file://tests/test_scheduler.py#L1-L123)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/workers/scheduler.py:1-182](file://backend/app/workers/scheduler.py#L1-L182)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
## 性能考虑
|
||||
- 测试并发与资源
|
||||
- 使用pytest-asyncio运行异步测试,并可配合pytest-xdist等插件并行执行,减少总耗时
|
||||
- 通过会话级调度器模拟避免真实后台任务带来的不稳定因素
|
||||
- 业务流程测试使用内存数据库,避免磁盘I/O开销
|
||||
- 数据库与缓存
|
||||
- 建议使用独立测试数据库实例,避免与开发/生产数据冲突
|
||||
- 对高频查询场景,可在测试中模拟数据库延迟,评估路由与服务层的超时与重试策略
|
||||
- 调度器测试使用AsyncMock,避免真实的定时任务执行
|
||||
- 接口响应与序列化
|
||||
- 对大列表与统计聚合接口,关注JSON序列化开销与分页参数边界
|
||||
- 业务流程测试中直接操作数据库模型,避免不必要的API调用
|
||||
- 平台适配器性能
|
||||
- 通过patch模拟不同响应时延与错误率,评估引擎的容错与降级策略
|
||||
- 调度器测试中使用精确的时间控制,避免真实的等待时间
|
||||
|
||||
## 故障排查指南
|
||||
- 常见问题定位
|
||||
- 认证失败:检查令牌生成与头设置、依赖覆盖是否生效
|
||||
- 404查询:确认查询ID与用户归属,检查服务层查询逻辑
|
||||
- 403配额:检查服务层权限异常抛出与HTTP状态映射
|
||||
- 调度器异常:检查APScheduler的启动状态和job配置
|
||||
- 业务流程失败:检查数据库事务和fixture的使用
|
||||
- 调试技巧
|
||||
- 在conftest中临时打印依赖解析过程,定位get_current_user解析失败原因
|
||||
- 使用pytest的-v与-s选项查看详细输出,结合patch的side_effect观察异常传播
|
||||
- 对数据库相关测试,开启SQLAlchemy echo以查看生成的SQL
|
||||
- 调度器测试中使用AsyncMock的assert_called_once()验证调度器行为
|
||||
- 性能与稳定性
|
||||
- 对于长时间运行的异步测试,确保事件循环正确关闭
|
||||
- 对需要真实网络请求的场景,优先使用patch模拟,必要时增加超时与重试
|
||||
- 业务流程测试中合理使用fixture,避免重复创建昂贵的对象
|
||||
|
||||
## 结论
|
||||
本测试策略以Pytest为核心,结合会话级调度器模拟、依赖覆盖与patch技术,实现了对认证、查询、引用、引擎模块以及业务流程和调度器的全面覆盖。通过明确的夹具与测试数据管理,确保测试的可维护性与可重复性。新增的业务流程测试和调度器测试进一步完善了测试体系,涵盖了端到端业务场景和定时任务调度的关键功能。建议在CI中启用并行执行与覆盖率统计,并为数据库与平台适配器建立稳定的模拟层,持续提升测试效率与质量。
|
||||
|
||||
## 附录
|
||||
- 测试覆盖率要求建议
|
||||
- 语句覆盖率:≥80%
|
||||
- 分支覆盖率:≥70%
|
||||
- 行覆盖率:≥80%
|
||||
- 函数/方法覆盖率:≥90%
|
||||
- 业务流程覆盖率:≥95%
|
||||
- 调度器覆盖率:≥90%
|
||||
- 持续集成配置建议
|
||||
- 使用GitHub Actions或GitLab CI,包含Python版本矩阵、依赖安装、数据库准备、pytest执行与覆盖率上传
|
||||
- 将测试与lint、类型检查并行,确保主干分支质量
|
||||
- 为业务流程测试和调度器测试单独配置执行时间限制
|
||||
- 测试环境管理
|
||||
- 使用独立测试数据库与Redis实例,避免污染
|
||||
- 通过环境变量切换测试配置,确保敏感信息不泄露
|
||||
- 业务流程测试使用内存数据库,调度器测试使用AsyncMock
|
||||
- 性能测试方法
|
||||
- 使用pytest-benchmark或locust对高频路由进行基准测试
|
||||
- 对引擎执行流程进行压力测试,评估平台适配器与数据库写入瓶颈
|
||||
- 调度器测试中使用时间控制和AsyncMock,避免真实的定时等待
|
||||
- 业务流程测试中评估端到端流程的响应时间和吞吐量
|
||||
|
|
@ -0,0 +1,365 @@
|
|||
# 集成测试
|
||||
|
||||
<cite>
|
||||
**本文引用的文件**
|
||||
- [tests/conftest.py](file://tests/conftest.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [tests/test_auth.py](file://tests/test_auth.py)
|
||||
- [tests/test_queries.py](file://tests/test_queries.py)
|
||||
- [tests/test_citations.py](file://tests/test_citations.py)
|
||||
- [backend/app/api/deps.py](file://backend/app/api/deps.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/services/auth.py](file://backend/app/services/auth.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件面向GEO项目的集成测试,系统性阐述端到端测试的实现方法与最佳实践,覆盖以下方面:
|
||||
- FastAPI应用测试:通过ASGI传输层在内存中启动应用,使用异步HTTP客户端进行端到端请求验证。
|
||||
- 数据库连接测试:基于异步SQLAlchemy引擎与会话管理,确保依赖注入与数据库交互正确。
|
||||
- 外部服务集成测试:通过依赖注入覆盖与模拟(patch)实现对外部平台(如wenxin、kimi)的模拟调用。
|
||||
- 测试环境配置:测试数据库、异步客户端、依赖注入覆盖与后台调度器的处理策略。
|
||||
- 完整用户工作流:从认证到查询创建、执行到结果返回的端到端流程验证。
|
||||
- 数据库事务与测试数据隔离:通过依赖注入覆盖与会话生命周期管理保障隔离性。
|
||||
- 性能与负载测试:提供可扩展的测试方法与建议。
|
||||
|
||||
## 项目结构
|
||||
GEO采用前后端分离架构,后端使用FastAPI + SQLAlchemy异步ORM,测试位于tests目录,通过pytest与pytest-asyncio驱动异步测试。数据库与缓存服务通过Docker Compose编排,便于本地与CI环境一致化。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "测试环境"
|
||||
TC["测试客户端<br/>AsyncClient(ASGI)"]
|
||||
CFG["测试配置<br/>Settings(.env)"]
|
||||
DC["Docker Compose<br/>db, redis, backend, frontend"]
|
||||
end
|
||||
subgraph "后端"
|
||||
APP["FastAPI 应用<br/>app.main"]
|
||||
DEPS["依赖注入<br/>api.deps"]
|
||||
AUTH["认证路由<br/>api.auth"]
|
||||
QUERIES["查询路由<br/>api.queries"]
|
||||
CITATIONS["引用路由<br/>api.citations"]
|
||||
SCHED["调度器<br/>workers.scheduler"]
|
||||
DB["数据库引擎/会话<br/>database.py"]
|
||||
end
|
||||
DC --> APP
|
||||
CFG --> APP
|
||||
TC --> APP
|
||||
APP --> AUTH
|
||||
APP --> QUERIES
|
||||
APP --> CITATIONS
|
||||
APP --> SCHED
|
||||
AUTH --> DB
|
||||
QUERIES --> DB
|
||||
CITATIONS --> DB
|
||||
DEPS --> DB
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/api/deps.py:1-43](file://backend/app/api/deps.py#L1-L43)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
## 核心组件
|
||||
- 异步HTTP客户端与ASGI传输层:通过ASGITransport在内存中启动FastAPI应用,避免真实网络开销,提升测试速度与稳定性。
|
||||
- 依赖注入覆盖:使用app.dependency_overrides在测试中替换真实依赖(如认证用户),以模拟不同业务场景。
|
||||
- 认证与令牌:通过create_access_token生成JWT,配合OAuth2PasswordBearer在路由中校验令牌。
|
||||
- 数据库引擎与会话:异步SQLAlchemy引擎与会话工厂,确保测试中数据库交互的可控性与隔离性。
|
||||
- 后台调度器:QueryScheduler负责周期性检查并触发查询任务,测试中通过mock屏蔽真实调度,防止后台任务干扰测试。
|
||||
|
||||
章节来源
|
||||
- [tests/conftest.py:19-71](file://tests/conftest.py#L19-L71)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/services/auth.py:24-34](file://backend/app/services/auth.py#L24-L34)
|
||||
- [backend/app/database.py:6-29](file://backend/app/database.py#L6-L29)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
|
||||
## 架构总览
|
||||
下图展示测试视角下的端到端调用链路,从异步客户端发起请求,经由FastAPI路由、依赖注入、服务层,最终访问数据库或外部平台(通过模拟)。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Test as "测试客户端"
|
||||
participant App as "FastAPI 应用"
|
||||
participant Dep as "依赖注入<br/>get_current_user"
|
||||
participant Auth as "认证路由<br/>/api/v1/auth/*"
|
||||
participant Queries as "查询路由<br/>/api/v1/queries/*"
|
||||
participant Citations as "引用路由<br/>/api/v1/citations/*"
|
||||
participant DB as "数据库引擎/会话"
|
||||
Test->>App : "POST /api/v1/auth/register"
|
||||
App->>Dep : "解析令牌/校验用户"
|
||||
App->>Auth : "注册逻辑"
|
||||
Auth->>DB : "写入用户"
|
||||
DB-->>Auth : "提交成功"
|
||||
Auth-->>Test : "201 用户信息"
|
||||
Test->>App : "POST /api/v1/auth/login"
|
||||
App->>Auth : "登录校验"
|
||||
Auth-->>Test : "200 {access_token}"
|
||||
Test->>App : "POST /api/v1/queries/"
|
||||
App->>Dep : "校验令牌并解析用户"
|
||||
App->>Queries : "创建查询"
|
||||
Queries->>DB : "插入查询记录"
|
||||
DB-->>Queries : "提交成功"
|
||||
Queries-->>Test : "201 查询详情"
|
||||
Test->>App : "GET /api/v1/citations/?query_id=..."
|
||||
App->>Dep : "校验令牌并解析用户"
|
||||
App->>Citations : "查询引用数据"
|
||||
Citations->>DB : "读取引用记录"
|
||||
DB-->>Citations : "返回数据"
|
||||
Citations-->>Test : "200 引用列表"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [tests/test_auth.py:25-104](file://tests/test_auth.py#L25-L104)
|
||||
- [tests/test_queries.py:29-154](file://tests/test_queries.py#L29-L154)
|
||||
- [tests/test_citations.py:23-93](file://tests/test_citations.py#L23-L93)
|
||||
- [backend/app/api/auth.py:13-43](file://backend/app/api/auth.py#L13-L43)
|
||||
- [backend/app/api/queries.py:15-86](file://backend/app/api/queries.py#L15-L86)
|
||||
- [backend/app/api/citations.py:25-78](file://backend/app/api/citations.py#L25-L78)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/database.py:23-29](file://backend/app/database.py#L23-L29)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 测试环境与配置
|
||||
- 测试数据库与外部服务:通过Docker Compose启动PostgreSQL与Redis,确保测试与生产环境一致;数据库URL与Redis URL在配置类中定义,便于在测试中覆盖。
|
||||
- 异步客户端:使用ASGITransport在内存中启动FastAPI应用,避免网络抖动对测试的影响。
|
||||
- 依赖注入覆盖:在conftest中定义override_get_current_user,使所有需要认证的路由在测试中自动获得模拟用户。
|
||||
- 调度器屏蔽:通过mock_scheduler在测试期间禁用真实调度器,防止后台任务影响测试稳定性。
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:4-51](file://docker-compose.yml#L4-L51)
|
||||
- [backend/app/config.py:7-14](file://backend/app/config.py#L7-L14)
|
||||
- [tests/conftest.py:19-71](file://tests/conftest.py#L19-L71)
|
||||
|
||||
### 认证与用户工作流测试
|
||||
- 注册与登录:通过patch模拟注册与登录服务,断言状态码与响应字段,确保认证流程正确。
|
||||
- 当前用户信息:在依赖注入覆盖下,调用/me接口返回当前用户信息。
|
||||
- 未认证访问:移除依赖注入覆盖后,访问受保护路由应返回401。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as "测试客户端"
|
||||
participant A as "认证路由"
|
||||
participant S as "认证服务"
|
||||
participant D as "数据库"
|
||||
C->>A : "POST /api/v1/auth/register"
|
||||
A->>S : "register_user()"
|
||||
S->>D : "查询邮箱是否存在"
|
||||
S->>D : "插入新用户"
|
||||
D-->>S : "提交成功"
|
||||
S-->>A : "返回用户"
|
||||
A-->>C : "201 Created"
|
||||
C->>A : "POST /api/v1/auth/login"
|
||||
A->>S : "authenticate_user()"
|
||||
S->>D : "查询用户并校验密码"
|
||||
D-->>S : "返回用户"
|
||||
S-->>A : "生成JWT"
|
||||
A-->>C : "200 {access_token}"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [tests/test_auth.py:25-104](file://tests/test_auth.py#L25-L104)
|
||||
- [backend/app/api/auth.py:13-43](file://backend/app/api/auth.py#L13-L43)
|
||||
- [backend/app/services/auth.py:37-69](file://backend/app/services/auth.py#L37-L69)
|
||||
|
||||
章节来源
|
||||
- [tests/test_auth.py:25-104](file://tests/test_auth.py#L25-L104)
|
||||
- [backend/app/api/auth.py:13-43](file://backend/app/api/auth.py#L13-L43)
|
||||
- [backend/app/services/auth.py:37-69](file://backend/app/services/auth.py#L37-L69)
|
||||
|
||||
### 查询管理端到端测试
|
||||
- 创建查询:在依赖注入覆盖与令牌头下,调用创建接口,断言返回字段与状态码。
|
||||
- 列表与详情:通过patch模拟服务层返回值,断言分页与字段一致性。
|
||||
- 更新与删除:断言更新后的字段与删除后的状态码。
|
||||
- 权限限制:当超过配额时,断言403错误与错误信息。
|
||||
- 资源不存在:断言404错误与错误信息。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as "测试客户端"
|
||||
participant Q as "查询路由"
|
||||
participant S as "查询服务"
|
||||
participant D as "数据库"
|
||||
C->>Q : "POST /api/v1/queries/"
|
||||
Q->>S : "create_query()"
|
||||
S->>D : "插入查询记录"
|
||||
D-->>S : "提交成功"
|
||||
S-->>Q : "返回查询"
|
||||
Q-->>C : "201 Created"
|
||||
C->>Q : "GET /api/v1/queries/?skip=&limit="
|
||||
Q->>S : "get_queries()"
|
||||
S->>D : "查询记录"
|
||||
D-->>S : "返回列表"
|
||||
S-->>Q : "返回分页"
|
||||
Q-->>C : "200 OK"
|
||||
C->>Q : "PUT /api/v1/queries/{id}"
|
||||
Q->>S : "update_query()"
|
||||
S->>D : "更新记录"
|
||||
D-->>S : "提交成功"
|
||||
S-->>Q : "返回更新后记录"
|
||||
Q-->>C : "200 OK"
|
||||
C->>Q : "DELETE /api/v1/queries/{id}"
|
||||
Q->>S : "delete_query()"
|
||||
S->>D : "删除记录"
|
||||
D-->>S : "提交成功"
|
||||
S-->>Q : "返回True"
|
||||
Q-->>C : "204 No Content"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [tests/test_queries.py:29-154](file://tests/test_queries.py#L29-L154)
|
||||
- [backend/app/api/queries.py:15-86](file://backend/app/api/queries.py#L15-L86)
|
||||
|
||||
章节来源
|
||||
- [tests/test_queries.py:29-154](file://tests/test_queries.py#L29-L154)
|
||||
- [backend/app/api/queries.py:15-86](file://backend/app/api/queries.py#L15-L86)
|
||||
|
||||
### 引用数据与统计导出测试
|
||||
- 引用列表:断言分页、字段与过滤参数生效。
|
||||
- 统计信息:断言总量、引用率、按平台分布与趋势数据。
|
||||
- CSV导出:断言内容类型、附件头与CSV内容片段。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as "测试客户端"
|
||||
participant R as "引用路由"
|
||||
participant RS as "引用服务"
|
||||
participant D as "数据库"
|
||||
C->>R : "GET /api/v1/citations/?query_id=&platform=&start_date=&end_date=&skip=&limit="
|
||||
R->>RS : "get_citations()"
|
||||
RS->>D : "查询引用记录"
|
||||
D-->>RS : "返回列表"
|
||||
RS-->>R : "返回分页"
|
||||
R-->>C : "200 OK"
|
||||
C->>R : "GET /api/v1/citations/stats?query_id="
|
||||
R->>RS : "get_citation_stats()"
|
||||
RS->>D : "聚合统计"
|
||||
D-->>RS : "返回统计"
|
||||
RS-->>R : "返回统计"
|
||||
R-->>C : "200 OK"
|
||||
C->>R : "GET /api/v1/reports/export/csv?query_id="
|
||||
R->>RS : "export_citations_csv()"
|
||||
RS-->>R : "返回CSV内容"
|
||||
R-->>C : "200 OK, text/csv, attachment"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [tests/test_citations.py:23-93](file://tests/test_citations.py#L23-L93)
|
||||
- [backend/app/api/citations.py:25-78](file://backend/app/api/citations.py#L25-L78)
|
||||
|
||||
章节来源
|
||||
- [tests/test_citations.py:23-93](file://tests/test_citations.py#L23-L93)
|
||||
- [backend/app/api/citations.py:25-78](file://backend/app/api/citations.py#L25-L78)
|
||||
|
||||
### 数据库事务管理与测试数据隔离
|
||||
- 会话生命周期:get_db提供异步会话生成与关闭,确保每次请求在独立会话中执行,避免跨请求污染。
|
||||
- 依赖注入覆盖:通过app.dependency_overrides在测试中替换get_current_user,保证路由依赖始终可用且可控。
|
||||
- 事务隔离:测试中通过patch模拟服务层,不直接写入真实数据;若需真实写入,应在测试事务中回滚或使用独立测试库。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["测试开始"]) --> Override["依赖注入覆盖<br/>get_current_user"]
|
||||
Override --> Session["获取异步会话<br/>get_db()"]
|
||||
Session --> Exec["执行路由/服务逻辑"]
|
||||
Exec --> Commit{"提交/回滚?"}
|
||||
Commit --> |提交| Close["关闭会话"]
|
||||
Commit --> |回滚| Close
|
||||
Close --> End(["测试结束"])
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [tests/conftest.py:42-50](file://tests/conftest.py#L42-L50)
|
||||
- [backend/app/database.py:23-29](file://backend/app/database.py#L23-L29)
|
||||
|
||||
章节来源
|
||||
- [tests/conftest.py:42-50](file://tests/conftest.py#L42-L50)
|
||||
- [backend/app/database.py:23-29](file://backend/app/database.py#L23-L29)
|
||||
|
||||
### 外部服务集成测试
|
||||
- 平台模拟:通过patch将平台调用替换为模拟实现,断言触发查询任务与返回的任务信息。
|
||||
- 调度器屏蔽:在测试期间mock调度器,避免真实后台任务执行,确保测试确定性。
|
||||
|
||||
章节来源
|
||||
- [tests/test_citations.py:75-93](file://tests/test_citations.py#L75-L93)
|
||||
- [tests/conftest.py:19-26](file://tests/conftest.py#L19-L26)
|
||||
|
||||
## 依赖分析
|
||||
- 应用生命周期:FastAPI应用在lifespan中启动调度器,并在关闭时优雅shutdown。
|
||||
- 路由依赖:认证与查询/引用路由均依赖get_current_user与get_db,形成清晰的依赖链。
|
||||
- 服务层:认证、查询、引用服务分别封装业务逻辑,便于在测试中通过patch进行替换。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
APP["FastAPI 应用"] --> LIFE["lifespan 启停调度器"]
|
||||
APP --> AUTH["认证路由"]
|
||||
APP --> QUERIES["查询路由"]
|
||||
APP --> CITATIONS["引用路由"]
|
||||
AUTH --> DEPS["get_current_user"]
|
||||
QUERIES --> DEPS
|
||||
CITATIONS --> DEPS
|
||||
DEPS --> DB["get_db/AsyncSession"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/database.py:23-29](file://backend/app/database.py#L23-L29)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:13-22](file://backend/app/main.py#L13-L22)
|
||||
- [backend/app/api/deps.py:16-43](file://backend/app/api/deps.py#L16-L43)
|
||||
- [backend/app/database.py:23-29](file://backend/app/database.py#L23-L29)
|
||||
|
||||
## 性能考虑
|
||||
- 内存测试:使用ASGI传输层与异步客户端,避免网络开销,提升测试吞吐量。
|
||||
- 调度器控制:在测试中禁用真实调度器,减少后台任务对测试性能的影响。
|
||||
- 数据库隔离:通过独立测试库或事务回滚策略,避免并发测试导致的数据竞争。
|
||||
- 负载测试建议:可在现有测试基础上扩展压力测试脚本,结合异步客户端批量发送请求,监控响应时间与错误率;注意在测试环境中使用独立数据库实例以避免影响生产数据。
|
||||
|
||||
## 故障排查指南
|
||||
- 401未认证:确认测试中是否正确设置依赖注入覆盖与Authorization头。
|
||||
- 403权限不足:检查配额限制逻辑与用户计划,确保测试数据符合预期。
|
||||
- 404资源不存在:确认查询ID与用户归属,确保服务层返回None时路由抛出404。
|
||||
- 调度器干扰:若出现不可预期的后台任务行为,确认是否启用了mock_scheduler。
|
||||
|
||||
章节来源
|
||||
- [tests/test_auth.py:88-104](file://tests/test_auth.py#L88-L104)
|
||||
- [tests/test_queries.py:50-71](file://tests/test_queries.py#L50-L71)
|
||||
- [tests/test_queries.py:127-154](file://tests/test_queries.py#L127-L154)
|
||||
- [tests/conftest.py:19-26](file://tests/conftest.py#L19-L26)
|
||||
|
||||
## 结论
|
||||
本文档提供了GEO项目集成测试的完整方法论与实施细节,涵盖FastAPI应用测试、数据库连接、外部服务模拟、端到端用户工作流验证、事务与隔离策略以及性能与负载测试建议。通过依赖注入覆盖与异步客户端,测试具备高可靠性与可维护性;借助Docker Compose与配置类,测试环境与生产环境保持一致,便于持续集成与部署。
|
||||
|
||||
## 附录
|
||||
- 建议在CI中增加:
|
||||
- 数据库迁移与初始化脚本的集成测试。
|
||||
- Redis健康检查与连接超时重试策略验证。
|
||||
- 前后端联调测试,确保API与前端交互稳定。
|
||||
|
|
@ -0,0 +1,685 @@
|
|||
# Docker容器化部署
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [backend/Dockerfile](file://backend/Dockerfile)
|
||||
- [frontend/Dockerfile](file://frontend/Dockerfile)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [frontend/package.json](file://frontend/package.json)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/alembic.ini](file://backend/alembic.ini)
|
||||
- [frontend/next.config.mjs](file://frontend/next.config.mjs)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构概览](#架构概览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖关系分析](#依赖关系分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
|
||||
GEO项目是一个基于FastAPI和Next.js的引文检索平台,采用Docker容器化部署方案。该部署方案提供了完整的微服务架构,包括PostgreSQL数据库、Redis缓存、后端API服务和前端Web应用。
|
||||
|
||||
本项目的核心目标是通过容器化技术实现开发环境的一致性和生产环境的可移植性,同时确保各服务之间的可靠通信和数据持久化。
|
||||
|
||||
## 项目结构
|
||||
|
||||
项目采用多容器架构,包含四个主要服务:
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "Docker Compose编排"
|
||||
DC[docker-compose.yml]
|
||||
subgraph "数据库层"
|
||||
DB[PostgreSQL 15]
|
||||
RD[Redis 7]
|
||||
end
|
||||
subgraph "应用层"
|
||||
BE[FastAPI 后端]
|
||||
FE[Next.js 前端]
|
||||
end
|
||||
subgraph "存储卷"
|
||||
PV[postgres_data]
|
||||
RV[redis_data]
|
||||
end
|
||||
end
|
||||
DC --> DB
|
||||
DC --> RD
|
||||
DC --> BE
|
||||
DC --> FE
|
||||
DB --> PV
|
||||
RD --> RV
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
**章节来源**
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
## 核心组件
|
||||
|
||||
### 数据库服务 (PostgreSQL)
|
||||
|
||||
数据库服务使用PostgreSQL 15-alpine镜像,配置了完整的环境变量和健康检查机制:
|
||||
|
||||
- **镜像版本**: postgres:15-alpine
|
||||
- **容器名称**: geo_db
|
||||
- **端口映射**: 5432:5432
|
||||
- **数据持久化**: 使用postgres_data命名卷
|
||||
- **环境变量**: 用户名、密码、数据库名
|
||||
- **健康检查**: 每5秒检查一次数据库连接
|
||||
|
||||
### 缓存服务 (Redis)
|
||||
|
||||
Redis服务提供高性能的键值存储和任务队列功能:
|
||||
|
||||
- **镜像版本**: redis:7-alpine
|
||||
- **容器名称**: geo_redis
|
||||
- **端口映射**: 6379:6379
|
||||
- **数据持久化**: 使用redis_data命名卷
|
||||
- **健康检查**: PING命令测试
|
||||
|
||||
### 后端服务 (FastAPI)
|
||||
|
||||
后端服务基于Python 3.11-slim,提供RESTful API接口:
|
||||
|
||||
- **基础镜像**: python:3.11-slim
|
||||
- **容器名称**: geo_backend
|
||||
- **端口映射**: 8000:8000
|
||||
- **开发模式**: 支持代码热重载
|
||||
- **依赖安装**: Playwright浏览器支持
|
||||
|
||||
### 前端服务 (Next.js)
|
||||
|
||||
前端服务提供现代化的用户界面:
|
||||
|
||||
- **基础镜像**: node:20-alpine
|
||||
- **容器名称**: geo_frontend
|
||||
- **端口映射**: 3000:3000
|
||||
- **开发模式**: Next.js开发服务器
|
||||
- **依赖管理**: npm包管理器
|
||||
|
||||
**章节来源**
|
||||
- [docker-compose.yml:3-71](file://docker-compose.yml#L3-L71)
|
||||
|
||||
## 架构概览
|
||||
|
||||
系统采用分层架构设计,确保服务间的松耦合和高内聚:
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "客户端层"
|
||||
Browser[Web浏览器]
|
||||
Mobile[移动应用]
|
||||
end
|
||||
subgraph "API网关层"
|
||||
Nginx[Nginx反向代理]
|
||||
end
|
||||
subgraph "应用服务层"
|
||||
subgraph "后端服务"
|
||||
Auth[认证服务]
|
||||
Query[查询服务]
|
||||
Citation[引文服务]
|
||||
Report[报告服务]
|
||||
end
|
||||
subgraph "任务处理层"
|
||||
Scheduler[调度器]
|
||||
Worker[工作进程]
|
||||
end
|
||||
end
|
||||
subgraph "数据存储层"
|
||||
subgraph "数据库"
|
||||
PostgreSQL[PostgreSQL]
|
||||
Redis[Redis缓存]
|
||||
end
|
||||
subgraph "文件存储"
|
||||
MinIO[S3兼容存储]
|
||||
end
|
||||
end
|
||||
Browser --> Nginx
|
||||
Mobile --> Nginx
|
||||
Nginx --> Auth
|
||||
Nginx --> Query
|
||||
Nginx --> Citation
|
||||
Nginx --> Report
|
||||
Auth --> PostgreSQL
|
||||
Query --> PostgreSQL
|
||||
Citation --> PostgreSQL
|
||||
Report --> PostgreSQL
|
||||
Query --> Redis
|
||||
Citation --> Redis
|
||||
Report --> Redis
|
||||
Scheduler --> PostgreSQL
|
||||
Scheduler --> Redis
|
||||
Worker --> PostgreSQL
|
||||
Worker --> Redis
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/main.py:24-48](file://backend/app/main.py#L24-L48)
|
||||
- [backend/app/config.py:7-8](file://backend/app/config.py#L7-L8)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### Docker Compose配置详解
|
||||
|
||||
#### 服务依赖关系
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant DB as "PostgreSQL"
|
||||
participant Redis as "Redis"
|
||||
participant Backend as "FastAPI"
|
||||
participant Frontend as "Next.js"
|
||||
Note over DB,Redis : 服务启动顺序
|
||||
DB->>DB : 初始化数据库
|
||||
Redis->>Redis : 启动缓存服务
|
||||
Backend->>DB : 等待数据库就绪
|
||||
Backend->>Redis : 等待缓存就绪
|
||||
Backend->>Backend : 启动Uvicorn服务
|
||||
Frontend->>Backend : 发起API请求
|
||||
Frontend->>Frontend : 启动开发服务器
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [docker-compose.yml:46-66](file://docker-compose.yml#L46-L66)
|
||||
|
||||
#### 网络配置
|
||||
|
||||
Docker Compose自动创建隔离的网络环境,各服务通过服务名进行内部通信:
|
||||
|
||||
- **内部DNS**: 服务间通过服务名访问
|
||||
- **端口映射**: 开发环境暴露必要端口
|
||||
- **卷挂载**: 数据持久化和代码热重载
|
||||
|
||||
#### 数据卷配置
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
subgraph "宿主机"
|
||||
Host[宿主机文件系统]
|
||||
end
|
||||
subgraph "Docker卷"
|
||||
PV[postgres_data]
|
||||
RV[redis_data]
|
||||
FV[node_modules]
|
||||
end
|
||||
subgraph "容器内"
|
||||
ContainerDB[数据库数据]
|
||||
ContainerRedis[Redis数据]
|
||||
ContainerFE[前端模块]
|
||||
end
|
||||
Host --> PV
|
||||
Host --> RV
|
||||
Host --> FV
|
||||
PV --> ContainerDB
|
||||
RV --> ContainerRedis
|
||||
FV --> ContainerFE
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [docker-compose.yml:14-15](file://docker-compose.yml#L14-L15)
|
||||
- [docker-compose.yml:28-29](file://docker-compose.yml#L28-L29)
|
||||
- [docker-compose.yml:61-63](file://docker-compose.yml#L61-L63)
|
||||
|
||||
**章节来源**
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
### 后端Dockerfile构建流程
|
||||
|
||||
后端服务当前采用基于python:3.11-slim的单阶段构建流程,通过精简基础镜像与合理的层顺序优化镜像大小和构建效率:
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start([开始构建]) --> BaseImage["基础镜像<br/>python:3.11-slim"]
|
||||
BaseImage --> InstallDeps["安装系统依赖<br/>apt-get更新"]
|
||||
InstallDeps --> CopyReq["复制依赖文件<br/>requirements.txt"]
|
||||
CopyReq --> InstallPy["安装Python依赖<br/>pip安装"]
|
||||
InstallPy --> InstallPW["安装Playwright<br/>chromium浏览器"]
|
||||
InstallPW --> CopyCode["复制应用代码"]
|
||||
CopyCode --> ExposePort["暴露端口<br/>8000"]
|
||||
ExposePort --> CMD["启动命令<br/>uvicorn"]
|
||||
CMD --> End([构建完成])
|
||||
InstallDeps -.-> SysDeps["系统依赖:<br/>curl, wget, libglib2.0-0,<br/>libnss3, libatk1.0-0,<br/>libcairo2, libasound2"]
|
||||
InstallPy -.-> PyDeps["Python依赖:<br/>FastAPI, SQLAlchemy,<br/>Redis, APScheduler,<br/>Playwright"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
|
||||
#### 关键构建步骤
|
||||
|
||||
1. **系统依赖安装**: 安装Playwright运行所需的系统库
|
||||
2. **Python环境配置**: 设置工作目录和依赖管理
|
||||
3. **应用代码部署**: 复制业务逻辑代码
|
||||
4. **运行时优化**: 配置Uvicorn ASGI服务器
|
||||
|
||||
**章节来源**
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
|
||||
### 前端Dockerfile构建流程
|
||||
|
||||
前端服务构建流程相对简洁,专注于开发环境优化:
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start([开始构建]) --> BaseImage["基础镜像<br/>node:20-alpine"]
|
||||
BaseImage --> WorkDir["设置工作目录<br/>/app"]
|
||||
WorkDir --> CopyPkg["复制包文件<br/>package.json, package-lock.json"]
|
||||
CopyPkg --> InstallDeps["安装依赖<br/>npm ci"]
|
||||
InstallDeps --> CopyCode["复制应用代码"]
|
||||
CopyCode --> ExposePort["暴露端口<br/>3000"]
|
||||
ExposePort --> CMD["启动命令<br/>npm run dev"]
|
||||
CMD --> End([构建完成])
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
|
||||
#### 依赖管理策略
|
||||
|
||||
- **生产依赖**: Next.js 14.2.35,React 18
|
||||
- **开发依赖**: TypeScript,ESLint,Tailwind CSS
|
||||
- **UI组件**: Radix UI,Lucide React图标库
|
||||
|
||||
**章节来源**
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
|
||||
### 环境变量配置
|
||||
|
||||
系统使用统一的环境变量配置机制,支持不同环境的灵活切换:
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class Settings {
|
||||
+string DATABASE_URL
|
||||
+string REDIS_URL
|
||||
+string JWT_SECRET
|
||||
+int JWT_EXPIRE_HOURS
|
||||
+string PLAYWRIGHT_BROWSERS_PATH
|
||||
+string ZHIPU_API_KEY
|
||||
+string TONGYI_API_KEY
|
||||
}
|
||||
class ConfigFile {
|
||||
+string env_file
|
||||
+string extra
|
||||
}
|
||||
class Environment {
|
||||
+Development
|
||||
+Production
|
||||
+Testing
|
||||
}
|
||||
Settings --> ConfigFile : "读取配置"
|
||||
Settings --> Environment : "根据环境变化"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/config.py:4-16](file://backend/app/config.py#L4-L16)
|
||||
|
||||
#### 核心配置参数
|
||||
|
||||
| 参数名称 | 默认值 | 用途描述 |
|
||||
|---------|--------|----------|
|
||||
| DATABASE_URL | postgresql+asyncpg://postgres:postgres123@db:5432/geo_platform | 数据库连接字符串 |
|
||||
| REDIS_URL | redis://redis:6379/0 | Redis缓存连接地址 |
|
||||
| JWT_SECRET | your-secret-key-change-in-production | JWT令牌密钥 |
|
||||
| JWT_EXPIRE_HOURS | 24 | JWT过期时间(小时) |
|
||||
| PLAYWRIGHT_BROWSERS_PATH | /ms-playwright | Playwright浏览器路径 |
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/config.py:7-13](file://backend/app/config.py#L7-L13)
|
||||
|
||||
### 健康检查机制
|
||||
|
||||
系统实现了多层次的健康检查,确保服务可用性:
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "健康检查层次"
|
||||
HC[健康检查总览]
|
||||
subgraph "数据库健康"
|
||||
DBHC[PostgreSQL健康]
|
||||
DBTest["pg_isready -U postgres -d geo_platform"]
|
||||
end
|
||||
subgraph "缓存健康"
|
||||
RHC[Redis健康]
|
||||
RTest["redis-cli ping"]
|
||||
end
|
||||
subgraph "应用健康"
|
||||
BHC[后端健康]
|
||||
BTest["HTTP GET /health"]
|
||||
FHC[前端健康]
|
||||
FTest["HTTP GET /"]
|
||||
end
|
||||
end
|
||||
HC --> DBHC
|
||||
HC --> RHC
|
||||
HC --> BHC
|
||||
HC --> FHC
|
||||
DBHC --> DBTest
|
||||
RHC --> RTest
|
||||
BHC --> BTest
|
||||
FHC --> FTest
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [docker-compose.yml:16-20](file://docker-compose.yml#L16-L20)
|
||||
- [docker-compose.yml:30-34](file://docker-compose.yml#L30-L34)
|
||||
- [backend/app/main.py:45-47](file://backend/app/main.py#L45-L47)
|
||||
|
||||
#### 健康检查配置
|
||||
|
||||
- **检查间隔**: 5秒
|
||||
- **超时时间**: 5秒
|
||||
- **重试次数**: 5次
|
||||
- **检查方式**:
|
||||
- PostgreSQL: 数据库连接测试
|
||||
- Redis: PING命令响应
|
||||
- 应用: HTTP端点响应
|
||||
|
||||
**章节来源**
|
||||
- [docker-compose.yml:16-34](file://docker-compose.yml#L16-L34)
|
||||
- [backend/app/main.py:45-47](file://backend/app/main.py#L45-L47)
|
||||
|
||||
## 依赖关系分析
|
||||
|
||||
### 服务间依赖图
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
subgraph "外部依赖"
|
||||
PG[PostgreSQL]
|
||||
RD[Redis]
|
||||
PW[Playwright]
|
||||
end
|
||||
subgraph "后端服务"
|
||||
FA[FastAPI]
|
||||
SQ[SQLAlchemy]
|
||||
AP[APScheduler]
|
||||
RE[Redis Client]
|
||||
end
|
||||
subgraph "前端服务"
|
||||
NX[Next.js]
|
||||
RC[React]
|
||||
TW[Tailwind CSS]
|
||||
end
|
||||
subgraph "开发工具"
|
||||
UV[Uvicorn]
|
||||
NP[npm]
|
||||
PY[Python]
|
||||
end
|
||||
FA --> SQ
|
||||
FA --> AP
|
||||
FA --> RE
|
||||
FA --> PW
|
||||
NX --> RC
|
||||
NX --> TW
|
||||
FA --> UV
|
||||
NX --> NP
|
||||
FA --> PY
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/requirements.txt:2-35](file://backend/requirements.txt#L2-L35)
|
||||
- [frontend/package.json:11-38](file://frontend/package.json#L11-L38)
|
||||
|
||||
### 数据库连接分析
|
||||
|
||||
后端服务通过异步数据库连接池与PostgreSQL交互:
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant App as "FastAPI应用"
|
||||
participant Engine as "SQLAlchemy引擎"
|
||||
participant Pool as "连接池"
|
||||
participant DB as "PostgreSQL"
|
||||
App->>Engine : 创建异步引擎
|
||||
Engine->>Pool : 初始化连接池
|
||||
Pool->>DB : 建立数据库连接
|
||||
DB-->>Pool : 连接确认
|
||||
Pool-->>Engine : 连接可用
|
||||
Engine-->>App : 引擎就绪
|
||||
App->>Engine : 获取数据库会话
|
||||
Engine->>Pool : 从池中获取连接
|
||||
Pool->>DB : 执行SQL查询
|
||||
DB-->>Pool : 返回查询结果
|
||||
Pool-->>Engine : 释放连接
|
||||
Engine-->>App : 会话完成
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/database.py:6-28](file://backend/app/database.py#L6-L28)
|
||||
- [backend/app/config.py:7](file://backend/app/config.py#L7)
|
||||
|
||||
**章节来源**
|
||||
- [backend/requirements.txt:5-8](file://backend/requirements.txt#L5-L8)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
|
||||
### 任务调度系统
|
||||
|
||||
系统实现了基于APScheduler的异步任务调度机制:
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start([应用启动]) --> InitScheduler["初始化调度器"]
|
||||
InitScheduler --> AddJob["添加定时任务<br/>check_queries"]
|
||||
AddJob --> StartJobs["启动所有任务"]
|
||||
StartJobs --> Timer["1小时定时器"]
|
||||
Timer --> CheckQueries["检查到期查询"]
|
||||
CheckQueries --> ExecuteQuery["执行查询任务"]
|
||||
ExecuteQuery --> UpdateSchedule["更新下次执行时间"]
|
||||
UpdateSchedule --> Timer
|
||||
CheckQueries --> ErrorHandle["错误处理"]
|
||||
ErrorHandle --> Continue["继续下一个查询"]
|
||||
subgraph "查询执行流程"
|
||||
ExecuteQuery --> GetPlatform["获取平台列表"]
|
||||
GetPlatform --> ExecutePlatform["执行平台查询"]
|
||||
ExecutePlatform --> CreateRecord["创建引文记录"]
|
||||
CreateRecord --> UpdateQuery["更新查询状态"]
|
||||
end
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/scheduler.py:30-95](file://backend/app/workers/scheduler.py#L30-L95)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
|
||||
## 性能考虑
|
||||
|
||||
### 容器资源优化
|
||||
|
||||
1. **镜像大小控制**: 使用alpine/slim等精简基础镜像减少体积(数据库、缓存与前端使用alpine,后端使用python:3.11-slim)
|
||||
2. **多阶段构建**: 分离构建和运行时环境
|
||||
3. **依赖精简**: 只安装必要的系统和Python依赖
|
||||
4. **缓存利用**: npm和pip缓存提高构建速度
|
||||
|
||||
### 数据库性能优化
|
||||
|
||||
- **连接池管理**: SQLAlchemy异步连接池
|
||||
- **索引优化**: 查询表的关键字段建立索引
|
||||
- **查询优化**: 使用异步查询避免阻塞
|
||||
- **事务管理**: 合理的事务边界控制
|
||||
|
||||
### 缓存策略
|
||||
|
||||
- **Redis缓存**: 高速数据缓存
|
||||
- **浏览器缓存**: 前端静态资源缓存
|
||||
- **CDN加速**: 生产环境静态资源分发
|
||||
|
||||
## 故障排查指南
|
||||
|
||||
### 常见问题诊断
|
||||
|
||||
#### 1. 数据库连接问题
|
||||
|
||||
**症状**: 后端服务启动失败,显示数据库连接错误
|
||||
|
||||
**排查步骤**:
|
||||
1. 检查PostgreSQL容器状态
|
||||
2. 验证数据库凭据配置
|
||||
3. 确认网络连通性
|
||||
4. 查看数据库日志
|
||||
|
||||
**解决方案**:
|
||||
- 确保PostgreSQL服务先于后端启动
|
||||
- 检查DATABASE_URL格式正确性
|
||||
- 验证数据库用户权限
|
||||
|
||||
#### 2. 前端开发服务器问题
|
||||
|
||||
**症状**: 前端无法访问,页面加载失败
|
||||
|
||||
**排查步骤**:
|
||||
1. 检查Node.js版本兼容性
|
||||
2. 验证npm依赖安装
|
||||
3. 确认端口占用情况
|
||||
4. 查看浏览器控制台错误
|
||||
|
||||
**解决方案**:
|
||||
- 清理node_modules缓存
|
||||
- 重新安装npm依赖
|
||||
- 检查防火墙设置
|
||||
|
||||
#### 3. 任务调度异常
|
||||
|
||||
**症状**: 引文查询任务未按时执行
|
||||
|
||||
**排查步骤**:
|
||||
1. 检查调度器状态
|
||||
2. 验证查询表数据
|
||||
3. 查看任务执行日志
|
||||
4. 确认Redis连接正常
|
||||
|
||||
**解决方案**:
|
||||
- 重启调度器服务
|
||||
- 检查查询状态字段
|
||||
- 验证平台API密钥
|
||||
|
||||
### 日志监控
|
||||
|
||||
系统提供了多层级的日志输出:
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "日志级别"
|
||||
Debug[调试日志]
|
||||
Info[信息日志]
|
||||
Warn[警告日志]
|
||||
Error[错误日志]
|
||||
end
|
||||
subgraph "日志来源"
|
||||
DBLog[数据库日志]
|
||||
TaskLog[任务日志]
|
||||
AppLog[应用日志]
|
||||
SysLog[系统日志]
|
||||
end
|
||||
Debug --> DBLog
|
||||
Debug --> TaskLog
|
||||
Debug --> AppLog
|
||||
Debug --> SysLog
|
||||
Info --> DBLog
|
||||
Info --> TaskLog
|
||||
Info --> AppLog
|
||||
Info --> SysLog
|
||||
Warn --> DBLog
|
||||
Warn --> TaskLog
|
||||
Warn --> AppLog
|
||||
Warn --> SysLog
|
||||
Error --> DBLog
|
||||
Error --> TaskLog
|
||||
Error --> AppLog
|
||||
Error --> SysLog
|
||||
```
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:22](file://backend/app/workers/scheduler.py#L22)
|
||||
- [backend/app/main.py:45-47](file://backend/app/main.py#L45-L47)
|
||||
|
||||
### 环境变量验证
|
||||
|
||||
建议在服务启动后通过以下命令验证各服务是否就绪,以确认关键环境变量(数据库、Redis连接等)配置已生效:
|
||||
|
||||
```bash
|
||||
# 检查数据库连接
|
||||
docker exec geo_db pg_isready -U postgres -d geo_platform
|
||||
|
||||
# 检查Redis连接
|
||||
docker exec geo_redis redis-cli ping
|
||||
|
||||
# 检查后端API
|
||||
curl http://localhost:8000/health
|
||||
|
||||
# 检查前端应用
|
||||
curl http://localhost:3000
|
||||
```
|
||||
|
||||
## 结论
|
||||
|
||||
GEO项目的Docker容器化部署方案提供了完整的微服务架构实现,具有以下优势:
|
||||
|
||||
1. **开发友好**: 支持代码热重载和快速迭代
|
||||
2. **环境一致**: 容器化确保开发和生产环境一致性
|
||||
3. **扩展性强**: 模块化设计便于功能扩展
|
||||
4. **运维简便**: 健康检查和日志监控提升可维护性
|
||||
|
||||
该部署方案为GEO平台的稳定运行和未来发展奠定了坚实的技术基础。
|
||||
|
||||
## 附录
|
||||
|
||||
### 完整部署流程
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Dev as "开发者"
|
||||
participant Docker as "Docker引擎"
|
||||
participant Compose as "Docker Compose"
|
||||
participant Registry as "镜像仓库"
|
||||
Dev->>Compose : docker-compose up -d
|
||||
Compose->>Registry : 拉取基础镜像
|
||||
Registry-->>Compose : 返回镜像
|
||||
Compose->>Docker : 构建自定义镜像
|
||||
Docker-->>Compose : 镜像构建完成
|
||||
Compose->>Docker : 启动数据库容器
|
||||
Compose->>Docker : 启动缓存容器
|
||||
Compose->>Docker : 启动后端容器
|
||||
Compose->>Docker : 启动前端容器
|
||||
Docker-->>Dev : 服务启动完成
|
||||
```
|
||||
|
||||
### 最佳实践建议
|
||||
|
||||
1. **生产环境优化**
|
||||
- 使用生产级数据库和缓存
|
||||
- 配置负载均衡和反向代理
|
||||
- 实施监控和告警系统
|
||||
|
||||
2. **安全加固**
|
||||
- 使用HTTPS证书
|
||||
- 配置防火墙规则
|
||||
- 定期更新依赖包
|
||||
|
||||
3. **性能调优**
|
||||
- 监控容器资源使用
|
||||
- 优化数据库查询
|
||||
- 实施缓存策略
|
||||
|
||||
**章节来源**
|
||||
- [docker-compose.yml:36-66](file://docker-compose.yml#L36-L66)
|
||||
- [backend/app/config.py:9](file://backend/app/config.py#L9)
|
||||
|
|
@ -0,0 +1,347 @@
|
|||
# 生产环境部署
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [backend/Dockerfile](file://backend/Dockerfile)
|
||||
- [frontend/Dockerfile](file://frontend/Dockerfile)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/platforms/kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [backend/app/workers/platforms/wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [frontend/next.config.mjs](file://frontend/next.config.mjs)
|
||||
- [frontend/package.json](file://frontend/package.json)
|
||||
- [frontend/tailwind.config.ts](file://frontend/tailwind.config.ts)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖关系分析](#依赖关系分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本指南面向GEO项目的生产环境部署,覆盖以下关键主题:
|
||||
- 部署架构:后端服务、数据库、缓存与前端容器编排
|
||||
- Nginx反向代理、SSL证书与负载均衡建议
|
||||
- 环境变量与安全配置:数据库、Redis、API密钥与JWT
|
||||
- 性能优化:静态资源缓存、Gzip压缩与CDN集成
|
||||
- 安全加固:防火墙、访问控制与数据加密
|
||||
- 域名与DNS、HTTPS证书申请流程
|
||||
- 部署后验证与性能基准测试方法
|
||||
|
||||
## 项目结构
|
||||
GEO采用多容器编排,后端使用FastAPI,前端使用Next.js,数据库为PostgreSQL,缓存为Redis。开发阶段通过Compose进行联调,生产环境建议以Nginx作为统一入口,结合反向代理、SSL终止与负载均衡。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "生产环境"
|
||||
LB["负载均衡/反向代理<br/>Nginx"]
|
||||
subgraph "应用层"
|
||||
FE["前端容器<br/>Next.js"]
|
||||
BE["后端容器<br/>FastAPI(Uvicorn)"]
|
||||
end
|
||||
subgraph "数据层"
|
||||
DB["数据库<br/>PostgreSQL"]
|
||||
RC["缓存/队列<br/>Redis"]
|
||||
end
|
||||
end
|
||||
LB --> FE
|
||||
LB --> BE
|
||||
BE --> DB
|
||||
BE --> RC
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [docker-compose.yml:36-51](file://docker-compose.yml#L36-L51)
|
||||
- [backend/Dockerfile:40-41](file://backend/Dockerfile#L40-L41)
|
||||
- [frontend/Dockerfile:14-15](file://frontend/Dockerfile#L14-L15)
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
|
||||
## 核心组件
|
||||
- 后端服务(FastAPI + Uvicorn):提供REST API,包含认证、查询词、引用数据与报告接口;内置健康检查端点。
|
||||
- 数据库(PostgreSQL):异步SQLAlchemy引擎连接,支持迁移脚本。
|
||||
- 缓存(Redis):用于任务调度与会话/锁等场景。
|
||||
- 前端(Next.js):开发模式运行,生产建议构建并由Nginx提供静态资源与反代。
|
||||
- 引擎与平台适配器:基于Playwright的网页自动化,对接Kimi与文心一言平台。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
|
||||
## 架构总览
|
||||
下图展示生产环境典型拓扑:Nginx作为入口,负责TLS终止、静态资源分发与反向代理;后端容器提供API;数据库与Redis分别承载持久化与缓存。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
Internet["互联网"]
|
||||
Nginx["Nginx<br/>SSL/TLS 终止<br/>静态资源缓存"]
|
||||
subgraph "应用服务"
|
||||
API["FastAPI 应用"]
|
||||
Worker["任务/调度器"]
|
||||
end
|
||||
subgraph "数据服务"
|
||||
Postgres["PostgreSQL"]
|
||||
Redis["Redis"]
|
||||
end
|
||||
Internet --> Nginx
|
||||
Nginx --> API
|
||||
Nginx --> |静态资源| Nginx
|
||||
API --> Postgres
|
||||
API --> Redis
|
||||
Worker --> Redis
|
||||
Worker --> Postgres
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:24-47](file://backend/app/main.py#L24-L47)
|
||||
- [backend/app/database.py:6-18](file://backend/app/database.py#L6-L18)
|
||||
- [backend/app/config.py:7-8](file://backend/app/config.py#L7-L8)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 后端服务(FastAPI)
|
||||
- 应用生命周期:启动时初始化调度器,关闭时优雅停机。
|
||||
- CORS策略:开发默认允许本地前端源,生产需收紧为受信域名。
|
||||
- 路由模块:认证、查询词、引用数据、报告与即时执行路由。
|
||||
- 健康检查:/health端点返回状态。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant Nginx as "Nginx"
|
||||
participant API as "FastAPI 应用"
|
||||
participant DB as "PostgreSQL"
|
||||
participant Redis as "Redis"
|
||||
Client->>Nginx : "HTTP 请求"
|
||||
Nginx->>API : "反向代理转发"
|
||||
API->>DB : "数据库查询"
|
||||
DB-->>API : "结果"
|
||||
API->>Redis : "缓存/任务交互"
|
||||
Redis-->>API : "响应"
|
||||
API-->>Nginx : "HTTP 响应"
|
||||
Nginx-->>Client : "返回内容"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:13-21](file://backend/app/main.py#L13-L21)
|
||||
- [backend/app/database.py:23-28](file://backend/app/database.py#L23-L28)
|
||||
- [backend/app/config.py:7-8](file://backend/app/config.py#L7-L8)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
|
||||
### 数据库与迁移
|
||||
- 异步引擎:使用异步SQLAlchemy与asyncpg。
|
||||
- 迁移:初始版本包含用户、查询、引用记录、任务与订阅表,含索引与外键约束。
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
USERS {
|
||||
uuid id PK
|
||||
string email UK
|
||||
string password_hash
|
||||
string name
|
||||
string plan
|
||||
integer max_queries
|
||||
boolean is_active
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamp last_queried_at
|
||||
timestamp next_query_at
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
CITATION_RECORDS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
boolean cited
|
||||
integer citation_position
|
||||
text citation_text
|
||||
jsonb competitor_brands
|
||||
text raw_response
|
||||
timestamp queried_at
|
||||
}
|
||||
QUERY_TASKS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
string status
|
||||
text error_message
|
||||
timestamp scheduled_at
|
||||
timestamp started_at
|
||||
timestamp completed_at
|
||||
}
|
||||
SUBSCRIPTIONS {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string plan
|
||||
string status
|
||||
date start_date
|
||||
date end_date
|
||||
numeric amount
|
||||
string payment_method
|
||||
string payment_id
|
||||
timestamp created_at
|
||||
}
|
||||
USERS ||--o{ QUERIES : "拥有"
|
||||
QUERIES ||--o{ CITATION_RECORDS : "产生"
|
||||
QUERIES ||--o{ QUERY_TASKS : "调度"
|
||||
USERS ||--o{ SUBSCRIPTIONS : "订阅"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:24-111](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L24-L111)
|
||||
|
||||
章节来源
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:1-128](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L1-L128)
|
||||
|
||||
### 引擎与平台适配器
|
||||
- CitationEngine:对指定平台发起查询,执行品牌匹配与竞争品牌检测,并记录结果。
|
||||
- 平台适配器:Kimi与文心一言,基于Playwright自动化,具备重试与稳定性等待逻辑。
|
||||
- 任务调度:查询任务状态管理,支持失败回退与占位记录。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["开始执行查询"]) --> InitMatcher["初始化品牌匹配器"]
|
||||
InitMatcher --> IteratePlat["遍历平台列表"]
|
||||
IteratePlat --> CreateTask["创建/获取任务记录"]
|
||||
CreateTask --> SetRunning["标记任务为运行中"]
|
||||
SetRunning --> DoQuery["平台查询(Playwright)"]
|
||||
DoQuery --> BrandMatch["品牌匹配与竞争品牌检测"]
|
||||
BrandMatch --> SaveRecord["保存引用记录"]
|
||||
SaveRecord --> UpdateQueryTime["更新查询时间与下次查询时间"]
|
||||
UpdateQueryTime --> NextPlat{"还有平台?"}
|
||||
NextPlat --> |是| IteratePlat
|
||||
NextPlat --> |否| End(["结束"])
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [backend/app/workers/platforms/kimi.py:33-48](file://backend/app/workers/platforms/kimi.py#L33-L48)
|
||||
- [backend/app/workers/platforms/wenxin.py:33-48](file://backend/app/workers/platforms/wenxin.py#L33-L48)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
|
||||
### 前端(Next.js)
|
||||
- 开发模式:通过Dockerfile直接运行开发服务器。
|
||||
- 生产建议:构建产物由Nginx托管,配合缓存与压缩策略。
|
||||
|
||||
章节来源
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
- [frontend/package.json:5-9](file://frontend/package.json#L5-L9)
|
||||
- [frontend/next.config.mjs:1-5](file://frontend/next.config.mjs#L1-L5)
|
||||
|
||||
## 依赖关系分析
|
||||
- 后端依赖:FastAPI、Uvicorn、SQLAlchemy、asyncpg、Pydantic、Redis、APScheduler、Playwright、httpx、python-dotenv等。
|
||||
- 前端依赖:Next.js、Radix UI、Tailwind CSS、NextAuth等。
|
||||
- 容器编排:Compose定义数据库、Redis、后端与前端服务及其端口映射与健康检查。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
Backend["后端应用"] --> DB["PostgreSQL"]
|
||||
Backend --> Redis["Redis"]
|
||||
Backend --> Playwright["Playwright 浏览器"]
|
||||
Frontend["前端应用"] --> Backend
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [docker-compose.yml:4-51](file://docker-compose.yml#L4-L51)
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
## 性能考虑
|
||||
- 静态资源缓存:Nginx启用长期缓存头,结合文件指纹命名策略,减少带宽与延迟。
|
||||
- Gzip/压缩:开启gzip或更高效的压缩算法(如Brotli),降低传输体积。
|
||||
- CDN集成:将静态资源(图片、JS/CSS)接入CDN,提升全球访问速度与可用性。
|
||||
- 数据库优化:合理索引(已有迁移脚本包含索引)、连接池参数调优、慢查询日志与监控。
|
||||
- 缓存策略:Redis用于热点数据与任务队列,避免重复计算与高并发下的数据库压力。
|
||||
- 前端构建:生产构建开启Tree Shaking、代码分割与懒加载,缩短首屏时间。
|
||||
- 反向代理:Nginx作为统一入口,可集中处理压缩、缓存与限流。
|
||||
|
||||
## 故障排查指南
|
||||
- 健康检查:通过/health端点确认后端存活;Compose中数据库与Redis具备健康检查命令。
|
||||
- 日志定位:查看Nginx错误日志、后端Uvicorn日志与容器日志;关注数据库连接与Redis连通性。
|
||||
- CORS问题:生产环境需将前端域名加入allow_origins白名单,避免跨域阻断。
|
||||
- 数据库迁移:确保迁移脚本成功执行,检查表结构与索引是否存在。
|
||||
- Playwright依赖:容器内需安装浏览器依赖,确保Playwright可正常启动浏览器实例。
|
||||
- API密钥:确认平台API密钥配置正确,避免请求失败导致任务异常。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:45-47](file://backend/app/main.py#L45-L47)
|
||||
- [docker-compose.yml:16-20](file://docker-compose.yml#L16-L20)
|
||||
- [docker-compose.yml:30-34](file://docker-compose.yml#L30-L34)
|
||||
- [backend/app/config.py:12-13](file://backend/app/config.py#L12-L13)
|
||||
- [backend/app/workers/platforms/kimi.py:21-32](file://backend/app/workers/platforms/kimi.py#L21-L32)
|
||||
- [backend/app/workers/platforms/wenxin.py:21-32](file://backend/app/workers/platforms/wenxin.py#L21-L32)
|
||||
|
||||
## 结论
|
||||
本指南提供了GEO生产环境部署的完整路径:从容器编排到Nginx反向代理与SSL、从环境变量与安全配置到性能优化与安全加固,并给出了部署后验证与基准测试建议。建议在正式上线前完成域名与DNS解析、证书申请与安装、以及完整的端到端测试。
|
||||
|
||||
## 附录
|
||||
|
||||
### 生产环境部署清单
|
||||
- 基础设施
|
||||
- 负载均衡/反向代理:Nginx(建议启用TLS终止、静态资源缓存与Gzip/Brotli)
|
||||
- 应用容器:后端(FastAPI)、前端(Next.js生产构建)
|
||||
- 数据库:PostgreSQL(主从/高可用视规模而定)
|
||||
- 缓存:Redis(哨兵/集群视规模而定)
|
||||
- 环境变量与安全
|
||||
- 数据库连接:DATABASE_URL(生产使用强密码与最小权限账号)
|
||||
- Redis连接:REDIS_URL(网络隔离与ACL)
|
||||
- JWT密钥:JWT_SECRET(高强度随机值,定期轮换)
|
||||
- 平台API密钥:ZHIPU_API_KEY、TONGYI_API_KEY(最小权限与限额)
|
||||
- 配置与优化
|
||||
- Nginx:静态资源缓存头、压缩、限流、健康检查探针
|
||||
- 后端:连接池大小、超时、日志级别、CORS白名单
|
||||
- 数据库:连接数上限、慢查询阈值、备份策略
|
||||
- 前端:构建产物缓存、CDN接入、预加载关键资源
|
||||
- 安全加固
|
||||
- 防火墙:仅开放必要端口(80/443/22等)
|
||||
- 访问控制:IP白名单、速率限制、WAF
|
||||
- 数据加密:传输加密(TLS)、静态加密(视合规要求)
|
||||
- 域名与证书
|
||||
- DNS:A/AAAA记录指向负载均衡器;子域名可通过CNAME别名指向主域名
|
||||
- 证书:ACME自动签发或商业证书;多域名与SAN
|
||||
- 部署后验证与基准测试
|
||||
- 功能验证:登录、查询、报告生成、引用检测
|
||||
- 性能基准:并发用户数、P95/P99延迟、吞吐量、资源占用
|
||||
- 可靠性:故障切换、恢复时间、数据一致性校验
|
||||
|
|
@ -0,0 +1,276 @@
|
|||
# 监控与日志管理
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/docker-compose.yml](file://backend/docker-compose.yml)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [backend/app/workers/platforms/kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [backend/app/workers/platforms/wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [backend/Dockerfile](file://backend/Dockerfile)
|
||||
- [backend/app/api/deps.py](file://backend/app/api/deps.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本方案围绕GEO项目的监控与日志管理,系统性地梳理应用健康检查、服务可用性监控、响应时间监控、错误率统计、日志收集与管理、错误追踪与告警通知、性能指标采集与可视化,以及日志分析与故障诊断的最佳实践。当前仓库中已具备基础的健康检查端点、容器编排与健康检查、日志记录与重试机制,但尚未集成统一的监控与告警体系(如Prometheus/Grafana)。本文在现有能力基础上,提出可落地的扩展建议与实施路径。
|
||||
|
||||
## 项目结构
|
||||
后端采用FastAPI框架,通过APScheduler进行定时任务调度,使用Playwright驱动浏览器访问外部AI平台,结合PostgreSQL与Redis作为数据与缓存存储。Docker Compose负责多服务编排与健康检查。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "后端服务"
|
||||
API["FastAPI 应用<br/>/health 健康检查"]
|
||||
SCH["APScheduler 调度器"]
|
||||
CE["引用检测引擎"]
|
||||
DB["PostgreSQL 引擎/会话"]
|
||||
REDIS["Redis 连接"]
|
||||
end
|
||||
subgraph "外部平台"
|
||||
KIMI["Kimi 适配器"]
|
||||
WENXIN["文心一言适配器"]
|
||||
end
|
||||
API --> SCH
|
||||
SCH --> CE
|
||||
CE --> KIMI
|
||||
CE --> WENXIN
|
||||
CE --> DB
|
||||
CE --> REDIS
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:45-47](file://backend/app/main.py#L45-L47)
|
||||
- [backend/app/workers/scheduler.py:30-40](file://backend/app/workers/scheduler.py#L30-L40)
|
||||
- [backend/app/workers/citation_engine.py:148-170](file://backend/app/workers/citation_engine.py#L148-L170)
|
||||
- [backend/app/database.py:6-18](file://backend/app/database.py#L6-L18)
|
||||
- [backend/app/config.py:7-8](file://backend/app/config.py#L7-L8)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/docker-compose.yml:1-71](file://backend/docker-compose.yml#L1-L71)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
|
||||
## 核心组件
|
||||
- 健康检查端点:提供基础可用性探测,便于反向代理与编排系统判断服务状态。
|
||||
- 定时任务调度器:基于APScheduler的异步调度器,周期性扫描并执行到期查询任务。
|
||||
- 引用检测引擎:封装品牌匹配、竞争品牌检测、平台适配器调用与结果持久化。
|
||||
- 平台适配器:Kimi与文心一言的Playwright驱动实现,包含重试与稳定性处理。
|
||||
- 数据库与配置:异步SQLAlchemy引擎与环境变量配置,支撑任务状态与结果存储。
|
||||
- 容器编排与健康检查:Compose对数据库与Redis进行健康检查,后端服务依赖健康状态启动。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:45-47](file://backend/app/main.py#L45-L47)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/platforms/kimi.py:11-206](file://backend/app/workers/platforms/kimi.py#L11-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:11-205](file://backend/app/workers/platforms/wenxin.py#L11-L205)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/docker-compose.yml:16-34](file://backend/docker-compose.yml#L16-L34)
|
||||
|
||||
## 架构总览
|
||||
下图展示从API到调度器、引擎、平台适配器与数据库的整体交互流程,并标注健康检查与容器编排的关键节点。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant API as "FastAPI 应用"
|
||||
participant SCH as "调度器"
|
||||
participant CE as "引用检测引擎"
|
||||
participant KIMI as "Kimi 适配器"
|
||||
participant WENXIN as "文心一言适配器"
|
||||
participant DB as "数据库"
|
||||
Client->>API : GET /health
|
||||
API-->>Client : {"status" : "ok"}
|
||||
SCH->>CE : 触发检查并执行查询
|
||||
CE->>KIMI : query(keyword)
|
||||
CE->>WENXIN : query(keyword)
|
||||
CE->>DB : 写入CitationRecord/更新QueryTask
|
||||
CE-->>SCH : 返回执行结果
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:45-47](file://backend/app/main.py#L45-L47)
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [backend/app/workers/platforms/kimi.py:33-48](file://backend/app/workers/platforms/kimi.py#L33-L48)
|
||||
- [backend/app/workers/platforms/wenxin.py:33-48](file://backend/app/workers/platforms/wenxin.py#L33-L48)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 健康检查与服务可用性监控
|
||||
- 基础健康检查端点:提供轻量级可用性探测,适合反向代理与编排系统快速判断服务状态。
|
||||
- 容器健康检查:Compose对PostgreSQL与Redis进行健康检查,后端服务依赖这些服务健康后再启动,提升整体可用性保障。
|
||||
- 建议扩展:
|
||||
- 在应用内增加更细粒度的依赖检查(数据库连接池、Redis连接、外部平台可用性)。
|
||||
- 将健康检查结果暴露为指标,接入Prometheus/Grafana进行可视化与告警。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:45-47](file://backend/app/main.py#L45-L47)
|
||||
- [backend/docker-compose.yml:16-34](file://backend/docker-compose.yml#L16-L34)
|
||||
|
||||
### 日志收集与管理策略
|
||||
- 日志记录范围:调度器、引擎、平台适配器均使用标准日志模块记录信息与错误,覆盖任务执行、平台查询、异常处理等关键环节。
|
||||
- 日志级别建议:
|
||||
- INFO:任务开始/结束、平台查询成功、结果写入。
|
||||
- WARNING:重试警告、超时警告。
|
||||
- ERROR:平台查询失败、数据库写入失败、异常抛出。
|
||||
- 结构化日志格式建议:
|
||||
- 统一字段:timestamp、level、service、module、function、message、trace_id(可选)、span_id(可选)、extra(JSON)。
|
||||
- 示例字段:service=backend、module=scheduler/engine/platform、function=check_and_execute_queries/execute_query/query。
|
||||
- 日志轮转与存储:
|
||||
- 使用logrotate或容器日志驱动自带轮转;生产环境建议将日志输出到stdout/stderr,由容器编排系统集中收集(如Fluent Bit/Fluentd)。
|
||||
- 存储策略:短期本地、长期归档至对象存储或集中日志平台(如ELK/Graylog/Loki)。
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:40-90](file://backend/app/workers/scheduler.py#L40-L90)
|
||||
- [backend/app/workers/citation_engine.py:211-227](file://backend/app/workers/citation_engine.py#L211-L227)
|
||||
- [backend/app/workers/platforms/kimi.py:28-48](file://backend/app/workers/platforms/kimi.py#L28-L48)
|
||||
- [backend/app/workers/platforms/wenxin.py:28-48](file://backend/app/workers/platforms/wenxin.py#L28-L48)
|
||||
|
||||
### 错误追踪机制
|
||||
- 异常捕获与堆栈跟踪:
|
||||
- 调度器与引擎在关键路径捕获异常并记录详细错误信息,便于定位问题。
|
||||
- 平台适配器对Playwright操作进行超时与异常处理,并在多次重试后记录最终失败原因。
|
||||
- 告警通知:
|
||||
- 建议在应用层或网关层集成告警通道(如邮件、Webhook、IM机器人),当ERROR/WARNING级别日志达到阈值时触发。
|
||||
- 可结合日志平台的规则引擎或Prometheus Alertmanager实现自动告警。
|
||||
- 诊断要点:
|
||||
- 关注平台查询超时、浏览器启动失败、数据库事务提交失败等高频错误。
|
||||
- 为每次查询生成唯一trace_id,贯穿日志链路,便于跨服务串联分析。
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:73-74](file://backend/app/workers/scheduler.py#L73-L74)
|
||||
- [backend/app/workers/citation_engine.py:211-227](file://backend/app/workers/citation_engine.py#L211-L227)
|
||||
- [backend/app/workers/platforms/kimi.py:116-119](file://backend/app/workers/platforms/kimi.py#L116-L119)
|
||||
- [backend/app/workers/platforms/wenxin.py:114-117](file://backend/app/workers/platforms/wenxin.py#L114-L117)
|
||||
|
||||
### 性能监控指标
|
||||
- CPU与内存:通过容器监控(如cAdvisor/Prometheus Node Exporter)采集后端容器的CPU/内存使用率。
|
||||
- 数据库连接数:从数据库侧查看连接数与慢查询,或通过中间件埋点导出指标。
|
||||
- API响应时间:在FastAPI中间件中统计请求耗时,按路由分组导出直方图与摘要指标。
|
||||
- 业务指标:
|
||||
- 查询任务执行成功率、失败率、平均耗时。
|
||||
- 平台查询成功率、平均响应时间、重试次数。
|
||||
- CitationRecord写入速率、QueryTask状态转换时延。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:24-28](file://backend/app/main.py#L24-L28)
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
|
||||
### 监控工具选择与配置
|
||||
- Prometheus:抓取后端应用指标(自定义指标+Node Exporter),用于构建仪表盘与告警。
|
||||
- Grafana:可视化Prometheus数据,创建面板展示健康状态、性能趋势与告警历史。
|
||||
- 日志平台(可选):Loki配合Promtail收集日志,Grafana中实现日志与指标联动。
|
||||
- 告警:Alertmanager基于规则触发,结合企业微信/钉钉/Slack等通道推送。
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:20-22](file://backend/requirements.txt#L20-L22)
|
||||
- [backend/Dockerfile:35-40](file://backend/Dockerfile#L35-L40)
|
||||
|
||||
### 日志分析与故障诊断最佳实践
|
||||
- 统一日志格式与标签:为每条日志添加服务名、模块、函数、trace_id等标签,便于聚合与检索。
|
||||
- 分层告警:针对不同级别与模块设置阈值与静默窗口,避免噪声干扰。
|
||||
- 快速定位:优先查看ERROR/WARNING级别日志,结合trace_id串联相关模块日志。
|
||||
- 回放与回归:对关键业务路径(如平台查询、数据库写入)建立回放机制,复现问题并验证修复。
|
||||
|
||||
## 依赖分析
|
||||
- 组件耦合:
|
||||
- 调度器与引擎松耦合,通过接口与数据库交互;平台适配器遵循统一抽象,便于扩展新平台。
|
||||
- 数据库连接通过依赖注入提供,降低全局状态耦合。
|
||||
- 外部依赖:
|
||||
- PostgreSQL/Redis:通过环境变量配置,容器编排保证依赖服务健康。
|
||||
- Playwright:浏览器自动化依赖系统库,Dockerfile中已安装必要依赖与浏览器。
|
||||
- 潜在风险:
|
||||
- 平台查询超时与不稳定:已有重试与超时处理,建议进一步引入熔断与降级策略。
|
||||
- 日志分散:建议集中化收集与结构化输出,避免grep式排查。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
REQ["requirements.txt"] --> FASTAPI["FastAPI"]
|
||||
REQ --> APS["APScheduler"]
|
||||
REQ --> SQLA["SQLAlchemy"]
|
||||
REQ --> REDIS["Redis"]
|
||||
REQ --> PW["Playwright"]
|
||||
DF["Dockerfile"] --> SYSDEPS["系统依赖安装"]
|
||||
DF --> PLY["Playwright 安装"]
|
||||
DF --> CMD["Uvicorn 启动"]
|
||||
DC["docker-compose.yml"] --> DBH["PostgreSQL 健康检查"]
|
||||
DC --> RDH["Redis 健康检查"]
|
||||
DC --> DEP["后端依赖健康启动"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [backend/Dockerfile:6-33](file://backend/Dockerfile#L6-L33)
|
||||
- [backend/docker-compose.yml:16-34](file://backend/docker-compose.yml#L16-L34)
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [backend/docker-compose.yml:1-71](file://backend/docker-compose.yml#L1-L71)
|
||||
|
||||
## 性能考虑
|
||||
- I/O密集型优化:平台查询与数据库写入均为I/O密集,建议:
|
||||
- 使用连接池与批量写入减少开销。
|
||||
- 对平台查询结果进行缓存(短期有效),降低重复请求。
|
||||
- 超时与重试:平台适配器已内置指数退避重试,建议:
|
||||
- 设置最大重试次数与超时上限,防止雪崩。
|
||||
- 引入熔断器,当错误率超过阈值时短时间拒绝请求。
|
||||
- 资源限制:在容器编排中设置CPU/内存限制与重启策略,避免单点故障影响整体。
|
||||
|
||||
## 故障排查指南
|
||||
- 健康检查失败:
|
||||
- 检查后端/数据库/Redis健康检查配置与日志。
|
||||
- 确认依赖服务已就绪再启动后端。
|
||||
- 平台查询失败:
|
||||
- 查看平台适配器日志,确认浏览器启动与页面交互是否正常。
|
||||
- 检查网络连通性与平台可用性。
|
||||
- 数据库写入失败:
|
||||
- 检查数据库连接字符串与权限。
|
||||
- 关注事务提交与异常回滚日志。
|
||||
- 日志分析:
|
||||
- 使用统一字段检索trace_id,串联各模块日志。
|
||||
- 结合Grafana面板观察指标趋势,定位异常时段。
|
||||
|
||||
章节来源
|
||||
- [backend/docker-compose.yml:46-50](file://backend/docker-compose.yml#L46-L50)
|
||||
- [backend/app/workers/platforms/kimi.py:28-48](file://backend/app/workers/platforms/kimi.py#L28-L48)
|
||||
- [backend/app/workers/platforms/wenxin.py:28-48](file://backend/app/workers/platforms/wenxin.py#L28-L48)
|
||||
- [backend/app/workers/citation_engine.py:211-227](file://backend/app/workers/citation_engine.py#L211-L227)
|
||||
|
||||
## 结论
|
||||
GEO项目已具备基础的健康检查、容器健康检查与完善的日志记录能力。建议在此基础上引入统一的监控与告警体系(Prometheus/Grafana),完善结构化日志与指标导出,增强平台查询的稳定性与可观测性,以支撑生产环境的持续运维与快速故障定位。
|
||||
|
||||
## 附录
|
||||
- 快速对照表
|
||||
- 健康检查端点:GET /health
|
||||
- 数据库连接:DATABASE_URL
|
||||
- Redis连接:REDIS_URL
|
||||
- 定时任务:每小时执行一次
|
||||
- 平台适配器:Kimi、文心一言
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:45-47](file://backend/app/main.py#L45-L47)
|
||||
- [backend/app/config.py:7-8](file://backend/app/config.py#L7-L8)
|
||||
- [backend/app/workers/scheduler.py:32-38](file://backend/app/workers/scheduler.py#L32-L38)
|
||||
- [backend/app/workers/platforms/base.py:4-17](file://backend/app/workers/platforms/base.py#L4-L17)
|
||||
|
|
@ -0,0 +1,375 @@
|
|||
# 运维最佳实践
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [backend/Dockerfile](file://backend/Dockerfile)
|
||||
- [frontend/Dockerfile](file://frontend/Dockerfile)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [frontend/package.json](file://frontend/package.json)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/alembic.ini](file://backend/alembic.ini)
|
||||
- [backend/alembic/env.py](file://backend/alembic/env.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本指南面向GEO平台的运维团队,围绕日常运维任务、安全运维、故障恢复、容量规划与扩容、运维自动化与CI/CD、团队职责与应急响应、以及成本优化与监控等方面,提供可操作的最佳实践。文档基于仓库现有配置与代码进行梳理,并结合容器化部署与异步数据库访问等技术特性,给出落地建议。
|
||||
|
||||
## 项目结构
|
||||
GEO采用前后端分离的容器化架构:
|
||||
- 前端服务:Next.js应用,开发模式下通过NPM脚本启动,容器暴露3000端口。
|
||||
- 后端服务:FastAPI应用,使用Uvicorn运行,容器暴露8000端口。
|
||||
- 数据库:PostgreSQL 15,持久化卷挂载,健康检查通过pg_isready。
|
||||
- 缓存:Redis 7,持久化卷挂载,健康检查通过redis-cli ping。
|
||||
- 配置:后端通过Pydantic设置从.env文件加载;数据库迁移使用Alembic。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "前端"
|
||||
FE_Docker["前端Dockerfile<br/>暴露3000端口"]
|
||||
FE_NPM["package.json<br/>dev/build/start/lint"]
|
||||
end
|
||||
subgraph "后端"
|
||||
BE_Docker["后端Dockerfile<br/>暴露8000端口"]
|
||||
BE_Main["app/main.py<br/>FastAPI应用与路由注册"]
|
||||
BE_Config["app/config.py<br/>.env加载与默认配置"]
|
||||
BE_DB["app/database.py<br/>异步引擎与会话工厂"]
|
||||
BE_Sched["app/workers/scheduler.py<br/>APScheduler定时任务"]
|
||||
BE_Alembic["alembic.ini / env.py<br/>数据库迁移配置"]
|
||||
end
|
||||
subgraph "基础设施"
|
||||
DB["PostgreSQL 15<br/>健康检查: pg_isready"]
|
||||
REDIS["Redis 7<br/>健康检查: redis-cli ping"]
|
||||
end
|
||||
FE_Docker --> |"3000"| BE_Main
|
||||
BE_Docker --> |"8000"| BE_Main
|
||||
BE_Main --> BE_DB
|
||||
BE_Main --> REDIS
|
||||
BE_DB --> DB
|
||||
BE_Sched --> BE_DB
|
||||
BE_Sched --> REDIS
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/alembic.ini:1-150](file://backend/alembic.ini#L1-L150)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/alembic.ini:1-150](file://backend/alembic.ini#L1-L150)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
|
||||
## 核心组件
|
||||
- 应用入口与生命周期:后端通过FastAPI应用注册路由与CORS中间件,并在应用生命周期内启动定时任务调度器。
|
||||
- 数据库连接:使用SQLAlchemy异步引擎与会话工厂,支持异步事务与连接池配置。
|
||||
- 配置管理:通过Pydantic设置从.env文件读取配置,包含数据库URL、Redis URL、JWT密钥、浏览器驱动路径等。
|
||||
- 定时任务:基于APScheduler的异步调度器,按小时检查并执行到期的查询任务。
|
||||
- 数据库迁移:Alembic配置与环境脚本,支持离线与在线迁移,连接字符串来自配置。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/alembic.ini:1-150](file://backend/alembic.ini#L1-L150)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
|
||||
## 架构总览
|
||||
下图展示容器化部署下的服务交互与数据流:
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Dev as "开发者/CI"
|
||||
participant Compose as "docker-compose"
|
||||
participant FE as "前端容器(3000)"
|
||||
participant BE as "后端容器(8000)"
|
||||
participant DB as "PostgreSQL(5432)"
|
||||
participant R as "Redis(6379)"
|
||||
Dev->>Compose : 启动/停止/重建
|
||||
Compose->>BE : 依赖DB/Redis健康就绪
|
||||
Compose->>FE : 依赖后端可用
|
||||
FE->>BE : HTTP请求 /api/v1/*
|
||||
BE->>DB : 异步ORM读写
|
||||
BE->>R : 缓存/任务状态
|
||||
BE-->>FE : JSON响应
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 数据库与迁移(PostgreSQL + Alembic)
|
||||
- 连接与会话:异步引擎与会话工厂确保高并发下的连接复用与事务一致性。
|
||||
- 迁移策略:Alembic支持离线与在线迁移,连接字符串来自配置;日志级别可调以降低噪音。
|
||||
- 建议:
|
||||
- 在生产环境通过环境变量或密钥管理服务注入DATABASE_URL,避免在代码或镜像中硬编码。
|
||||
- 迁移前先做备份,使用在线迁移时控制窗口时间。
|
||||
- 将迁移脚本纳入版本控制并审查变更。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["开始"]) --> CheckCfg["读取DATABASE_URL配置"]
|
||||
CheckCfg --> Mode{"迁移模式"}
|
||||
Mode --> |离线| Offline["生成/执行离线迁移"]
|
||||
Mode --> |在线| Online["建立异步连接并执行迁移"]
|
||||
Offline --> Done(["结束"])
|
||||
Online --> Done
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/alembic.ini:86-90](file://backend/alembic.ini#L86-L90)
|
||||
- [backend/alembic/env.py:33-88](file://backend/alembic/env.py#L33-L88)
|
||||
- [backend/app/config.py:7-8](file://backend/app/config.py#L7-L8)
|
||||
|
||||
章节来源
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/alembic.ini:1-150](file://backend/alembic.ini#L1-L150)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
### 定时任务调度(APScheduler)
|
||||
- 触发周期:每小时检查一次到期查询。
|
||||
- 执行逻辑:查询状态为active且next_query_at小于等于当前UTC时间的任务,逐条交由引用引擎执行。
|
||||
- 错误处理:单任务异常不影响整体调度,日志记录错误并继续处理其他任务。
|
||||
- 建议:
|
||||
- 将调度器纳入应用生命周期,在应用启动时启动,退出时优雅关闭。
|
||||
- 对高频任务增加幂等性校验与去重机制。
|
||||
- 监控任务执行耗时与积压情况。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
S(["启动调度器"]) --> AddJob["添加每小时检查任务"]
|
||||
AddJob --> Loop{"每小时触发"}
|
||||
Loop --> Query["查询到期active任务"]
|
||||
Query --> Exec["逐条执行引用引擎"]
|
||||
Exec --> Log["记录结果/异常"]
|
||||
Log --> Loop
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/scheduler.py:30-90](file://backend/app/workers/scheduler.py#L30-L90)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
|
||||
### 健康检查与容器编排
|
||||
- 数据库健康检查:使用pg_isready,间隔与重试次数可调。
|
||||
- Redis健康检查:使用redis-cli ping。
|
||||
- 服务依赖:后端等待数据库与缓存健康后再启动,前端等待后端可用。
|
||||
- 建议:
|
||||
- 生产环境调整健康检查参数,避免误判。
|
||||
- 使用独立的健康检查端点(如/health)对外暴露,便于外部监控系统探测。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant DC as "docker-compose"
|
||||
participant DB as "PostgreSQL"
|
||||
participant R as "Redis"
|
||||
participant BE as "后端"
|
||||
DC->>DB : 健康检查(pg_isready)
|
||||
DB-->>DC : OK
|
||||
DC->>R : 健康检查(redis-cli ping)
|
||||
R-->>DC : PONG
|
||||
DC->>BE : 启动应用(等待DB/Redis健康)
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [docker-compose.yml:16-34](file://docker-compose.yml#L16-L34)
|
||||
- [docker-compose.yml:46-50](file://docker-compose.yml#L46-L50)
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
### 配置与环境变量
|
||||
- 关键配置项:数据库URL、Redis URL、JWT密钥、过期时间、Playwright浏览器路径、第三方平台API Key等。
|
||||
- 加载方式:通过Pydantic设置从.env文件加载,支持类型校验与默认值。
|
||||
- 建议:
|
||||
- 生产环境使用最小权限的数据库账号(仅授予应用所需的读写权限,避免使用超级用户)与专用密钥。
|
||||
- 将敏感信息放入密钥管理服务或编排平台的密文存储。
|
||||
- 分环境(.env.dev/.env.prod)隔离配置,避免混用。
|
||||
|
||||
章节来源
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
### 前后端镜像构建
|
||||
- 后端镜像:基于python:3.11-slim,安装Playwright所需系统依赖,预装Python依赖并安装浏览器驱动。
|
||||
- 前端镜像:基于node:20-alpine,使用npm ci安装依赖。
|
||||
- 建议:
|
||||
- 使用多阶段构建减少镜像体积。
|
||||
- 固定基础镜像版本,启用镜像扫描与漏洞告警。
|
||||
- 在CI中缓存依赖层以提升构建速度。
|
||||
|
||||
章节来源
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
|
||||
## 依赖分析
|
||||
- 组件耦合:
|
||||
- 后端主程序依赖数据库模块与调度器模块。
|
||||
- 调度器依赖数据库会话工厂与引用引擎。
|
||||
- Alembic依赖配置与模型元数据。
|
||||
- 外部依赖:
|
||||
- 数据库:PostgreSQL(异步驱动)。
|
||||
- 缓存:Redis。
|
||||
- 浏览器自动化:Playwright(chromium)。
|
||||
- 可能的改进:
|
||||
- 将调度器抽象为可注入的服务,便于测试与替换。
|
||||
- 将数据库连接参数与迁移配置进一步解耦。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
Main["app/main.py"] --> DBMod["app/database.py"]
|
||||
Main --> Sched["app/workers/scheduler.py"]
|
||||
Sched --> DBMod
|
||||
Alembic["alembic/env.py"] --> Cfg["app/config.py"]
|
||||
Alembic --> Models["app/models/*"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
## 性能考虑
|
||||
- 数据库性能:
|
||||
- 使用异步引擎与连接池,合理设置超时与空闲回收。
|
||||
- 为热点查询建立索引,避免全表扫描。
|
||||
- 控制事务粒度,减少长事务锁竞争。
|
||||
- 缓存策略:
|
||||
- 利用Redis缓存热点数据与任务状态,降低数据库压力。
|
||||
- 设置合理的TTL与淘汰策略,避免内存膨胀。
|
||||
- 定时任务:
|
||||
- 控制任务频率与执行时长,避免高峰期抖动。
|
||||
- 对任务执行结果进行统计与告警。
|
||||
- 前后端镜像:
|
||||
- 减少镜像层数与体积,缩短拉取与启动时间。
|
||||
- 使用CDN与静态资源优化。
|
||||
|
||||
## 故障排查指南
|
||||
- 健康检查失败
|
||||
- 检查数据库与Redis健康检查命令是否可用。
|
||||
- 查看容器日志,确认端口映射与网络连通。
|
||||
- 数据库连接异常
|
||||
- 核对DATABASE_URL格式与凭据。
|
||||
- 检查PostgreSQL日志与连接数上限。
|
||||
- 定时任务未执行
|
||||
- 确认调度器已启动且未报错。
|
||||
- 检查任务状态与next_query_at字段。
|
||||
- 前端无法访问后端
|
||||
- 核对CORS配置与路由前缀。
|
||||
- 检查后端/health端点返回状态。
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:16-34](file://docker-compose.yml#L16-L34)
|
||||
- [backend/app/main.py:30-36](file://backend/app/main.py#L30-L36)
|
||||
- [backend/app/workers/scheduler.py:30-90](file://backend/app/workers/scheduler.py#L30-L90)
|
||||
|
||||
## 结论
|
||||
本指南基于仓库现有配置与代码,总结了GEO平台的运维关注点与最佳实践建议。建议在生产环境中进一步完善配置管理、安全加固、监控告警与自动化流程,以保障系统的稳定性与可维护性。
|
||||
|
||||
## 附录
|
||||
|
||||
### 日常运维任务清单
|
||||
- 定期备份策略
|
||||
- 数据库:使用pg_dump/pg_restore进行全量逻辑备份(如需增量备份,可结合WAL归档或pg_basebackup),保留至少7天滚动备份。
|
||||
- 持久化卷:定期快照或导出postgres_data与redis_data。
|
||||
- 数据库维护
|
||||
- 清理历史日志与临时表,重建索引,更新统计信息。
|
||||
- 监控慢查询与锁等待,定位瓶颈。
|
||||
- 系统更新流程
|
||||
- 先在测试环境验证镜像与配置变更,再灰度到生产。
|
||||
- 使用滚动更新与健康检查,确保零停机。
|
||||
|
||||
### 安全运维实践
|
||||
- 漏洞扫描
|
||||
- 镜像扫描:在CI中集成镜像漏洞扫描工具。
|
||||
- 依赖扫描:定期扫描Python与Node依赖的安全问题。
|
||||
- 安全补丁管理
|
||||
- 基础镜像与依赖库定期升级,优先修复高危漏洞。
|
||||
- 访问审计
|
||||
- 开启数据库与应用日志审计,记录敏感操作。
|
||||
- 限制数据库连接权限,最小化管理员账号使用。
|
||||
|
||||
### 故障恢复流程
|
||||
- 灾难恢复计划
|
||||
- 明确RTO/RPO目标,准备多活或多副本方案。
|
||||
- 数据恢复
|
||||
- 从最近备份恢复,核对时间点一致性。
|
||||
- 系统回滚策略
|
||||
- 采用蓝绿/金丝雀发布,失败自动回滚。
|
||||
- 回滚前做好配置与数据备份。
|
||||
|
||||
### 容量规划与扩容策略
|
||||
- 水平扩展
|
||||
- 前后端服务均可横向扩展,注意会话与共享状态的处理。
|
||||
- 垂直扩展
|
||||
- 根据CPU/内存与I/O使用率评估,逐步提升实例规格。
|
||||
- 决策因素
|
||||
- QPS、响应时间、错误率、资源利用率阈值。
|
||||
|
||||
### 运维自动化与CI/CD
|
||||
- 自动化脚本
|
||||
- 备份脚本:封装pg_dump与卷快照命令。
|
||||
- 部署脚本:封装docker-compose拉起/回滚流程。
|
||||
- CI/CD流水线
|
||||
- 触发条件:push到主分支或打标签。
|
||||
- 步骤:代码检查、单元测试、镜像构建、安全扫描、部署、健康检查。
|
||||
- 告警:失败通知与自动回滚。
|
||||
|
||||
### 运维团队职责与应急响应
|
||||
- 职责分工
|
||||
- 基础设施:负责容器编排、网络与存储。
|
||||
- 应用运维:负责后端服务、数据库与缓存。
|
||||
- 安全与合规:负责漏洞扫描、密钥与审计。
|
||||
- 应急响应
|
||||
- 建立值班制度与应急预案,明确升级路径。
|
||||
- 使用统一告警平台,确保信息透明与快速处置。
|
||||
|
||||
### 成本优化与监控
|
||||
- 成本优化
|
||||
- 合理选择实例规格与存储类型,避免过度配置。
|
||||
- 使用预留实例或批量折扣,优化闲置资源。
|
||||
- 资源利用率监控
|
||||
- CPU/内存/磁盘/网络IO与数据库连接数。
|
||||
- 业务指标:QPS、P95/P99延迟、错误率、任务积压。
|
||||
|
|
@ -0,0 +1,418 @@
|
|||
# 部署与运维
|
||||
|
||||
<cite>
|
||||
**本文引用的文件**
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [backend/Dockerfile](file://backend/Dockerfile)
|
||||
- [frontend/Dockerfile](file://frontend/Dockerfile)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [frontend/package.json](file://frontend/package.json)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/alembic.ini](file://backend/alembic.ini)
|
||||
- [backend/alembic/env.py](file://backend/alembic/env.py)
|
||||
- [tests/test_auth.py](file://tests/test_auth.py)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件面向GEO项目的部署与运维团队,提供从开发到生产的完整落地指南。内容覆盖Docker容器化部署、镜像构建、服务编排与环境配置;生产部署策略(Nginx反向代理、SSL证书、负载均衡);监控与日志管理(健康检查、错误追踪、性能监控);运维最佳实践(备份、安全、故障恢复)以及CI/CD流水线与自动化部署建议。文档严格基于仓库现有配置与实现进行说明,避免臆测。
|
||||
|
||||
## 项目结构
|
||||
GEO采用前后端分离架构,后端为FastAPI应用,前端为Next.js应用,数据库使用PostgreSQL,缓存使用Redis。通过Docker Compose进行本地开发与演示环境编排,支持数据库、Redis、后端API与前端开发服务器的统一启动与依赖管理。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "容器编排"
|
||||
DC["docker-compose.yml"]
|
||||
end
|
||||
subgraph "后端服务"
|
||||
BE["backend/Dockerfile"]
|
||||
API["backend/app/main.py"]
|
||||
CFG["backend/app/config.py"]
|
||||
DB["backend/app/database.py"]
|
||||
SCH["backend/app/workers/scheduler.py"]
|
||||
ALEMBIC["backend/alembic/*"]
|
||||
end
|
||||
subgraph "前端服务"
|
||||
FE["frontend/Dockerfile"]
|
||||
PKG["frontend/package.json"]
|
||||
end
|
||||
subgraph "基础设施"
|
||||
PG["PostgreSQL:5432"]
|
||||
RD["Redis:6379"]
|
||||
end
|
||||
DC --> BE
|
||||
DC --> FE
|
||||
DC --> PG
|
||||
DC --> RD
|
||||
BE --> API
|
||||
API --> DB
|
||||
API --> SCH
|
||||
DB --> PG
|
||||
SCH --> RD
|
||||
FE --> API
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/alembic.ini:1-150](file://backend/alembic.ini#L1-L150)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
|
||||
## 核心组件
|
||||
- 后端API(FastAPI)
|
||||
- 应用入口与生命周期管理、路由注册、CORS中间件、健康检查端点。
|
||||
- 配置加载与数据库连接、Redis连接、定时任务调度器。
|
||||
- 数据库(PostgreSQL)
|
||||
- 异步SQLAlchemy引擎、会话管理、模型定义与索引。
|
||||
- 缓存(Redis)
|
||||
- 用于任务调度与状态存储等场景。
|
||||
- 前端(Next.js)
|
||||
- 开发模式运行,与后端API交互。
|
||||
- 迁移与版本控制(Alembic)
|
||||
- 异步迁移环境、配置文件与日志级别。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/alembic.ini:1-150](file://backend/alembic.ini#L1-L150)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
|
||||
## 架构总览
|
||||
下图展示生产环境典型拓扑:Nginx作为反向代理与SSL终止,后端API以多实例运行,数据库与Redis作为共享资源,前端静态资源可由Nginx或CDN提供。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "外部访问"
|
||||
U["用户浏览器"]
|
||||
end
|
||||
subgraph "边缘层"
|
||||
NGINX["Nginx 反向代理<br/>SSL/TLS 终止"]
|
||||
end
|
||||
subgraph "应用层"
|
||||
LB["负载均衡器/反向代理"]
|
||||
subgraph "后端实例"
|
||||
API1["API 实例 1"]
|
||||
API2["API 实例 2"]
|
||||
API3["API 实例 N"]
|
||||
end
|
||||
end
|
||||
|
||||
subgraph "数据与缓存"
|
||||
DB["PostgreSQL 主库/只读副本"]
|
||||
RDS["Redis 集群/哨兵"]
|
||||
end
|
||||
U --> NGINX
|
||||
NGINX --> LB
|
||||
LB --> API1
|
||||
LB --> API2
|
||||
LB --> API3
|
||||
API1 --> DB
|
||||
API2 --> DB
|
||||
API3 --> DB
|
||||
API1 --> RDS
|
||||
API2 --> RDS
|
||||
API3 --> RDS
|
||||
```
|
||||
|
||||
(本图为概念性架构示意,不对应具体源码文件)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 后端API(FastAPI)与容器化
|
||||
- 容器镜像构建要点
|
||||
- 基于Python slim镜像,安装Playwright所需系统依赖,并预装依赖后复制应用代码。
|
||||
- 暴露8000端口,使用Uvicorn启动应用。
|
||||
- 应用特性
|
||||
- 生命周期钩子在启动时初始化调度器,在关闭时优雅停机。
|
||||
- 注册认证、查询、引用数据、报告等路由前缀。
|
||||
- 配置CORS中间件,允许来自前端开发地址的跨域请求。
|
||||
- 提供健康检查端点。
|
||||
- 配置与连接
|
||||
- 通过环境变量加载数据库URL、Redis URL、JWT密钥、平台API密钥等。
|
||||
- 数据库连接为异步引擎,使用会话工厂管理事务。
|
||||
- 定时任务
|
||||
- 使用APScheduler异步调度器,每小时检查到期查询并调用引用引擎执行。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as "容器"
|
||||
participant U as "Uvicorn"
|
||||
participant A as "FastAPI 应用"
|
||||
participant S as "调度器"
|
||||
participant D as "数据库"
|
||||
participant R as "Redis"
|
||||
C->>U : "启动进程"
|
||||
U->>A : "加载应用与生命周期"
|
||||
A->>S : "启动调度器"
|
||||
A->>D : "建立异步连接"
|
||||
A->>R : "建立连接"
|
||||
A-->>C : "健康检查端点可用"
|
||||
S->>D : "周期性查询待执行任务"
|
||||
S->>A : "触发执行逻辑"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
|
||||
章节来源
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
|
||||
### 前端(Next.js)与容器化
|
||||
- 容器镜像构建要点
|
||||
- 基于Node Alpine镜像,安装依赖后复制应用代码。
|
||||
- 暴露3000端口,开发模式启动。
|
||||
- 与后端交互
|
||||
- 默认CORS允许来自前端开发地址的请求,生产环境需根据域名调整。
|
||||
|
||||
章节来源
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [backend/app/main.py:30-36](file://backend/app/main.py#L30-L36)
|
||||
|
||||
### 数据库与迁移(PostgreSQL + Alembic)
|
||||
- 连接与会话
|
||||
- 使用异步引擎与会话工厂,提供依赖注入式数据库会话。
|
||||
- 迁移
|
||||
- 配置文件指定数据库URL,环境脚本支持离线与在线迁移,异步连接迁移上下文。
|
||||
- 模型与索引
|
||||
- 查询模型包含用户外键、关键词、平台列表、频率、状态、时间戳等字段,并建立复合索引以优化查询。
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
USERS {
|
||||
uuid id PK
|
||||
string email UK
|
||||
string name
|
||||
string plan
|
||||
int max_queries
|
||||
boolean is_active
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamp last_queried_at
|
||||
timestamp next_query_at
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
CITATION_RECORDS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
jsonb raw_data
|
||||
timestamp created_at
|
||||
}
|
||||
QUERY_TASKS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
string status
|
||||
timestamp scheduled_at
|
||||
timestamp executed_at
|
||||
}
|
||||
USERS ||--o{ QUERIES : "拥有"
|
||||
QUERIES ||--o{ CITATION_RECORDS : "生成"
|
||||
QUERIES ||--o{ QUERY_TASKS : "拆分任务"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/alembic.ini:86-89](file://backend/alembic.ini#L86-L89)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
|
||||
章节来源
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/alembic.ini:1-150](file://backend/alembic.ini#L1-L150)
|
||||
- [backend/alembic/env.py:1-89](file://backend/alembic/env.py#L1-L89)
|
||||
|
||||
### 认证与安全(JWT)
|
||||
- 接口
|
||||
- 提供注册、登录、获取当前用户信息的接口。
|
||||
- 安全要点
|
||||
- JWT密钥默认值仅用于开发,生产必须替换。
|
||||
- CORS在开发环境允许前端地址,生产需限定来源。
|
||||
|
||||
章节来源
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/config.py:9-9](file://backend/app/config.py#L9-L9)
|
||||
|
||||
### 编排与健康检查(Docker Compose)
|
||||
- 服务编排
|
||||
- db、redis、backend、frontend四类服务,分别映射端口、挂载卷、加载环境文件。
|
||||
- 健康检查
|
||||
- PostgreSQL与Redis提供健康检查探针,后端API提供/health健康检查端点。
|
||||
- 依赖顺序
|
||||
- 后端等待数据库与Redis健康后再启动,前端依赖后端。
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/app/main.py:45-48](file://backend/app/main.py#L45-L48)
|
||||
|
||||
## 依赖分析
|
||||
- 后端依赖
|
||||
- Web框架、数据库、配置校验、认证加密、缓存与调度、浏览器自动化、HTTP客户端、测试工具等。
|
||||
- 前端依赖
|
||||
- Next.js、React、UI组件库、图表库、类型与样式工具等。
|
||||
- 运行时依赖
|
||||
- Playwright Chromium浏览器、系统库满足其运行需求。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
subgraph "后端"
|
||||
REQ["requirements.txt"]
|
||||
PY["Python 3.11"]
|
||||
PLT["Playwright"]
|
||||
end
|
||||
subgraph "前端"
|
||||
PKGJSON["package.json"]
|
||||
NODE["Node 20"]
|
||||
end
|
||||
REQ --> PY
|
||||
REQ --> PLT
|
||||
PKGJSON --> NODE
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
|
||||
章节来源
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
|
||||
## 性能考虑
|
||||
- 数据库
|
||||
- 查询模型已建立多列索引,建议结合慢查询日志与执行计划持续优化。
|
||||
- 缓存
|
||||
- Redis用于任务调度与临时状态,建议在高并发场景下启用持久化与主从复制。
|
||||
- API
|
||||
- 使用异步数据库连接与事件循环,减少阻塞;对高频接口开启限流与熔断。
|
||||
- 前端
|
||||
- 生产构建与静态资源缓存策略,配合CDN提升加载速度。
|
||||
- 容器
|
||||
- 后端与前端镜像体积较小,建议启用只读根文件系统与最小权限运行。
|
||||
|
||||
(本节为通用指导,不直接分析具体文件)
|
||||
|
||||
## 故障排查指南
|
||||
- 健康检查
|
||||
- 后端/health健康检查端点可用于快速判断服务可用性。
|
||||
- 日志
|
||||
- 后端使用标准库日志,Alembic配置了日志级别;建议在生产中接入集中式日志收集。
|
||||
- 数据库迁移
|
||||
- 使用异步迁移环境,确保数据库URL正确;离线/在线迁移均可通过配置切换。
|
||||
- 认证问题
|
||||
- 生产需替换JWT密钥;CORS来源需按域名精确配置。
|
||||
- 单元测试
|
||||
- 认证模块测试覆盖注册、登录与当前用户接口,可作为回归测试基线。
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:45-48](file://backend/app/main.py#L45-L48)
|
||||
- [backend/alembic.ini:115-150](file://backend/alembic.ini#L115-L150)
|
||||
- [backend/alembic/env.py:64-89](file://backend/alembic/env.py#L64-L89)
|
||||
- [tests/test_auth.py:1-104](file://tests/test_auth.py#L1-L104)
|
||||
|
||||
## 结论
|
||||
本部署与运维文档基于仓库现有配置,给出了从容器化到生产部署的实施路径与最佳实践建议。建议在生产环境中补充Nginx反向代理与SSL、集中化日志与监控、完善的备份与灾难恢复策略,并通过CI/CD实现自动化部署与回滚。
|
||||
|
||||
## 附录
|
||||
|
||||
### A. Docker容器化部署流程(开发/演示)
|
||||
- 准备工作
|
||||
- 确保已安装Docker与Docker Compose。
|
||||
- 启动服务
|
||||
- 在仓库根目录执行编排命令,等待所有服务进入健康状态。
|
||||
- 访问应用
|
||||
- 前端:http://localhost:3000
|
||||
- 后端:http://localhost:8000
|
||||
- 数据库:localhost:5432
|
||||
- Redis:localhost:6379
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
### B. 生产环境部署策略
|
||||
- 反向代理与SSL
|
||||
- 使用Nginx作为反向代理,开启HTTPS并配置证书自动续期。
|
||||
- 负载均衡
|
||||
- 将后端API以多实例部署并通过LB分发流量,确保健康检查与自动扩缩容。
|
||||
- 环境隔离
|
||||
- 区分开发、测试、预发布与生产环境,严格管理环境变量与密钥。
|
||||
|
||||
(本节为通用指导,不直接分析具体文件)
|
||||
|
||||
### C. 监控与日志管理
|
||||
- 健康检查
|
||||
- 利用/health健康检查端点,结合探针实现自动发现与告警。
|
||||
- 错误追踪
|
||||
- 在应用中集成结构化日志,输出到标准输出,由平台收集至集中式系统。
|
||||
- 性能监控
|
||||
- 关注数据库慢查询、Redis命中率、API响应时间与错误率。
|
||||
|
||||
(本节为通用指导,不直接分析具体文件)
|
||||
|
||||
### D. 运维最佳实践
|
||||
- 备份策略
|
||||
- 数据库与Redis定期快照与归档,验证恢复流程。
|
||||
- 安全配置
|
||||
- 最小权限原则、密钥轮换、网络隔离、入站/出站策略。
|
||||
- 故障恢复
|
||||
- 制定回滚预案与演练计划,确保快速恢复。
|
||||
|
||||
(本节为通用指导,不直接分析具体文件)
|
||||
|
||||
### E. CI/CD流水线与自动化部署
|
||||
- 建议阶段
|
||||
- 代码提交触发测试(含认证接口测试),通过后构建镜像并推送制品库,随后部署到目标环境。
|
||||
- 回滚机制
|
||||
- 支持一键回滚至上一个稳定版本。
|
||||
- 配置管理
|
||||
- 环境变量与密钥通过安全渠道注入,避免硬编码。
|
||||
|
||||
(本节为通用指导,不直接分析具体文件)
|
||||
|
|
@ -0,0 +1,390 @@
|
|||
# 快速开始
|
||||
|
||||
<cite>
|
||||
**本文引用的文件**
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [backend/Dockerfile](file://backend/Dockerfile)
|
||||
- [frontend/Dockerfile](file://frontend/Dockerfile)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [frontend/package.json](file://frontend/package.json)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/alembic.ini](file://backend/alembic.ini)
|
||||
- [backend/alembic/env.py](file://backend/alembic/env.py)
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [frontend/README.md](file://frontend/README.md)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [前置条件与环境准备](#前置条件与环境准备)
|
||||
4. [一键启动(Docker Compose)](#一键启动docker-compose)
|
||||
5. [首次运行与基本操作](#首次运行与基本操作)
|
||||
6. [架构概览](#架构概览)
|
||||
7. [组件详解](#组件详解)
|
||||
8. [依赖关系分析](#依赖关系分析)
|
||||
9. [性能与资源建议](#性能与资源建议)
|
||||
10. [故障排除](#故障排除)
|
||||
11. [结语](#结语)
|
||||
|
||||
## 简介
|
||||
本指南面向首次接触 GEO 项目的用户,帮助你在最短时间内完成环境准备、依赖安装、数据库初始化与容器编排,并体验核心功能:用户注册、创建查询任务、查看结果与图表。文档严格基于仓库中的实际配置与代码,提供可复现的命令行步骤与预期行为说明。
|
||||
|
||||
## 项目结构
|
||||
项目采用前后端分离的多容器架构,通过 Docker Compose 统一编排:
|
||||
- 后端:FastAPI 应用,提供认证、查询词、引用数据、报告等接口;内置定时任务调度器与浏览器自动化能力。
|
||||
- 前端:Next.js 应用,提供登录、注册、仪表盘、查询管理、引用与报告可视化界面。
|
||||
- 数据库:PostgreSQL(15),使用 Alembic 进行迁移管理。
|
||||
- 缓存:Redis(7),用于任务队列与缓存。
|
||||
- Playwright:用于浏览器自动化(爬虫/抓取)场景。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "容器编排"
|
||||
DB["PostgreSQL 容器"]
|
||||
REDIS["Redis 容器"]
|
||||
BE["后端容器"]
|
||||
FE["前端容器"]
|
||||
end
|
||||
FE --> |"HTTP 8000"| BE
|
||||
BE --> |"数据库 5432"| DB
|
||||
BE --> |"缓存 6379"| REDIS
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
**章节来源**
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
## 前置条件与环境准备
|
||||
- 操作系统:Linux/macOS/Windows(WSL2 推荐)
|
||||
- Docker 与 Docker Compose:用于一键编排
|
||||
- Git:克隆仓库
|
||||
- 文本编辑器或 IDE:可选,用于查看/修改配置
|
||||
|
||||
说明
|
||||
- 本项目未要求本地安装 Python 3.9+ 或 Node.js,因为所有运行时均在容器内完成。
|
||||
- 若你希望在本地直接运行后端或前端,请参考各自镜像版本:
|
||||
- 后端镜像基于 Python 3.11(容器内运行)
|
||||
- 前端镜像基于 Node.js 20(容器内运行)
|
||||
|
||||
**章节来源**
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
|
||||
## 一键启动(Docker Compose)
|
||||
以下为完整的一键启动流程,涵盖容器编排、服务启动顺序、端口映射与健康检查。
|
||||
|
||||
步骤
|
||||
1. 克隆仓库
|
||||
- git clone <仓库地址>
|
||||
- cd GEO
|
||||
|
||||
2. 构建并启动容器
|
||||
- docker compose up -d
|
||||
|
||||
3. 查看服务状态
|
||||
- docker compose ps
|
||||
- 预期:db、redis 为 Up(healthy),backend、frontend 为 Up(仅 db 与 redis 配置了健康检查)
|
||||
|
||||
4. 访问应用
|
||||
- 前端:http://localhost:3000
|
||||
- 后端:http://localhost:8000(健康检查接口 /health 可用)
|
||||
|
||||
5. 停止与清理
|
||||
- docker compose down
|
||||
- 如需清理数据卷:docker compose down -v
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Dev as "开发者"
|
||||
participant Compose as "Docker Compose"
|
||||
participant DB as "PostgreSQL 容器"
|
||||
participant Redis as "Redis 容器"
|
||||
participant Backend as "后端容器"
|
||||
participant Frontend as "前端容器"
|
||||
Dev->>Compose : "docker compose up -d"
|
||||
Compose->>DB : "启动并健康检查"
|
||||
Compose->>Redis : "启动并健康检查"
|
||||
Compose->>Backend : "等待 DB/Redis 健康后启动"
|
||||
Backend->>DB : "连接数据库(Alembic 已内置迁移)"
|
||||
Compose->>Frontend : "启动并监听 3000"
|
||||
Frontend->>Backend : "请求 /api/v1/*"
|
||||
Backend-->>Frontend : "返回 JSON 响应"
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/app/main.py:45-48](file://backend/app/main.py#L45-L48)
|
||||
|
||||
**章节来源**
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [frontend/README.md:5-17](file://frontend/README.md#L5-L17)
|
||||
|
||||
## 首次运行与基本操作
|
||||
首次启动后,你可以进行如下操作以体验核心功能:
|
||||
|
||||
1. 用户注册与登录
|
||||
- 打开 http://localhost:3000
|
||||
- 使用注册页面创建账户
|
||||
- 登录后进入仪表盘
|
||||
|
||||
2. 创建查询任务
|
||||
- 在“查询”页面新建查询词,设置关键词、目标品牌、平台(默认包含 wenxin/kimi)、频率(日/周/月)
|
||||
- 提交后,后端调度器会按频率与时间窗口自动执行
|
||||
|
||||
3. 查看结果
|
||||
- “引用数据”页面查看各平台的引用检测结果(是否被提及、置信度、上下文片段等)
|
||||
- “报告”页面查看趋势与平台分布图表
|
||||
|
||||
4. 观察后台任务
|
||||
- 后端启动时会加载调度器,每小时检查一次到期的查询任务
|
||||
- 任务状态会在数据库中持久化,供前端展示
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["提交查询任务"]) --> Schedule["写入数据库<br/>设置 next_query_at"]
|
||||
Schedule --> Timer["定时器每小时触发"]
|
||||
Timer --> Check{"查询是否到期?"}
|
||||
Check --> |否| Wait["等待下个小时"]
|
||||
Check --> |是| Run["执行查询(多平台)"]
|
||||
Run --> Detect["品牌匹配与竞争品牌检测"]
|
||||
Detect --> Persist["写入引用记录与任务状态"]
|
||||
Persist --> Report["生成可视化数据"]
|
||||
Report --> End(["前端展示结果"])
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:38-42](file://backend/app/main.py#L38-L42)
|
||||
- [backend/app/workers/scheduler.py:30-40](file://backend/app/workers/scheduler.py#L30-L40)
|
||||
- [backend/app/workers/citation_engine.py:148-170](file://backend/app/workers/citation_engine.py#L148-L170)
|
||||
|
||||
## 架构概览
|
||||
整体架构由容器编排层、后端服务层、前端服务层、数据库与缓存组成。后端通过 Alembic 管理数据库迁移,使用 Redis 存储任务状态,使用 Playwright 进行浏览器自动化。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
FE["前端应用<br/>Next.js"] --> API["后端 API<br/>FastAPI"]
|
||||
API --> DB["数据库<br/>PostgreSQL"]
|
||||
API --> CACHE["缓存<br/>Redis"]
|
||||
API --> WORKER["任务执行器<br/>CitationEngine"]
|
||||
WORKER --> PLATFORMS["平台适配器<br/>Kimi/Wenxin"]
|
||||
API --> SCHED["调度器<br/>APScheduler"]
|
||||
SCHED --> API
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/config.py:7-13](file://backend/app/config.py#L7-L13)
|
||||
- [backend/app/workers/scheduler.py:25-39](file://backend/app/workers/scheduler.py#L25-L39)
|
||||
- [backend/app/workers/citation_engine.py:148-157](file://backend/app/workers/citation_engine.py#L148-L157)
|
||||
|
||||
## 组件详解
|
||||
|
||||
### 后端应用入口与路由
|
||||
- 应用生命周期:启动时初始化模型与调度器,关闭时优雅停机
|
||||
- 路由模块:认证、查询词、引用数据、报告、即时执行查询
|
||||
- CORS:允许前端 localhost:3000 访问
|
||||
- 健康检查:/health 返回状态
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:13-47](file://backend/app/main.py#L13-L47)
|
||||
|
||||
### 配置与连接
|
||||
- 配置来源:.env 文件(通过 pydantic-settings 加载)
|
||||
- 默认数据库:postgresql+asyncpg://postgres:postgres123@db:5432/geo_platform
|
||||
- 默认缓存:redis://redis:6379/0
|
||||
- Playwright 浏览器路径:/ms-playwright
|
||||
- JWT 密钥与过期时间:开发默认值(生产请务必修改)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/config.py:4-16](file://backend/app/config.py#L4-L16)
|
||||
|
||||
### 数据库与迁移
|
||||
- 引擎与会话:基于 SQLAlchemy AsyncEngine 与 async session
|
||||
- 迁移:Alembic 管理,初始迁移包含 users、queries、citation_records、query_tasks、subscriptions 表
|
||||
- 运行方式:Compose 启动时由后端容器自动执行迁移(见后端 Dockerfile 中的安装与命令)
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
USERS {
|
||||
uuid id PK
|
||||
string email UK
|
||||
string password_hash
|
||||
string name
|
||||
string plan
|
||||
int max_queries
|
||||
bool is_active
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamp last_queried_at
|
||||
timestamp next_query_at
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
CITATION_RECORDS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
bool cited
|
||||
int citation_position
|
||||
text citation_text
|
||||
jsonb competitor_brands
|
||||
text raw_response
|
||||
timestamp queried_at
|
||||
}
|
||||
QUERY_TASKS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
string status
|
||||
text error_message
|
||||
timestamp scheduled_at
|
||||
timestamp started_at
|
||||
timestamp completed_at
|
||||
}
|
||||
SUBSCRIPTIONS {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string plan
|
||||
string status
|
||||
date start_date
|
||||
date end_date
|
||||
numeric amount
|
||||
string payment_method
|
||||
string payment_id
|
||||
timestamp created_at
|
||||
}
|
||||
USERS ||--o{ QUERIES : "拥有"
|
||||
QUERIES ||--o{ CITATION_RECORDS : "产生"
|
||||
QUERIES ||--o{ QUERY_TASKS : "驱动"
|
||||
USERS ||--o{ SUBSCRIPTIONS : "订阅"
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/alembic/versions/488d0bd5ab01_initial_migration.py:21-111](file://backend/alembic/versions/488d0bd5ab01_initial_migration.py#L21-L111)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/database.py:6-28](file://backend/app/database.py#L6-L28)
|
||||
- [backend/alembic/env.py:10-25](file://backend/alembic/env.py#L10-L25)
|
||||
- [backend/alembic.ini:89-89](file://backend/alembic.ini#L89-L89)
|
||||
|
||||
### 任务调度与执行
|
||||
- 调度器:APScheduler AsyncIOScheduler,每小时检查到期查询
|
||||
- 执行器:CitationEngine,遍历查询平台,执行品牌匹配与竞争品牌检测,写入记录并更新查询时间
|
||||
- 平台适配器:Kimi 与 Wenxin(通过外部平台 API)
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class QueryScheduler {
|
||||
+start()
|
||||
+check_and_execute_queries()
|
||||
+shutdown()
|
||||
}
|
||||
class CitationEngine {
|
||||
+execute_query(query, db)
|
||||
+execute_single_platform(keyword, platform, target_brand, brand_aliases)
|
||||
+close()
|
||||
}
|
||||
class KimiAdapter {
|
||||
+query(keyword)
|
||||
+close()
|
||||
}
|
||||
class WenxinAdapter {
|
||||
+query(keyword)
|
||||
+close()
|
||||
}
|
||||
QueryScheduler --> CitationEngine : "调用执行"
|
||||
CitationEngine --> KimiAdapter : "平台适配"
|
||||
CitationEngine --> WenxinAdapter : "平台适配"
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-157](file://backend/app/workers/citation_engine.py#L148-L157)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:30-89](file://backend/app/workers/scheduler.py#L30-L89)
|
||||
- [backend/app/workers/citation_engine.py:159-308](file://backend/app/workers/citation_engine.py#L159-L308)
|
||||
|
||||
## 依赖关系分析
|
||||
- 后端依赖:FastAPI、SQLAlchemy、asyncpg、Alembic、Pydantic、Passlib、Redis、APScheduler、Playwright、httpx、python-dotenv、pytest 等
|
||||
- 前端依赖:Next.js、NextAuth、Radix UI、Recharts、Tailwind 等
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
BE_REQ["后端依赖<br/>requirements.txt"] --> BE_IMG["后端镜像<br/>Python 3.11"]
|
||||
FE_PKG["前端依赖<br/>package.json"] --> FE_IMG["前端镜像<br/>Node.js 20"]
|
||||
BE_IMG --> BE_RUN["后端运行时"]
|
||||
FE_IMG --> FE_RUN["前端运行时"]
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:11-38](file://frontend/package.json#L11-L38)
|
||||
- [backend/Dockerfile:28-39](file://backend/Dockerfile#L28-L39)
|
||||
- [frontend/Dockerfile:6-14](file://frontend/Dockerfile#L6-L14)
|
||||
|
||||
**章节来源**
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:11-38](file://frontend/package.json#L11-L38)
|
||||
|
||||
## 性能与资源建议
|
||||
- CPU/内存:建议至少 2 核心 4GB 内存,以便同时运行 PostgreSQL、Redis、后端与前端
|
||||
- 磁盘:为持久化卷预留足够空间(PostgreSQL 与 Redis 数据目录)
|
||||
- 网络:Playwright 需要访问外部平台(Kimi/Wenxin),请确保网络连通性
|
||||
- 日志:后端默认关闭 SQL echo,若调试数据库问题可临时开启
|
||||
|
||||
[本节为通用建议,无需特定文件引用]
|
||||
|
||||
## 故障排除
|
||||
常见问题与解决思路
|
||||
- 容器无法启动或健康检查失败
|
||||
- 检查端口占用:确认 5432、6379、8000、3000 未被占用
|
||||
- 查看日志:docker compose logs db|redis|backend|frontend
|
||||
- 重启服务:docker compose restart db|redis|backend|frontend
|
||||
|
||||
- 前端无法访问或跨域报错
|
||||
- 确认前端已启动并监听 3000
|
||||
- 后端 CORS 已允许 http://localhost:3000,若自定义域名请在后端 CORS 配置中添加
|
||||
|
||||
- 数据库连接失败
|
||||
- 确认 .env 中 DATABASE_URL 与 docker-compose 中的 db 服务一致
|
||||
- 确认数据库初始化已完成(首次启动后端会自动执行迁移)
|
||||
|
||||
- Playwright 报错或浏览器不可用
|
||||
- 确认后端镜像已安装 Playwright 并完成浏览器安装
|
||||
- 如需离线部署,可在 .env 中配置 Playwright 浏览器路径
|
||||
|
||||
- 任务未执行或结果为空
|
||||
- 检查查询频率与 next_query_at 是否正确
|
||||
- 查看查询任务表状态(pending/running/success/failed)
|
||||
|
||||
**章节来源**
|
||||
- [docker-compose.yml:16-20](file://docker-compose.yml#L16-L20)
|
||||
- [backend/app/main.py:30-36](file://backend/app/main.py#L30-L36)
|
||||
- [backend/app/config.py:7-11](file://backend/app/config.py#L7-L11)
|
||||
- [backend/Dockerfile:31-33](file://backend/Dockerfile#L31-L33)
|
||||
- [backend/app/workers/scheduler.py:51-75](file://backend/app/workers/scheduler.py#L51-L75)
|
||||
|
||||
## 结语
|
||||
按照本指南完成 Docker Compose 一键启动后,你即可在浏览器中完成注册、创建查询任务并查看结果与图表。若需进一步扩展(如接入更多平台、调整调度策略或优化前端交互),可基于现有架构与配置进行定制。
|
||||
|
|
@ -0,0 +1,348 @@
|
|||
# 技术栈
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [backend/Dockerfile](file://backend/Dockerfile)
|
||||
- [frontend/Dockerfile](file://frontend/Dockerfile)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [frontend/package.json](file://frontend/package.json)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/database.py](file://backend/app/database.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [frontend/tsconfig.json](file://frontend/tsconfig.json)
|
||||
- [frontend/tailwind.config.ts](file://frontend/tailwind.config.ts)
|
||||
- [frontend/.eslintrc.json](file://frontend/.eslintrc.json)
|
||||
- [frontend/next.config.mjs](file://frontend/next.config.mjs)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排除指南](#故障排除指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本技术栈文档面向GEO项目的开发者与运维人员,系统梳理了后端(FastAPI + Python 3.11)、前端(Next.js 14 + TypeScript)、数据库(PostgreSQL)、缓存(Redis)以及容器化部署方案。文档重点解释各技术选型的原因与优势,并结合仓库中的实际配置文件,给出版本要求、兼容性信息与最佳实践建议,帮助团队快速理解并高效迭代。
|
||||
|
||||
## 项目结构
|
||||
项目采用前后端分离的多服务架构,通过 Docker Compose 编排数据库、缓存、后端API与前端应用,形成可独立开发、测试与部署的完整环境。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "本地开发环境"
|
||||
FE["前端应用<br/>Next.js 14 + TypeScript"]
|
||||
BE["后端API<br/>FastAPI + Python 3.11"]
|
||||
DB["数据库<br/>PostgreSQL 15"]
|
||||
RC["缓存<br/>Redis 7"]
|
||||
end
|
||||
FE --> |"HTTP/HTTPS"| BE
|
||||
BE --> DB
|
||||
BE --> RC
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
|
||||
**章节来源**
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
## 核心组件
|
||||
- 后端框架:FastAPI(高性能异步Web框架,自动生成OpenAPI文档,类型安全)
|
||||
- 数据库:PostgreSQL(企业级关系型数据库,支持事务与复杂查询)
|
||||
- 缓存:Redis(高性能键值存储,用于会话、限流与任务队列)
|
||||
- 前端框架:Next.js 14(App Router架构,SSR/SSG支持,TypeScript原生支持)
|
||||
- 异步ORM:SQLAlchemy 2.x(异步引擎与会话管理)
|
||||
- 任务调度:APScheduler(基于事件循环的异步调度器)
|
||||
- 浏览器自动化:Playwright(跨平台无头浏览器)
|
||||
- 开发工具链:TypeScript、Tailwind CSS、ESLint、PostCSS、tailwindcss-animate 插件
|
||||
|
||||
**章节来源**
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
|
||||
## 架构总览
|
||||
下图展示了从浏览器到后端API、数据库与缓存的整体交互流程,以及定时任务调度器在后台自动执行查询任务的机制。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
Browser["浏览器/移动端"] --> NextUI["Next.js 前端应用"]
|
||||
NextUI --> FastAPI["FastAPI 后端"]
|
||||
FastAPI --> SQLAlchemy["SQLAlchemy 异步ORM"]
|
||||
SQLAlchemy --> PostgreSQL["PostgreSQL 数据库"]
|
||||
FastAPI --> Redis["Redis 缓存"]
|
||||
FastAPI --> APS["APScheduler 定时任务"]
|
||||
APS --> Playwright["Playwright 浏览器自动化"]
|
||||
Playwright --> ThirdParty["第三方平台接口"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 后端:FastAPI + Python 3.11
|
||||
- 应用入口与生命周期:通过 lifespan 钩子在启动时初始化模型与调度器,在关闭时优雅停机。
|
||||
- 中间件与路由:启用CORS以允许前端localhost访问;按模块注册认证、查询、引用数据与报告相关路由。
|
||||
- 健康检查:提供 /health 接口便于容器编排健康检查。
|
||||
- 版本与依赖:FastAPI≥0.109.0、Uvicorn标准变体、Pydantic≥2.0、SQLAlchemy≥2.0、Redis、APScheduler≥3.10、HTTPX、Playwright≥1.40、pytest-asyncio等。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "客户端"
|
||||
participant API as "FastAPI 应用"
|
||||
participant Router as "路由处理器"
|
||||
participant DB as "异步数据库会话"
|
||||
participant ORM as "SQLAlchemy 模型"
|
||||
Client->>API : "HTTP 请求"
|
||||
API->>Router : "分发到对应路由"
|
||||
Router->>DB : "获取异步会话"
|
||||
DB->>ORM : "执行查询/更新"
|
||||
ORM-->>DB : "返回结果"
|
||||
DB-->>Router : "提交/回滚"
|
||||
Router-->>API : "序列化响应"
|
||||
API-->>Client : "HTTP 响应"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
|
||||
### 数据库与ORM:PostgreSQL + SQLAlchemy 异步
|
||||
- 连接配置:通过配置类集中管理 DATABASE_URL,使用 asyncpg 作为异步驱动。
|
||||
- 会话管理:定义异步引擎与异步会话工厂,提供依赖注入式数据库会话生成器。
|
||||
- 模型基类:统一的 declarative_base,便于扩展业务模型。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["应用启动"]) --> LoadCfg["加载数据库配置"]
|
||||
LoadCfg --> CreateEngine["创建异步引擎"]
|
||||
CreateEngine --> SessionFactory["创建异步会话工厂"]
|
||||
SessionFactory --> Request["请求到达"]
|
||||
Request --> GetSession["依赖注入获取会话"]
|
||||
GetSession --> ExecOp["执行ORM操作"]
|
||||
ExecOp --> Commit["提交事务"]
|
||||
Commit --> Close["关闭会话"]
|
||||
Close --> End(["请求结束"])
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
- [backend/app/database.py:1-29](file://backend/app/database.py#L1-L29)
|
||||
|
||||
### 缓存:Redis
|
||||
- 配置:通过 REDIS_URL 指向 redis:6379/0,容器网络内可达。
|
||||
- 用途:会话存储、任务状态缓存、限流与临时数据缓存(结合业务场景使用)。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
### 任务调度:APScheduler 异步调度器
|
||||
- 触发周期:每小时检查一次到期的查询任务。
|
||||
- 查询逻辑:筛选状态为 active 且 next_query_at 小于等于当前时间的任务。
|
||||
- 执行流程:逐条调用引用引擎执行查询,异常记录日志但不中断整体流程。
|
||||
- 关闭流程:优雅关闭调度器与引擎资源。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
S(["启动调度器"]) --> AddJob["添加每小时定时任务"]
|
||||
AddJob --> Loop{"事件循环存在?"}
|
||||
Loop --> |是| RunTask["在当前事件循环创建任务"]
|
||||
Loop --> |否| NewLoop["新建事件循环执行"]
|
||||
RunTask --> Check["查询数据库:查找到期任务"]
|
||||
NewLoop --> Check
|
||||
Check --> Found{"找到任务?"}
|
||||
Found --> |否| Wait["等待下次触发"]
|
||||
Found --> |是| Exec["逐条执行查询"]
|
||||
Exec --> Wait
|
||||
Wait --> AddJob
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
|
||||
### 前端:Next.js 14 + TypeScript
|
||||
- 框架版本:Next.js 14.2.35,使用 App Router 架构组织页面与布局。
|
||||
- 类型系统:TypeScript 5,严格模式与路径别名配置,确保类型安全。
|
||||
- UI与样式:Tailwind CSS 3.4.1,配合 tailwindcss-animate 插件实现动画与主题扩展。
|
||||
- 工具链:ESLint 8(Next.js核心Web Vitals与TypeScript规则),PostCSS,开发脚本(dev/build/start/lint)。
|
||||
- 认证:NextAuth.js 4.24.14,集成OAuth与会话管理。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
Next["Next.js 14 App Router"] --> TS["TypeScript 5"]
|
||||
Next --> Tailwind["Tailwind CSS 3.4.1"]
|
||||
Tailwind --> Animate["tailwindcss-animate"]
|
||||
Next --> ESLint["ESLint 8"]
|
||||
Next --> NextAuth["NextAuth.js 4.24.14"]
|
||||
Next --> Radix["Radix UI 组件库"]
|
||||
Next --> Recharts["Recharts 图表库"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [frontend/tsconfig.json:1-27](file://frontend/tsconfig.json#L1-L27)
|
||||
- [frontend/tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
- [frontend/.eslintrc.json:1-4](file://frontend/.eslintrc.json#L1-L4)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [frontend/tsconfig.json:1-27](file://frontend/tsconfig.json#L1-L27)
|
||||
- [frontend/tailwind.config.ts:1-57](file://frontend/tailwind.config.ts#L1-L57)
|
||||
- [frontend/.eslintrc.json:1-4](file://frontend/.eslintrc.json#L1-L4)
|
||||
- [frontend/next.config.mjs:1-5](file://frontend/next.config.mjs#L1-L5)
|
||||
|
||||
### 浏览器自动化:Playwright
|
||||
- 作用:支持对第三方平台进行无头浏览器模拟,满足动态内容抓取与登录态维护。
|
||||
- 容器内安装:Dockerfile 中安装 Chromium 并预装 Playwright 依赖。
|
||||
|
||||
**章节来源**
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [backend/requirements.txt:25-25](file://backend/requirements.txt#L25-L25)
|
||||
|
||||
## 依赖分析
|
||||
- 后端依赖:FastAPI、Uvicorn、SQLAlchemy 2.x、asyncpg、Alembic、Pydantic 2.x、Pydantic Settings、python-jose、passlib、redis、apscheduler、httpx、python-dotenv、pytest、pytest-asyncio、aiosqlite、playwright。
|
||||
- 前端依赖:Next.js 14、React 18、NextAuth.js、Radix UI、Lucide React、Recharts、Tailwind CSS、Tailwind Merge、Tailwind CSS Animate、TypeScript、ESLint、PostCSS。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "后端"
|
||||
F["FastAPI"]
|
||||
S["SQLAlchemy 2.x"]
|
||||
R["Redis"]
|
||||
P["Playwright"]
|
||||
A["APScheduler"]
|
||||
end
|
||||
subgraph "前端"
|
||||
N["Next.js 14"]
|
||||
T["TypeScript"]
|
||||
TA["Tailwind CSS"]
|
||||
E["ESLint"]
|
||||
NA["NextAuth.js"]
|
||||
end
|
||||
N --> F
|
||||
F --> S
|
||||
F --> R
|
||||
F --> A
|
||||
F --> P
|
||||
N --> TA
|
||||
N --> E
|
||||
N --> NA
|
||||
N --> T
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
|
||||
**章节来源**
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
|
||||
## 性能考虑
|
||||
- 异步优先:后端使用异步ORM与异步调度器,减少阻塞,提升并发处理能力。
|
||||
- 事件循环:调度器在当前事件循环或新建事件循环中执行任务,避免阻塞主事件循环。
|
||||
- 缓存策略:Redis用于热点数据与会话缓存,降低数据库压力;需结合业务设计合理的过期策略。
|
||||
- 数据库连接:异步引擎与会话池化配置,避免频繁建立/断开连接。
|
||||
- 前端优化:Next.js 14 App Router支持服务端渲染与静态生成,结合Tailwind CSS减少打包体积与样式开销。
|
||||
|
||||
## 故障排除指南
|
||||
- 健康检查失败
|
||||
- 现象:容器未就绪或重启频繁。
|
||||
- 排查:查看 docker-compose 中 db 与 redis 的 healthcheck 配置,确认端口映射与环境变量正确。
|
||||
- CORS 问题
|
||||
- 现象:前端跨域请求被拒绝。
|
||||
- 排查:确认后端 CORS 配置允许前端地址(默认允许 http://localhost:3000)。
|
||||
- 数据库连接失败
|
||||
- 现象:应用启动时报数据库连接错误。
|
||||
- 排查:核对 DATABASE_URL 是否指向容器内的 db 服务,确认网络连通与凭据正确。
|
||||
- Redis 连接失败
|
||||
- 现象:缓存不可用或任务调度异常。
|
||||
- 排查:核对 REDIS_URL 指向容器内的 redis 服务,确认端口映射与权限设置。
|
||||
- Playwright 依赖问题
|
||||
- 现象:容器内无法启动浏览器或报缺少系统库。
|
||||
- 排查:确认 Dockerfile 中已安装 Playwright 依赖与浏览器,必要时重建镜像。
|
||||
- 前端热更新异常
|
||||
- 现象:修改代码后未生效。
|
||||
- 排查:确认前端容器挂载了 node_modules 与源码目录,检查端口映射与命令是否正确。
|
||||
|
||||
**章节来源**
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/app/main.py:30-36](file://backend/app/main.py#L30-L36)
|
||||
- [backend/app/config.py:7-8](file://backend/app/config.py#L7-L8)
|
||||
- [backend/Dockerfile:6-25](file://backend/Dockerfile#L6-L25)
|
||||
|
||||
## 结论
|
||||
GEO项目采用现代全栈技术栈:后端以 FastAPI 为核心,结合 SQLAlchemy 异步ORM与 APScheduler 实现高并发与可维护的任务调度;前端以 Next.js 14 为基础,配合 TypeScript、Tailwind CSS 与 NextAuth.js 构建现代化用户界面与认证体系;数据库与缓存分别采用 PostgreSQL 与 Redis,满足生产级数据持久化与缓存需求。通过 Docker Compose 实现一键编排,便于本地开发与部署。
|
||||
|
||||
## 附录
|
||||
|
||||
### 技术选型与优势
|
||||
- FastAPI
|
||||
- 优势:高性能、自动生成OpenAPI文档、类型驱动的接口校验、异步支持。
|
||||
- 适用:高并发API、微服务、可观测性与可测试性要求高的场景。
|
||||
- Next.js 14 + TypeScript
|
||||
- 优势:App Router架构、SSR/SSG、原生TypeScript支持、严格的类型约束。
|
||||
- 适用:现代Web应用、SEO友好、团队协作与质量保障。
|
||||
- PostgreSQL
|
||||
- 优势:ACID事务、复杂查询、扩展性强、生态完善。
|
||||
- 适用:数据一致性要求高、业务逻辑复杂的场景。
|
||||
- Redis
|
||||
- 优势:高性能KV存储、丰富的数据结构、发布订阅与事务支持。
|
||||
- 适用:会话存储、缓存、限流、任务队列。
|
||||
- SQLAlchemy 异步ORM
|
||||
- 优势:类型安全、异步会话、灵活的查询DSL、与FastAPI依赖注入无缝集成。
|
||||
- 适用:需要强类型与异步IO的数据库访问层。
|
||||
|
||||
### 版本与兼容性
|
||||
- Python:后端使用 3.11(容器镜像),建议本地开发与CI保持一致。
|
||||
- Node.js:前端使用 20(容器镜像),建议本地开发与CI保持一致。
|
||||
- Next.js:14.2.35,使用 App Router 与TypeScript。
|
||||
- FastAPI:≥0.109.0,配合 Uvicorn 标准变体。
|
||||
- SQLAlchemy:≥2.0,异步引擎与会话。
|
||||
- Pydantic:≥2.0,配置与数据验证。
|
||||
- Redis:7-alpine,使用默认端口6379。
|
||||
- PostgreSQL:15-alpine,使用默认端口5432。
|
||||
- Playwright:≥1.40,容器内预装Chromium与依赖。
|
||||
- APScheduler:≥3.10,异步调度器。
|
||||
|
||||
### 容器化与部署要点
|
||||
- 多阶段思路:后端基于 slim 镜像,安装系统依赖与Playwright;前端基于 alpine 镜像,最小化依赖安装。
|
||||
- 端口映射:后端8000、前端3000、数据库5432、缓存6379。
|
||||
- 健康检查:db 与 redis 提供健康检查,后端提供 /health。
|
||||
- 开发体验:前端使用 Next.js 开发服务器(dev 脚本,支持热更新),后端使用 uvicorn --reload。
|
||||
|
||||
**章节来源**
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
|
@ -0,0 +1,455 @@
|
|||
# 核心功能
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/api/auth.py](file://backend/app/api/auth.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [backend/app/api/reports.py](file://backend/app/api/reports.py)
|
||||
- [backend/app/services/auth.py](file://backend/app/services/auth.py)
|
||||
- [backend/app/services/query.py](file://backend/app/services/query.py)
|
||||
- [backend/app/services/citation.py](file://backend/app/services/citation.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx](file://frontend/app/(dashboard)/dashboard/page.tsx)
|
||||
- [frontend/components/charts/trend-chart.tsx](file://frontend/components/charts/trend-chart.tsx)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本文件为 GEO 平台的核心功能概览,聚焦以下关键能力:
|
||||
- 用户认证与权限管理:基于邮箱/密码注册登录、JWT 访问令牌签发与校验、会话保护接口。
|
||||
- 智能查询任务管理:查询词创建、更新、删除、分页查询;按日/周/月频率自动调度;手动触发即时查询。
|
||||
- 品牌引用检测引擎:多阶段匹配(精确/别名/模糊)、置信度评分、竞争品牌识别、上下文片段抽取。
|
||||
- 多 AI 平台数据集成:抽象适配器模式对接不同大模型平台,统一查询与结果处理。
|
||||
- 数据分析与可视化:统计指标(总查询/引用次数、引用率、平均位置)、平台对比、30 天趋势折线图。
|
||||
- 报告导出:CSV 导出引用记录,便于离线分析与归档。
|
||||
|
||||
这些功能围绕“查询—检测—统计—可视—导出”的闭环展开,既满足管理员对系统运行与任务调度的掌控,也服务于研究人员对品牌监测与趋势分析的需求。
|
||||
|
||||
## 项目结构
|
||||
后端采用 FastAPI + SQLAlchemy 异步 ORM,按领域划分 API、服务、模型与工作器;前端使用 Next.js + React,通过自定义 API 客户端与后端交互;数据库为 PostgreSQL。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "后端"
|
||||
A["FastAPI 应用<br/>app/main.py"]
|
||||
B["API 层<br/>auth/queries/citations/reports"]
|
||||
C["服务层<br/>auth/query/citation"]
|
||||
D["模型层<br/>Query/CitationRecord"]
|
||||
E["工作器<br/>Scheduler/CitationEngine/Platforms"]
|
||||
end
|
||||
subgraph "前端"
|
||||
F["仪表盘页面<br/>dashboard/page.tsx"]
|
||||
G["趋势图表组件<br/>trend-chart.tsx"]
|
||||
end
|
||||
A --> B
|
||||
B --> C
|
||||
C --> D
|
||||
C --> E
|
||||
F --> G
|
||||
F --> B
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/api/reports.py:1-47](file://backend/app/api/reports.py#L1-L47)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/services/query.py:1-130](file://backend/app/services/query.py#L1-L130)
|
||||
- [backend/app/services/citation.py:1-269](file://backend/app/services/citation.py#L1-L269)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:1-156](file://frontend/app/(dashboard)/dashboard/page.tsx#L1-L156)
|
||||
- [frontend/components/charts/trend-chart.tsx:1-60](file://frontend/components/charts/trend-chart.tsx#L1-L60)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
|
||||
## 核心组件
|
||||
- 认证与权限
|
||||
- 注册/登录:邮箱唯一性校验、密码哈希、JWT 签发;当前用户信息读取。
|
||||
- 权限边界:所有业务接口均通过当前用户上下文进行资源归属校验(查询、引用、统计、导出)。
|
||||
- 查询任务管理
|
||||
- 查询 CRUD:分页列表、创建、读取、更新、删除;创建时校验用户配额上限;更新时可重算下次执行时间。
|
||||
- 自动调度:每小时扫描到期查询,自动触发引用检测;手动触发即时查询。
|
||||
- 引用检测引擎
|
||||
- 多阶段匹配:精确命中 → 别名命中 → 模糊相似度阈值;返回是否引用、置信度、位置、上下文。
|
||||
- 竞争品牌:基于预设行业品牌库识别竞品。
|
||||
- 结果持久化:生成引用记录,包含平台、是否引用、位置、文本、竞品集合、原始响应。
|
||||
- 多 AI 平台集成
|
||||
- 适配器基类定义统一接口;内置“文心”“Kimi”适配器;未来可扩展更多平台。
|
||||
- 数据分析与可视化
|
||||
- 统计聚合:总查询/引用数、引用率、平均位置、按平台汇总、30 天趋势。
|
||||
- 前端展示:仪表盘卡片与趋势折线图组件。
|
||||
- 报告导出
|
||||
- 支持 CSV 下载,包含日期、平台、是否引用、引用位置、引用文本、竞品品牌等字段。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/services/query.py:1-130](file://backend/app/services/query.py#L1-L130)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/services/citation.py:1-269](file://backend/app/services/citation.py#L1-L269)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:1-156](file://frontend/app/(dashboard)/dashboard/page.tsx#L1-L156)
|
||||
- [frontend/components/charts/trend-chart.tsx:1-60](file://frontend/components/charts/trend-chart.tsx#L1-L60)
|
||||
- [backend/app/api/reports.py:1-47](file://backend/app/api/reports.py#L1-L47)
|
||||
|
||||
## 架构总览
|
||||
下图展示从用户请求到数据落库与可视化的整体流程,以及定时调度与即时查询的协同机制。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as "用户"
|
||||
participant FE as "前端"
|
||||
participant API as "后端API"
|
||||
participant S as "服务层"
|
||||
participant DB as "数据库"
|
||||
participant W as "工作器"
|
||||
participant CE as "引用检测引擎"
|
||||
participant P as "AI平台适配器"
|
||||
U->>FE : 登录/访问仪表盘
|
||||
FE->>API : 获取统计/查询/引用/导出
|
||||
API->>S : 参数校验与业务处理
|
||||
S->>DB : 读写查询/引用/任务
|
||||
Note over S,DB : 权限校验:仅允许访问本人资源
|
||||
API->>W : 触发/查询任务
|
||||
W->>CE : 执行查询
|
||||
CE->>P : 平台查询
|
||||
P-->>CE : 原始响应
|
||||
CE->>S : 写入引用记录
|
||||
S->>DB : 持久化
|
||||
DB-->>S : 成功
|
||||
S-->>API : 结果
|
||||
API-->>FE : 响应数据/流式下载
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/main.py:13-21](file://backend/app/main.py#L13-L21)
|
||||
- [backend/app/api/citations.py:59-77](file://backend/app/api/citations.py#L59-L77)
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [backend/app/services/citation.py:204-234](file://backend/app/services/citation.py#L204-L234)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 用户认证与权限管理
|
||||
- 功能要点
|
||||
- 注册:邮箱唯一性检查、密码加密存储、返回用户信息。
|
||||
- 登录:邮箱+密码验证、签发 JWT(含过期时间),返回用户与令牌。
|
||||
- 当前用户:受保护路由读取当前用户上下文。
|
||||
- 关键价值
|
||||
- 保障数据隔离:所有业务接口均以当前用户为准进行资源归属校验。
|
||||
- 易于扩展:JWT 可用于跨域与第三方集成。
|
||||
- 典型流程
|
||||
- 注册 → 登录 → 携带令牌访问受保护接口 → 获取/创建查询 → 触发查询 → 查看统计/导出报告。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as "用户"
|
||||
participant API as "认证API"
|
||||
participant S as "认证服务"
|
||||
participant DB as "数据库"
|
||||
U->>API : POST /api/v1/auth/register
|
||||
API->>S : 注册逻辑
|
||||
S->>DB : 检查邮箱/保存用户
|
||||
DB-->>S : 成功
|
||||
S-->>API : 用户对象
|
||||
U->>API : POST /api/v1/auth/login
|
||||
API->>S : 验证邮箱/密码
|
||||
S->>DB : 查询用户
|
||||
DB-->>S : 用户信息
|
||||
S-->>API : JWT令牌
|
||||
API-->>U : {access_token,user}
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/auth.py:13-37](file://backend/app/api/auth.py#L13-L37)
|
||||
- [backend/app/services/auth.py:37-69](file://backend/app/services/auth.py#L37-L69)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/auth.py:1-43](file://backend/app/api/auth.py#L1-L43)
|
||||
- [backend/app/services/auth.py:1-69](file://backend/app/services/auth.py#L1-L69)
|
||||
|
||||
### 智能查询任务管理
|
||||
- 功能要点
|
||||
- 查询 CRUD:分页列表、创建(校验配额与频率)、读取、更新(频率变更时重算下次执行时间)、删除。
|
||||
- 自动调度:每小时扫描到期查询,调用引用检测引擎执行;更新查询最近与下次执行时间。
|
||||
- 即时查询:手动触发,为每个配置平台创建任务并入队。
|
||||
- 核心价值
|
||||
- 减少人工干预:按计划自动抓取与检测,提升研究效率。
|
||||
- 灵活控制:支持日/周/月频率与手动触发,兼顾实时性与成本。
|
||||
- 典型场景
|
||||
- 研究员创建查询(关键词、目标品牌、平台、频率),系统按时自动执行;也可随时“立即执行”。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["创建查询"]) --> CheckLimit["检查用户配额"]
|
||||
CheckLimit --> |未超限| CalcNext["按频率计算下次执行时间"]
|
||||
CheckLimit --> |超限| Deny["拒绝创建"]
|
||||
CalcNext --> SaveQ["保存查询"]
|
||||
SaveQ --> Schedule["等待调度器扫描"]
|
||||
Schedule --> Due{"到期?"}
|
||||
Due --> |否| Wait["继续等待"]
|
||||
Due --> |是| Trigger["触发引用检测"]
|
||||
Trigger --> UpdateTime["更新最近/下次执行时间"]
|
||||
UpdateTime --> Done(["完成一轮周期"])
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/services/query.py:45-81](file://backend/app/services/query.py#L45-L81)
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/workers/citation_engine.py:291-300](file://backend/app/workers/citation_engine.py#L291-L300)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/services/query.py:1-130](file://backend/app/services/query.py#L1-L130)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
|
||||
### 品牌引用检测引擎
|
||||
- 功能要点
|
||||
- 品牌匹配器:精确命中(置信度1.0)→ 别名命中(置信度0.9)→ 模糊相似度(阈值0.4);返回是否引用、置信度、位置、上下文。
|
||||
- 竞争品牌检测:基于预设行业品牌库识别其他品牌。
|
||||
- 结果持久化:记录平台、是否引用、位置、文本、竞品、原始响应。
|
||||
- 核心价值
|
||||
- 置信度评分:帮助判断引用可靠性;模糊匹配提供兜底发现。
|
||||
- 上下文定位:快速定位品牌在原文中的首次出现段落,便于人工复核。
|
||||
- 典型场景
|
||||
- 文本中提及“XX品牌”,匹配器判定为“别名命中”,置信度0.9,并返回首次出现段落片段。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class CitationEngine {
|
||||
+execute_query(query, db) CitationRecord[]
|
||||
+execute_single_platform(keyword, platform, target_brand, aliases) dict
|
||||
}
|
||||
class BrandMatcher {
|
||||
+match(text) dict
|
||||
}
|
||||
class CompetitorDetector {
|
||||
+detect(text, target_brand) str[]
|
||||
}
|
||||
class BasePlatformAdapter {
|
||||
<<abstract>>
|
||||
+query(keyword) str
|
||||
}
|
||||
CitationEngine --> BrandMatcher : "使用"
|
||||
CitationEngine --> CompetitorDetector : "使用"
|
||||
CitationEngine --> BasePlatformAdapter : "委托查询"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:19-120](file://backend/app/workers/citation_engine.py#L19-L120)
|
||||
- [backend/app/workers/citation_engine.py:122-146](file://backend/app/workers/citation_engine.py#L122-L146)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
|
||||
### 多 AI 平台数据集成
|
||||
- 功能要点
|
||||
- 适配器基类定义统一接口(平台名、URL、查询方法)。
|
||||
- 内置“文心”“Kimi”适配器;引擎按查询配置的平台列表逐一执行。
|
||||
- 核心价值
|
||||
- 解耦平台差异:统一调用入口,便于扩展更多平台。
|
||||
- 可观测性:每个平台独立任务状态(pending/running/success/failed)。
|
||||
- 典型场景
|
||||
- 查询配置包含“wenxin,kimi”,引擎为两者分别创建任务并逐一执行,最终汇总结果。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant CE as "CitationEngine"
|
||||
participant Q as "Query"
|
||||
participant T as "QueryTask"
|
||||
participant A as "平台适配器"
|
||||
CE->>Q : 读取平台列表
|
||||
loop 遍历平台
|
||||
CE->>T : 获取/创建任务
|
||||
CE->>A : 调用 query(keyword)
|
||||
A-->>CE : 返回原始响应
|
||||
CE->>CE : 品牌匹配/竞品检测
|
||||
CE->>T : 更新任务状态
|
||||
end
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [backend/app/workers/platforms/base.py:10-17](file://backend/app/workers/platforms/base.py#L10-L17)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/citation_engine.py:151-157](file://backend/app/workers/citation_engine.py#L151-L157)
|
||||
|
||||
### 数据分析与可视化
|
||||
- 功能要点
|
||||
- 统计接口:总查询/引用数、引用率、平均位置、按平台汇总、30 天趋势(按自然周聚合)。
|
||||
- 前端仪表盘:卡片展示核心指标;趋势折线图展示过去 30 天每周引用次数。
|
||||
- 核心价值
|
||||
- 快速洞察:总览指标帮助评估监测效果与变化趋势。
|
||||
- 易用性:图表直观呈现,降低阅读成本。
|
||||
- 典型场景
|
||||
- 研究人员查看“过去30天引用趋势”,发现某周显著上升,结合上下文进一步分析。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant FE as "前端"
|
||||
participant API as "统计API"
|
||||
participant S as "统计服务"
|
||||
participant DB as "数据库"
|
||||
FE->>API : GET /api/v1/citations/stats
|
||||
API->>S : 统计聚合
|
||||
S->>DB : 聚合查询/分组统计
|
||||
DB-->>S : 结果集
|
||||
S-->>API : {total,rate,avg,by_platform,trend}
|
||||
API-->>FE : 返回JSON
|
||||
FE->>FE : 渲染卡片与趋势图
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:20-155](file://frontend/app/(dashboard)/dashboard/page.tsx#L20-L155)
|
||||
- [frontend/components/charts/trend-chart.tsx:22-59](file://frontend/components/charts/trend-chart.tsx#L22-L59)
|
||||
- [backend/app/api/citations.py:49-56](file://backend/app/api/citations.py#L49-L56)
|
||||
- [backend/app/services/citation.py:76-201](file://backend/app/services/citation.py#L76-L201)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/(dashboard)/dashboard/page.tsx:1-156](file://frontend/app/(dashboard)/dashboard/page.tsx#L1-L156)
|
||||
- [frontend/components/charts/trend-chart.tsx:1-60](file://frontend/components/charts/trend-chart.tsx#L1-L60)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [backend/app/services/citation.py:1-269](file://backend/app/services/citation.py#L1-L269)
|
||||
|
||||
### 报告导出
|
||||
- 功能要点
|
||||
- 支持 CSV 导出指定查询的所有引用记录,包含日期、平台、是否引用、引用位置、引用文本、竞品品牌。
|
||||
- 流式响应,避免大文件内存压力。
|
||||
- 核心价值
|
||||
- 离线分析:便于导入 Excel/BI 工具做深度分析。
|
||||
- 合规归档:结构化导出满足审计与存档需求。
|
||||
- 典型场景
|
||||
- 研究员导出某周的全部引用记录,用于撰写专题报告。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant FE as "前端"
|
||||
participant API as "导出API"
|
||||
participant S as "导出服务"
|
||||
participant DB as "数据库"
|
||||
FE->>API : GET /api/v1/reports/export/csv?query_id=...
|
||||
API->>S : 导出CSV
|
||||
S->>DB : 查询引用记录
|
||||
DB-->>S : 记录集
|
||||
S-->>API : CSV字符串
|
||||
API-->>FE : 流式响应(Attachment)
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/api/reports.py:16-46](file://backend/app/api/reports.py#L16-L46)
|
||||
- [backend/app/services/citation.py:237-268](file://backend/app/services/citation.py#L237-L268)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/reports.py:1-47](file://backend/app/api/reports.py#L1-L47)
|
||||
- [backend/app/services/citation.py:237-268](file://backend/app/services/citation.py#L237-L268)
|
||||
|
||||
## 依赖分析
|
||||
- 组件耦合
|
||||
- API 层仅负责参数解析与鉴权,业务逻辑集中在服务层,降低控制器复杂度。
|
||||
- 引擎与平台适配器通过抽象接口解耦,便于替换与扩展。
|
||||
- 调度器与引擎通过 ORM 与任务表协作,避免直接耦合业务数据。
|
||||
- 外部依赖
|
||||
- FastAPI/SQLAlchemy:Web 框架与 ORM。
|
||||
- APScheduler:异步定时任务调度。
|
||||
- Recharts:前端图表渲染。
|
||||
- 潜在风险
|
||||
- 平台适配器异常需隔离,避免影响其他平台任务。
|
||||
- 大量并发查询可能带来数据库与外部平台压力,建议限流与重试策略。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
API["API层"] --> SVC["服务层"]
|
||||
SVC --> MODEL["模型层"]
|
||||
SVC --> WORKER["工作器"]
|
||||
WORKER --> ADAPTER["平台适配器"]
|
||||
FE["前端"] --> API
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/main.py:38-42](file://backend/app/main.py#L38-L42)
|
||||
- [backend/app/workers/citation_engine.py:151-157](file://backend/app/workers/citation_engine.py#L151-L157)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
|
||||
## 性能考虑
|
||||
- 数据库
|
||||
- 查询索引:查询与引用记录表均建立常用过滤字段索引,减少扫描开销。
|
||||
- 分页与聚合:统计接口使用分组与聚合,避免一次性拉取全量数据。
|
||||
- 引擎与平台
|
||||
- 并行执行:同一查询的不同平台可并行处理,缩短总耗时。
|
||||
- 错误隔离:单平台失败不影响其他平台,保证整体可用性。
|
||||
- 前端
|
||||
- 图表懒加载与响应式容器,提升大屏体验。
|
||||
- 导出采用流式响应,避免内存峰值。
|
||||
|
||||
[本节为通用指导,无需具体文件分析]
|
||||
|
||||
## 故障排查指南
|
||||
- 认证问题
|
||||
- 注册失败:邮箱已被注册;检查重复提交或换用其他邮箱。
|
||||
- 登录失败:邮箱或密码错误;确认凭据正确与网络可达。
|
||||
- 查询任务
|
||||
- 创建被拒:超出配额;联系管理员提升限额或清理历史查询。
|
||||
- 无法执行:查询状态非“active”或未配置平台;检查状态与平台列表。
|
||||
- 即时查询无响应:平台适配器异常或网络超时;查看任务状态与错误信息。
|
||||
- 统计与导出
|
||||
- 统计为空:可能因筛选条件导致无数据;尝试放宽时间范围或移除查询筛选。
|
||||
- 导出失败:查询不存在或无权限;确认 query_id 与登录态。
|
||||
- 调度器
|
||||
- 未触发:检查调度器是否启动、时区设置、下次执行时间是否已到达。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/services/auth.py:37-69](file://backend/app/services/auth.py#L37-L69)
|
||||
- [backend/app/services/query.py:45-81](file://backend/app/services/query.py#L45-L81)
|
||||
- [backend/app/services/citation.py:204-234](file://backend/app/services/citation.py#L204-L234)
|
||||
- [backend/app/api/reports.py:16-46](file://backend/app/api/reports.py#L16-L46)
|
||||
- [backend/app/workers/scheduler.py:30-40](file://backend/app/workers/scheduler.py#L30-L40)
|
||||
|
||||
## 结论
|
||||
GEO 平台以“查询—检测—统计—可视—导出”为主线,构建了从自动化采集到深度分析的完整链路。通过严格的权限控制、可扩展的平台适配器、稳健的定时调度与清晰的可视化输出,既能满足管理员对系统运行的掌控,也能为研究人员提供高效、可靠的品牌监测工具。建议后续在平台适配器层面引入重试与熔断、在数据库侧增加慢查询监控与索引优化,持续提升稳定性与性能。
|
||||
|
||||
[本节为总结性内容,无需具体文件分析]
|
||||
|
||||
## 附录
|
||||
- 典型使用流程(管理员)
|
||||
- 新建用户/分配配额 → 配置平台密钥 → 监控调度器运行 → 查看任务状态与错误日志 → 调整频率策略。
|
||||
- 典型使用流程(研究人员)
|
||||
- 登录 → 创建查询(关键词/目标品牌/平台/频率) → 查看仪表盘趋势 → 导出报告 → 深度分析与汇报。
|
||||
- 关键接口路径参考
|
||||
- 认证:POST /api/v1/auth/register, POST /api/v1/auth/login, GET /api/v1/auth/me
|
||||
- 查询:GET/POST /api/v1/queries(列表/创建),GET/PATCH/DELETE /api/v1/queries/{query_id}(读取/更新/删除)
|
||||
- 引用:GET /api/v1/citations, GET /api/v1/citations/stats, POST /api/v1/queries/{query_id}/run-now
|
||||
- 报告:GET /api/v1/reports/export/csv
|
||||
|
||||
[本节为概览性内容,无需具体文件分析]
|
||||
|
|
@ -0,0 +1,366 @@
|
|||
# 系统架构
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [backend/Dockerfile](file://backend/Dockerfile)
|
||||
- [frontend/Dockerfile](file://frontend/Dockerfile)
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/services/query.py](file://backend/app/services/query.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [backend/app/workers/platforms/kimi.py](file://backend/app/workers/platforms/kimi.py)
|
||||
- [backend/app/workers/platforms/wenxin.py](file://backend/app/workers/platforms/wenxin.py)
|
||||
- [frontend/lib/api.ts](file://frontend/lib/api.ts)
|
||||
- [frontend/components/providers.tsx](file://frontend/components/providers.tsx)
|
||||
- [frontend/app/layout.tsx](file://frontend/app/layout.tsx)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [简介](#简介)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 简介
|
||||
本系统是一个学术查询与引用管理平台,采用前后端分离架构:前端使用 Next.js 构建用户界面,后端基于 FastAPI 提供 REST API,并通过 SQLAlchemy ORM 访问 PostgreSQL 数据库;AI 平台查询由后端工作器通过适配器模式对接不同平台(Kimi、文心一言),并通过 Redis 缓存与队列进行异步调度。系统通过 Docker Compose 将数据库、缓存、后端与前端容器化编排,便于开发与部署。
|
||||
|
||||
## 项目结构
|
||||
系统分为四层:
|
||||
- 表现层(前端 Next.js):负责用户界面渲染与 API 调用
|
||||
- 业务逻辑层(后端 FastAPI):提供 REST 接口、业务服务与调度
|
||||
- 数据访问层(SQLAlchemy ORM):模型定义与数据库交互
|
||||
- 基础设施层(Docker 容器):PostgreSQL、Redis、后端与前端容器
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "表现层(前端 Next.js)"
|
||||
FE_APP["Next.js 应用<br/>路由与页面"]
|
||||
FE_LIB_API["API 客户端<br/>frontend/lib/api.ts"]
|
||||
FE_PROVIDERS["会话提供者<br/>frontend/components/providers.tsx"]
|
||||
end
|
||||
subgraph "业务逻辑层(后端 FastAPI)"
|
||||
BE_MAIN["应用入口<br/>backend/app/main.py"]
|
||||
BE_ROUTERS["API 路由<br/>backend/app/api/*"]
|
||||
BE_SERVICES["业务服务<br/>backend/app/services/*"]
|
||||
BE_WORKERS["工作器与适配器<br/>backend/app/workers/*"]
|
||||
end
|
||||
subgraph "数据访问层(SQLAlchemy ORM)"
|
||||
BE_MODELS["ORM 模型<br/>backend/app/models/*"]
|
||||
end
|
||||
subgraph "基础设施层(Docker 容器)"
|
||||
DC_DB["PostgreSQL 容器"]
|
||||
DC_REDIS["Redis 容器"]
|
||||
DC_BE["后端容器"]
|
||||
DC_FE["前端容器"]
|
||||
end
|
||||
FE_APP --> FE_LIB_API
|
||||
FE_LIB_API --> BE_MAIN
|
||||
BE_MAIN --> BE_ROUTERS
|
||||
BE_ROUTERS --> BE_SERVICES
|
||||
BE_SERVICES --> BE_MODELS
|
||||
BE_WORKERS --> BE_MODELS
|
||||
BE_MAIN --> DC_BE
|
||||
BE_MODELS --> DC_DB
|
||||
BE_WORKERS --> DC_REDIS
|
||||
FE_APP --> DC_FE
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/services/query.py:1-130](file://backend/app/services/query.py#L1-L130)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [frontend/components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
|
||||
## 核心组件
|
||||
- 前端 Next.js:提供登录注册、查询管理、引用统计与报告导出等功能页面,通过统一 API 客户端发起请求。
|
||||
- 后端 FastAPI:提供认证、查询词、引用数据、报告导出等接口;启动时初始化调度器并挂载 CORS 中间件。
|
||||
- 工作器与适配器:以适配器模式封装不同 AI 平台(Kimi、文心一言)的查询行为,统一对外接口。
|
||||
- ORM 模型:定义查询词、引用记录、任务等实体及索引,支撑业务数据持久化。
|
||||
- 基础设施:PostgreSQL 存储业务数据,Redis 支持缓存与队列,Docker Compose 统一编排。
|
||||
|
||||
章节来源
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
## 架构总览
|
||||
系统采用分层解耦与容器化部署,前端通过 REST API 与后端交互,后端通过工作器调用 AI 平台,数据库与缓存分别承担数据持久化与临时状态管理。下图展示从用户请求到 AI 平台查询再到结果返回的完整链路。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as "用户"
|
||||
participant FE as "前端 Next.js"
|
||||
participant API as "后端 API 路由"
|
||||
participant SVC as "业务服务"
|
||||
participant ENG as "引用检测引擎"
|
||||
participant ADP as "AI 平台适配器"
|
||||
participant DB as "PostgreSQL"
|
||||
U->>FE : "提交查询/触发查询"
|
||||
FE->>API : "HTTP 请求(带鉴权)"
|
||||
API->>SVC : "调用业务方法"
|
||||
SVC->>ENG : "执行引用检测"
|
||||
loop "遍历平台"
|
||||
ENG->>ADP : "query(keyword)"
|
||||
ADP-->>ENG : "返回平台响应文本"
|
||||
end
|
||||
ENG->>DB : "写入引用记录/更新任务状态"
|
||||
DB-->>ENG : "确认写入"
|
||||
ENG-->>SVC : "返回检测结果"
|
||||
SVC-->>API : "组装响应"
|
||||
API-->>FE : "返回结果"
|
||||
FE-->>U : "展示引用统计/报告"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/services/query.py:1-130](file://backend/app/services/query.py#L1-L130)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 前端组件分析
|
||||
- API 客户端:集中封装鉴权头、错误处理与基础路径,统一暴露认证、查询、引用、报告等接口。
|
||||
- 会话提供者:为 NextAuth 集成提供会话上下文,保障页面与 API 调用的鉴权一致性。
|
||||
- 布局与字体:全局样式与字体配置,保证一致的视觉体验。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["页面发起请求"]) --> BuildHeaders["构建请求头<br/>含 Authorization"]
|
||||
BuildHeaders --> FetchAPI["调用 fetchWithAuth"]
|
||||
FetchAPI --> RespOK{"响应成功?"}
|
||||
RespOK --> |是| ParseJSON["解析 JSON 响应"]
|
||||
RespOK --> |否| HandleError["抛出错误(包含 HTTP 状态)"]
|
||||
ParseJSON --> ReturnData["返回数据给页面"]
|
||||
HandleError --> ThrowErr["上抛错误供 UI 处理"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
章节来源
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [frontend/components/providers.tsx:1-9](file://frontend/components/providers.tsx#L1-L9)
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
|
||||
### 后端组件分析
|
||||
- 应用入口:注册路由、启用 CORS、启动/关闭调度器生命周期钩子。
|
||||
- API 路由:提供认证、查询词、引用数据、报告导出等接口,统一前缀与标签。
|
||||
- 业务服务:封装查询词的增删改查与配额校验、频率与下次查询时间计算。
|
||||
- ORM 模型:定义查询词实体及其索引,关联用户、引用记录与任务。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class Query {
|
||||
+UUID id
|
||||
+UUID user_id
|
||||
+string keyword
|
||||
+string target_brand
|
||||
+list brand_aliases
|
||||
+list platforms
|
||||
+string frequency
|
||||
+string status
|
||||
+datetime last_queried_at
|
||||
+datetime next_query_at
|
||||
+datetime created_at
|
||||
+datetime updated_at
|
||||
}
|
||||
class User {
|
||||
+UUID id
|
||||
+string name
|
||||
+string email
|
||||
+int max_queries
|
||||
}
|
||||
class CitationRecord {
|
||||
+UUID id
|
||||
+UUID query_id
|
||||
+string platform
|
||||
+bool cited
|
||||
+int citation_position
|
||||
+string citation_text
|
||||
+list competitor_brands
|
||||
+string raw_response
|
||||
}
|
||||
class QueryTask {
|
||||
+UUID id
|
||||
+UUID query_id
|
||||
+string platform
|
||||
+string status
|
||||
+datetime started_at
|
||||
+datetime completed_at
|
||||
+string error_message
|
||||
}
|
||||
User "1" --> "many" Query : "拥有"
|
||||
Query "1" --> "many" CitationRecord : "生成"
|
||||
Query "1" --> "many" QueryTask : "驱动"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
|
||||
章节来源
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/services/query.py:1-130](file://backend/app/services/query.py#L1-L130)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
|
||||
### 工作器与适配器分析
|
||||
- 引用检测引擎:根据查询词与目标品牌,遍历平台列表,调用对应适配器获取响应,执行品牌匹配与竞争品牌检测,生成引用记录并更新任务状态与查询时间。
|
||||
- 适配器基类:定义平台名称、URL 与抽象查询方法,统一资源清理接口。
|
||||
- Kimi 适配器:通过 Playwright 自动化模拟用户输入与提交,等待回复稳定后提取文本。
|
||||
- 文心一言适配器:流程与 Kimi 适配器相同(通过 Playwright 自动化模拟输入与提交,等待回复稳定后提取文本),但针对文心一言的页面结构进行适配。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class BasePlatformAdapter {
|
||||
<<abstract>>
|
||||
+string platform_name
|
||||
+string platform_url
|
||||
+query(keyword) str
|
||||
+close()
|
||||
}
|
||||
class KimiAdapter {
|
||||
+platform_name = "kimi"
|
||||
+platform_url = "https://kimi.moonshot.cn"
|
||||
+query(keyword) str
|
||||
+close()
|
||||
}
|
||||
class WenxinAdapter {
|
||||
+platform_name = "wenxin"
|
||||
+platform_url = "https://yiyan.baidu.com"
|
||||
+query(keyword) str
|
||||
+close()
|
||||
}
|
||||
class CitationEngine {
|
||||
+execute_query(query, db) CitationRecord[]
|
||||
+execute_single_platform(keyword, platform, target_brand, brand_aliases) dict
|
||||
+close()
|
||||
}
|
||||
BasePlatformAdapter <|-- KimiAdapter
|
||||
BasePlatformAdapter <|-- WenxinAdapter
|
||||
CitationEngine --> BasePlatformAdapter : "使用"
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
|
||||
章节来源
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
|
||||
### 数据流与处理流程
|
||||
- 用户在前端提交查询或手动触发查询。
|
||||
- 前端通过 API 客户端调用后端接口,携带鉴权信息。
|
||||
- 后端路由将请求委派给业务服务,服务层执行权限与配额检查、频率计算等逻辑。
|
||||
- 引用检测引擎根据平台列表逐个调用适配器,等待回复稳定后进行品牌匹配与竞争品牌检测。
|
||||
- 结果写入数据库并更新任务状态,前端轮询或接收通知后刷新页面。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A["用户提交查询"] --> B["前端 API 客户端"]
|
||||
B --> C["后端 API 路由"]
|
||||
C --> D["业务服务(权限/配额/频率)"]
|
||||
D --> E["引用检测引擎"]
|
||||
E --> F["Kimi 适配器"]
|
||||
E --> G["文心一言适配器"]
|
||||
F --> H["品牌匹配/竞争品牌检测"]
|
||||
G --> H
|
||||
H --> I["写入引用记录/更新任务状态"]
|
||||
I --> J["返回结果给前端"]
|
||||
J --> K["展示统计/报告"]
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/services/query.py:1-130](file://backend/app/services/query.py#L1-L130)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
|
||||
## 依赖分析
|
||||
- 组件内聚与耦合:后端 API 路由仅负责参数解析与委派,业务服务与模型解耦良好;工作器通过适配器模式与具体平台解耦。
|
||||
- 外部依赖:后端依赖 PostgreSQL 与 Redis;前端依赖 Next.js 生态与 NextAuth;工作器依赖 Playwright 浏览器自动化。
|
||||
- 容器编排:Compose 文件定义数据库、缓存、后端与前端服务,设置健康检查与端口映射,后端依赖数据库与缓存健康。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
FE["前端"] --> BE["后端"]
|
||||
BE --> DB["PostgreSQL"]
|
||||
BE --> RD["Redis"]
|
||||
BE --> PW["Playwright 浏览器"]
|
||||
DC["Docker Compose"] --> DB
|
||||
DC --> RD
|
||||
DC --> BE
|
||||
DC --> FE
|
||||
```
|
||||
|
||||
图表来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
|
||||
## 性能考虑
|
||||
- 异步与并发:后端使用异步数据库会话与异步适配器调用,提升 I/O 密集场景下的吞吐。
|
||||
- 重试与指数退避:适配器对平台请求进行多轮重试与指数退避,降低偶发网络波动影响。
|
||||
- 回复稳定性检测:等待 AI 回复文本稳定后再提取,减少因动态渲染导致的截断误差。
|
||||
- 索引优化:查询词模型建立多字段索引,加速分页与过滤查询。
|
||||
- 缓存与队列:建议结合 Redis 实现任务队列与结果缓存,进一步降低重复查询成本。
|
||||
|
||||
## 故障排查指南
|
||||
- 健康检查:Compose 为数据库与缓存配置了健康检查,后端依赖二者处于健康状态才会启动;若数据库或缓存未就绪,后端不可用,前端也就无法连接后端。
|
||||
- Playwright 依赖:适配器依赖由 Playwright 驱动的 Chromium 浏览器;若浏览器启动失败,请先通过 Playwright 安装 Chromium 及其所需的系统依赖。
|
||||
- CORS 限制:后端的 CORS 中间件仅允许本地前端域名访问;若需从其他域名发起跨域请求,需调整该中间件的允许来源配置。
|
||||
- 错误处理:前端客户端对非 2xx 响应抛出错误,后端路由对未找到与权限问题返回明确状态码。
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/workers/platforms/kimi.py:1-206](file://backend/app/workers/platforms/kimi.py#L1-L206)
|
||||
- [backend/app/workers/platforms/wenxin.py:1-205](file://backend/app/workers/platforms/wenxin.py#L1-L205)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
## 结论
|
||||
该系统通过清晰的分层架构与容器化部署,实现了从前端到后端、再到 AI 平台与数据库的完整闭环。适配器模式有效隔离平台差异,业务服务与 ORM 模型保障可维护性与扩展性。建议后续引入任务队列与缓存策略以进一步提升性能与用户体验。
|
||||
|
||||
## 附录
|
||||
- 开发环境启动:使用 Compose 启动数据库、缓存、后端与前端服务,分别暴露 5432、6379、8000、3000 端口。
|
||||
- 后端镜像:基于 Python slim 镜像,预装 Playwright 与系统依赖,暴露 8000 端口。
|
||||
- 前端镜像:基于 Node Alpine 镜像,安装依赖后暴露 3000 端口。
|
||||
|
||||
章节来源
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
- [backend/Dockerfile:1-41](file://backend/Dockerfile#L1-L41)
|
||||
- [frontend/Dockerfile:1-15](file://frontend/Dockerfile#L1-L15)
|
||||
|
|
@ -0,0 +1,348 @@
|
|||
# 项目介绍
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [backend/app/models/user.py](file://backend/app/models/user.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/api/citations.py](file://backend/app/api/citations.py)
|
||||
- [frontend/package.json](file://frontend/package.json)
|
||||
- [frontend/lib/api.ts](file://frontend/lib/api.ts)
|
||||
- [frontend/components/layout/header.tsx](file://frontend/components/layout/header.tsx)
|
||||
- [frontend/app/layout.tsx](file://frontend/app/layout.tsx)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [引言](#引言)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 引言
|
||||
GEO平台是一个面向智能学术查询与引用管理的现代化系统,旨在帮助研究人员、市场分析师与品牌监测团队高效地追踪特定品牌在多平台上的提及情况。平台通过“智能引用检测”“多AI平台集成”“定时查询调度”等核心能力,解决传统学术研究中品牌引用检测的痛点与多平台数据整合难题,提供统一的数据采集、分析与可视化入口。
|
||||
|
||||
- 核心使命:以自动化与智能化手段降低品牌监测与学术研究的人工成本,提升信息获取与洞察效率。
|
||||
- 价值主张:统一平台、可扩展的AI适配、稳定的定时调度、清晰的统计与导出能力,满足不同规模用户的使用需求。
|
||||
|
||||
## 项目结构
|
||||
项目采用前后端分离架构,后端基于FastAPI构建REST服务,数据库与缓存分别使用PostgreSQL与Redis;前端基于Next.js,提供用户认证、查询管理、引用数据展示与报告导出等功能。Docker Compose用于本地开发环境的一键编排。
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "前端(Frontend)"
|
||||
FE_APP["Next.js 应用"]
|
||||
FE_AUTH["认证模块<br/>登录/注册/会话"]
|
||||
FE_UI["UI组件与布局<br/>头部/侧边栏/图表"]
|
||||
FE_API["API封装<br/>统一请求与鉴权"]
|
||||
end
|
||||
subgraph "后端(Backend)"
|
||||
BE_MAIN["FastAPI 应用<br/>路由与生命周期"]
|
||||
BE_SCHED["定时调度器<br/>APScheduler"]
|
||||
BE_ENGINE["引用检测引擎<br/>品牌匹配/竞争品牌检测"]
|
||||
BE_MODELS["数据模型<br/>用户/查询/引用记录"]
|
||||
BE_API["API接口<br/>认证/查询/引用/报告"]
|
||||
end
|
||||
subgraph "基础设施"
|
||||
DB["PostgreSQL 数据库"]
|
||||
REDIS["Redis 缓存/任务队列"]
|
||||
end
|
||||
FE_APP --> FE_API
|
||||
FE_API --> BE_MAIN
|
||||
BE_MAIN --> BE_API
|
||||
BE_MAIN --> BE_SCHED
|
||||
BE_SCHED --> BE_ENGINE
|
||||
BE_ENGINE --> DB
|
||||
BE_MAIN --> DB
|
||||
BE_MAIN --> REDIS
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/app/main.py:24-47](file://backend/app/main.py#L24-L47)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
## 核心组件
|
||||
- 引用检测引擎:负责对指定关键词在多个AI平台进行检索,并执行品牌引用检测与竞争品牌识别,生成标准化的引用记录。
|
||||
- 定时调度器:基于APScheduler周期性扫描数据库中到期的查询任务,自动触发引用检测流程。
|
||||
- 数据模型:围绕用户、查询、引用记录与任务状态建立清晰的关系模型,支撑查询计划、历史记录与统计分析。
|
||||
- API层:提供认证、查询管理、引用数据查询与统计、即时执行、报告导出等接口。
|
||||
- 前端应用:提供用户认证、查询配置、引用数据浏览、趋势与平台分布图表以及CSV导出能力。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
|
||||
## 架构总览
|
||||
下图展示了从用户操作到数据落库的关键交互路径,包括认证、查询创建、定时调度、平台适配与结果存储。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as "用户"
|
||||
participant FE as "前端应用"
|
||||
participant API as "后端API"
|
||||
participant S as "调度器"
|
||||
participant E as "引用检测引擎"
|
||||
participant P as "AI平台适配器"
|
||||
participant DB as "数据库"
|
||||
U->>FE : 登录/创建查询
|
||||
FE->>API : 发起请求(带鉴权)
|
||||
API->>DB : 写入查询/更新状态
|
||||
API-->>FE : 返回响应
|
||||
S->>DB : 查询到期的查询任务
|
||||
S->>E : 触发执行
|
||||
E->>P : 平台查询(keyword)
|
||||
P-->>E : 返回原始回复
|
||||
E->>E : 品牌匹配/竞争品牌检测
|
||||
E->>DB : 写入引用记录
|
||||
E-->>S : 完成并更新下次执行时间
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/app/workers/scheduler.py:51-84](file://backend/app/workers/scheduler.py#L51-L84)
|
||||
- [backend/app/workers/citation_engine.py:159-234](file://backend/app/workers/citation_engine.py#L159-L234)
|
||||
- [backend/app/models/query.py:25-31](file://backend/app/models/query.py#L25-L31)
|
||||
- [backend/app/models/citation_record.py:19-33](file://backend/app/models/citation_record.py#L19-L33)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 引擎与平台适配
|
||||
- 品牌匹配器:支持精确、别名与模糊匹配,返回是否引用、置信度、首次出现段落位置及上下文片段。
|
||||
- 竞争品牌检测器:基于预定义行业品牌清单,识别文本中除目标品牌外的其他品牌。
|
||||
- 引用检测引擎:封装平台适配器调用、品牌匹配与竞争品牌检测,生成标准化引用记录并更新查询的下次执行时间。
|
||||
- 平台适配器:当前包含“文心”“Kimi”两个适配器,未来可扩展更多平台。
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class CitationEngine {
|
||||
+execute_query(query, db) list
|
||||
+execute_single_platform(keyword, platform, target_brand, aliases) dict
|
||||
-_get_or_create_task(db, query_id, platform) QueryTask
|
||||
-_calculate_next_query_at(freq) datetime
|
||||
+close() void
|
||||
}
|
||||
class BrandMatcher {
|
||||
+match(text) dict
|
||||
-_extract_candidates(text) list
|
||||
-_extract_position_and_context(text, keyword) tuple
|
||||
}
|
||||
class CompetitorDetector {
|
||||
+detect(text, target_brand) list
|
||||
}
|
||||
class QueryScheduler {
|
||||
+start() void
|
||||
+shutdown() void
|
||||
+check_and_execute_queries() void
|
||||
}
|
||||
CitationEngine --> BrandMatcher : "使用"
|
||||
CitationEngine --> CompetitorDetector : "使用"
|
||||
QueryScheduler --> CitationEngine : "驱动执行"
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/app/workers/citation_engine.py:19-120](file://backend/app/workers/citation_engine.py#L19-L120)
|
||||
- [backend/app/workers/citation_engine.py:122-146](file://backend/app/workers/citation_engine.py#L122-L146)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
- [backend/app/workers/scheduler.py:25-95](file://backend/app/workers/scheduler.py#L25-L95)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:19-120](file://backend/app/workers/citation_engine.py#L19-L120)
|
||||
- [backend/app/workers/citation_engine.py:122-146](file://backend/app/workers/citation_engine.py#L122-L146)
|
||||
- [backend/app/workers/citation_engine.py:148-309](file://backend/app/workers/citation_engine.py#L148-L309)
|
||||
|
||||
### 定时调度流程
|
||||
- 启动阶段:应用生命周期内启动调度器,设置每小时检查一次。
|
||||
- 执行阶段:查询状态为“active”且下次执行时间已到达的任务,逐个触发引用检测。
|
||||
- 错误处理:单个任务失败不影响整体调度,异常被记录并继续处理其他任务。
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["启动调度器"]) --> AddJob["添加定时任务(每小时)"]
|
||||
AddJob --> Wait["等待触发"]
|
||||
Wait --> Trigger{"到达触发时间?"}
|
||||
Trigger --> |否| Wait
|
||||
Trigger --> |是| QueryDue["查询到期的查询任务"]
|
||||
QueryDue --> HasTasks{"是否有待执行任务?"}
|
||||
HasTasks --> |否| Wait
|
||||
HasTasks --> |是| ExecOne["执行单个查询任务"]
|
||||
ExecOne --> UpdateTime["更新下次执行时间"]
|
||||
UpdateTime --> Wait
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/app/workers/scheduler.py:30-90](file://backend/app/workers/scheduler.py#L30-L90)
|
||||
- [backend/app/models/query.py:25-31](file://backend/app/models/query.py#L25-L31)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
|
||||
### 数据模型与关系
|
||||
- 用户(User):用户基本信息、订阅计划与配额。
|
||||
- 查询(Query):关键词、目标品牌、别名、平台集合、频率、状态与时间戳。
|
||||
- 引用记录(CitationRecord):平台来源、是否引用、引用位置、上下文、竞争品牌列表与原始回复。
|
||||
- 关系:用户与查询一对多;查询与引用记录、查询任务一对多。
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
USERS {
|
||||
uuid id PK
|
||||
string email UK
|
||||
string name
|
||||
string plan
|
||||
int max_queries
|
||||
boolean is_active
|
||||
}
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamp last_queried_at
|
||||
timestamp next_query_at
|
||||
}
|
||||
CITATION_RECORDS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
boolean cited
|
||||
int citation_position
|
||||
text citation_text
|
||||
jsonb competitor_brands
|
||||
text raw_response
|
||||
}
|
||||
USERS ||--o{ QUERIES : "拥有"
|
||||
QUERIES ||--o{ CITATION_RECORDS : "产生"
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/models/user.py:11-41](file://backend/app/models/user.py#L11-L41)
|
||||
- [backend/app/models/query.py:11-55](file://backend/app/models/query.py#L11-L55)
|
||||
- [backend/app/models/citation_record.py:11-42](file://backend/app/models/citation_record.py#L11-L42)
|
||||
|
||||
### API与前端交互
|
||||
- 后端API:提供认证、查询管理、引用数据分页与统计、即时执行、报告导出等接口。
|
||||
- 前端封装:统一的API模块负责鉴权头注入与错误处理,页面通过hooks与组件化UI实现交互。
|
||||
- 认证流程:登录成功后保存会话,后续请求携带令牌访问受保护资源。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as "客户端"
|
||||
participant A as "认证API"
|
||||
participant S as "会话状态"
|
||||
participant D as "受保护API"
|
||||
C->>A : POST /api/v1/auth/login
|
||||
A-->>C : 返回令牌
|
||||
C->>S : 保存令牌
|
||||
C->>D : GET /api/v1/queries (携带Authorization)
|
||||
D-->>C : 返回数据
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/app/api/citations.py:25-78](file://backend/app/api/citations.py#L25-L78)
|
||||
- [frontend/lib/api.ts:23-57](file://frontend/lib/api.ts#L23-L57)
|
||||
- [frontend/components/layout/header.tsx:7-28](file://frontend/components/layout/header.tsx#L7-L28)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/citations.py:1-78](file://backend/app/api/citations.py#L1-L78)
|
||||
- [frontend/lib/api.ts:1-58](file://frontend/lib/api.ts#L1-L58)
|
||||
- [frontend/components/layout/header.tsx:1-30](file://frontend/components/layout/header.tsx#L1-L30)
|
||||
|
||||
## 依赖分析
|
||||
- 运行时依赖:FastAPI、SQLAlchemy、Pydantic、Redis、APScheduler、Playwright、HTTPX、Python-Jose、Passlib等。
|
||||
- 前端依赖:Next.js、Next-Auth、Recharts、Radix UI等。
|
||||
- 基础设施:PostgreSQL与Redis容器通过Docker Compose编排,后端服务暴露8000端口,前端服务暴露3000端口。
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
subgraph "后端"
|
||||
F["FastAPI"]
|
||||
S["SQLAlchemy"]
|
||||
P["Pydantic"]
|
||||
R["Redis"]
|
||||
A["APScheduler"]
|
||||
PW["Playwright"]
|
||||
H["HTTPX"]
|
||||
end
|
||||
subgraph "前端"
|
||||
N["Next.js"]
|
||||
NA["Next-Auth"]
|
||||
RC["Recharts"]
|
||||
RU["Radix UI"]
|
||||
end
|
||||
F --> S
|
||||
F --> P
|
||||
F --> R
|
||||
F --> A
|
||||
F --> PW
|
||||
F --> H
|
||||
N --> NA
|
||||
N --> RC
|
||||
N --> RU
|
||||
```
|
||||
|
||||
**图示来源**
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:11-28](file://frontend/package.json#L11-L28)
|
||||
|
||||
**章节来源**
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
- [docker-compose.yml:36-66](file://docker-compose.yml#L36-L66)
|
||||
|
||||
## 性能考虑
|
||||
- 异步与并发:后端使用异步数据库会话与异步调度器,减少阻塞;平台适配器在执行查询时应避免同步阻塞操作。
|
||||
- 缓存策略:利用Redis缓存短期高频数据与任务状态,降低数据库压力。
|
||||
- 分页与索引:引用数据查询支持分页与多条件过滤;数据库表建立必要索引以优化查询性能。
|
||||
- 调度粒度:默认每小时检查一次到期任务,可根据业务量调整间隔。
|
||||
- 前端渲染:图表组件按需加载,避免一次性渲染大量数据导致卡顿。
|
||||
|
||||
## 故障排查指南
|
||||
- 健康检查:后端提供健康检查端点,可用于容器编排下的存活探针。
|
||||
- 日志定位:调度器与引擎均输出详细日志,便于定位任务失败原因。
|
||||
- 数据一致性:若发现引用记录缺失,检查任务状态与错误信息,确认平台适配器可用性与网络连通性。
|
||||
- 认证问题:确认前端令牌是否正确注入,后端CORS配置是否允许前端域名访问。
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:45-47](file://backend/app/main.py#L45-L47)
|
||||
- [backend/app/workers/scheduler.py:76-84](file://backend/app/workers/scheduler.py#L76-L84)
|
||||
- [backend/app/workers/citation_engine.py:211-227](file://backend/app/workers/citation_engine.py#L211-L227)
|
||||
|
||||
## 结论
|
||||
GEO平台通过“智能引用检测+多平台集成+定时调度”的组合拳,有效解决了品牌监测与学术研究中的重复劳动与数据割裂问题。其模块化设计与前后端分离架构便于扩展与维护,适合研究人员、市场分析师与品牌监测团队在不同场景下灵活使用。
|
||||
|
||||
## 附录
|
||||
- 快速开始:使用Docker Compose一键启动数据库、缓存、后端与前端服务,访问 http://localhost:3000 进行体验。
|
||||
- 开发建议:新增平台适配器时遵循现有适配器接口规范;为关键流程补充单元测试与集成测试;持续优化数据库索引与查询性能。
|
||||
|
|
@ -0,0 +1,408 @@
|
|||
# 项目概述
|
||||
|
||||
<cite>
|
||||
**本文档引用的文件**
|
||||
- [backend/app/main.py](file://backend/app/main.py)
|
||||
- [backend/app/config.py](file://backend/app/config.py)
|
||||
- [backend/app/workers/scheduler.py](file://backend/app/workers/scheduler.py)
|
||||
- [backend/app/workers/citation_engine.py](file://backend/app/workers/citation_engine.py)
|
||||
- [backend/app/workers/platforms/base.py](file://backend/app/workers/platforms/base.py)
|
||||
- [backend/app/api/queries.py](file://backend/app/api/queries.py)
|
||||
- [backend/app/models/query.py](file://backend/app/models/query.py)
|
||||
- [backend/app/models/citation_record.py](file://backend/app/models/citation_record.py)
|
||||
- [frontend/app/layout.tsx](file://frontend/app/layout.tsx)
|
||||
- [frontend/package.json](file://frontend/package.json)
|
||||
- [frontend/components/charts/platform-chart.tsx](file://frontend/components/charts/platform-chart.tsx)
|
||||
- [frontend/lib/platforms.ts](file://frontend/lib/platforms.ts)
|
||||
- [docker-compose.yml](file://docker-compose.yml)
|
||||
- [backend/requirements.txt](file://backend/requirements.txt)
|
||||
</cite>
|
||||
|
||||
## 目录
|
||||
1. [引言](#引言)
|
||||
2. [项目结构](#项目结构)
|
||||
3. [核心组件](#核心组件)
|
||||
4. [架构总览](#架构总览)
|
||||
5. [详细组件分析](#详细组件分析)
|
||||
6. [依赖分析](#依赖分析)
|
||||
7. [性能考虑](#性能考虑)
|
||||
8. [故障排查指南](#故障排查指南)
|
||||
9. [结论](#结论)
|
||||
10. [附录](#附录)
|
||||
|
||||
## 引言
|
||||
GEO平台是一个面向智能学术查询与引用管理的系统,旨在帮助用户通过多AI平台进行关键词检索,并自动识别目标品牌在检索结果中的被引用情况,同时提供可视化报表与定时任务调度能力。平台采用前后端分离架构,后端基于FastAPI构建REST服务,前端基于Next.js提供交互界面;数据库采用PostgreSQL,缓存与任务调度由Redis与APScheduler支撑。
|
||||
|
||||
本项目的核心目标是:
|
||||
- 提供统一的查询入口,整合多家AI平台能力
|
||||
- 自动化定时查询与引用检测,降低人工成本
|
||||
- 以图表形式直观展示各平台引用率趋势
|
||||
- 保障可扩展性与可维护性,便于后续接入更多平台与功能
|
||||
|
||||
## 项目结构
|
||||
项目采用前后端分离与容器化部署策略:
|
||||
- 后端:FastAPI应用,负责API路由、业务逻辑、定时任务调度与数据库交互
|
||||
- 前端:Next.js应用,负责用户界面、图表展示与API调用
|
||||
- 数据层:PostgreSQL存储查询与引用记录;Redis用于任务调度与缓存
|
||||
- 容器编排:Docker Compose统一管理数据库、缓存、后端与前端服务
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "前端"
|
||||
FE_APP["Next.js 应用<br/>页面与图表组件"]
|
||||
end
|
||||
subgraph "后端"
|
||||
BE_API["FastAPI 应用<br/>路由与服务"]
|
||||
BE_SCHED["APScheduler 调度器<br/>定时任务"]
|
||||
BE_ENGINE["引用检测引擎<br/>品牌匹配与竞争品牌检测"]
|
||||
end
|
||||
subgraph "数据与基础设施"
|
||||
DB[("PostgreSQL")]
|
||||
CACHE[("Redis 缓存/队列")]
|
||||
end
|
||||
FE_APP --> |"HTTP 请求"| BE_API
|
||||
BE_API --> |"数据库访问"| DB
|
||||
BE_API --> |"任务调度"| BE_SCHED
|
||||
BE_SCHED --> |"调用引擎"| BE_ENGINE
|
||||
BE_ENGINE --> |"写入记录"| DB
|
||||
BE_API --> CACHE
|
||||
BE_SCHED --> CACHE
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
## 核心组件
|
||||
- 后端主程序与生命周期管理:负责应用启动/关闭、CORS配置、路由注册与健康检查
|
||||
- 定时任务调度器:基于APScheduler的异步调度器,周期性扫描待执行查询并触发引用检测
|
||||
- 引用检测引擎:封装品牌匹配、竞争品牌检测与平台适配器调用流程
|
||||
- 平台适配器基类:定义统一的AI平台查询接口,便于扩展新的平台
|
||||
- API路由与服务:提供查询词的增删改查、运行一次查询等接口
|
||||
- 数据模型:定义查询、引用记录、任务等核心实体及其索引
|
||||
- 前端布局与图表:提供页面骨架、主题样式与平台引用率柱状图展示
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [frontend/components/charts/platform-chart.tsx:1-68](file://frontend/components/charts/platform-chart.tsx#L1-L68)
|
||||
|
||||
## 架构总览
|
||||
GEO平台采用分层架构:
|
||||
- 表现层:Next.js应用,负责UI渲染与用户交互
|
||||
- 控制层:FastAPI路由与服务,处理请求、鉴权与业务编排
|
||||
- 领域层:引用检测引擎与平台适配器,实现品牌匹配与跨平台查询
|
||||
- 基础设施层:PostgreSQL与Redis,提供持久化与任务调度支撑
|
||||
- 集成层:Docker Compose统一编排,确保服务间依赖与网络互通
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
UI["前端页面<br/>Next.js"] --> API["后端API<br/>FastAPI"]
|
||||
API --> SCHED["调度器<br/>APScheduler"]
|
||||
API --> DBM["数据库<br/>SQLAlchemy ORM"]
|
||||
API --> CACHE["缓存<br/>Redis"]
|
||||
SCHED --> ENGINE["引擎<br/>CitationEngine"]
|
||||
ENGINE --> ADAPTERS["平台适配器<br/>BasePlatformAdapter"]
|
||||
ENGINE --> DBM
|
||||
ADAPTERS --> |"调用外部平台"| EXTERNAL["外部AI平台"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
|
||||
## 详细组件分析
|
||||
|
||||
### 后端主程序与生命周期
|
||||
- 负责注册认证、查询、引用、报告等路由
|
||||
- 配置CORS允许前端本地开发环境访问
|
||||
- 应用生命周期内启动调度器并在关闭时优雅退出
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client as "浏览器"
|
||||
participant API as "FastAPI 应用"
|
||||
participant Sched as "调度器"
|
||||
participant Engine as "引用引擎"
|
||||
Client->>API : "GET /health"
|
||||
API-->>Client : "{status : ok}"
|
||||
Note over API,Sched : "应用启动时"
|
||||
API->>Sched : "start()"
|
||||
Sched->>Sched : "注册每小时检查任务"
|
||||
Note over Sched,Engine : "定时触发"
|
||||
Sched->>Engine : "执行查询与检测"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
|
||||
### 定时任务调度器
|
||||
- 使用AsyncIOScheduler按小时轮询查询表
|
||||
- 过滤状态为active且到达下次查询时间的记录
|
||||
- 逐条调用引用引擎执行查询,更新任务状态与下次查询时间
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Start(["定时任务触发"]) --> Fetch["查询数据库<br/>筛选待执行查询"]
|
||||
Fetch --> HasMore{"是否有待执行查询?"}
|
||||
HasMore --> |否| End(["结束"])
|
||||
HasMore --> |是| Exec["调用引擎执行查询"]
|
||||
Exec --> Update["更新查询任务状态与下次查询时间"]
|
||||
Update --> Fetch
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
|
||||
### 引用检测引擎
|
||||
- 品牌匹配器:支持精确、别名、模糊匹配,输出引用位置与置信度
|
||||
- 竞争品牌检测:在已知品牌库中识别其他相关品牌
|
||||
- 平台适配器:封装不同AI平台的查询接口,统一返回原始文本
|
||||
- 任务管理:为每次查询创建或获取任务记录,跟踪状态与错误信息
|
||||
- 结果持久化:生成引用记录并写入数据库
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class CitationEngine {
|
||||
+execute_query(query, db) list
|
||||
+execute_single_platform(keyword, platform, target_brand, aliases) dict
|
||||
-_get_or_create_task(db, query_id, platform) QueryTask
|
||||
-_calculate_next_query_at(frequency) datetime
|
||||
+close() void
|
||||
}
|
||||
class BrandMatcher {
|
||||
+match(text) dict
|
||||
-_extract_candidates(text) list
|
||||
-_extract_position_and_context(text, keyword) tuple
|
||||
}
|
||||
class CompetitorDetector {
|
||||
+detect(text, target_brand) list
|
||||
}
|
||||
class BasePlatformAdapter {
|
||||
<<abstract>>
|
||||
+platform_name str
|
||||
+platform_url str
|
||||
+query(keyword) str
|
||||
+close() void
|
||||
}
|
||||
CitationEngine --> BrandMatcher : "使用"
|
||||
CitationEngine --> CompetitorDetector : "使用"
|
||||
CitationEngine --> BasePlatformAdapter : "调用"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [backend/app/workers/platforms/base.py:1-18](file://backend/app/workers/platforms/base.py#L1-L18)
|
||||
|
||||
### API与数据模型
|
||||
- 查询API:提供查询词的分页列表、创建、读取、更新、删除
|
||||
- 查询模型:包含关键词、目标品牌、别名、平台集合、频率、状态与时间戳
|
||||
- 引用记录模型:记录每次查询在特定平台上的引用结果、竞争品牌与原始响应
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
QUERIES {
|
||||
uuid id PK
|
||||
uuid user_id FK
|
||||
string keyword
|
||||
string target_brand
|
||||
jsonb brand_aliases
|
||||
jsonb platforms
|
||||
string frequency
|
||||
string status
|
||||
timestamp last_queried_at
|
||||
timestamp next_query_at
|
||||
timestamp created_at
|
||||
timestamp updated_at
|
||||
}
|
||||
CITATION_RECORDS {
|
||||
uuid id PK
|
||||
uuid query_id FK
|
||||
string platform
|
||||
boolean cited
|
||||
int citation_position
|
||||
text citation_text
|
||||
jsonb competitor_brands
|
||||
text raw_response
|
||||
timestamp queried_at
|
||||
}
|
||||
QUERIES ||--o{ CITATION_RECORDS : "包含"
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/models/query.py:1-55](file://backend/app/models/query.py#L1-L55)
|
||||
- [backend/app/models/citation_record.py:1-42](file://backend/app/models/citation_record.py#L1-L42)
|
||||
|
||||
### 前端布局与可视化
|
||||
- 页面布局:定义站点元数据与全局样式,提供Provider容器
|
||||
- 图表组件:基于Recharts绘制平台引用率柱状图,支持响应式与自定义颜色
|
||||
- 平台映射:提供平台键值到中文名称的映射,便于展示
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
Layout["页面布局<br/>layout.tsx"] --> Providers["全局Provider"]
|
||||
Providers --> Charts["图表组件<br/>PlatformChart"]
|
||||
Charts --> Data["引用统计数据"]
|
||||
Charts --> Map["平台映射<br/>platforms.ts"]
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [frontend/components/charts/platform-chart.tsx:1-68](file://frontend/components/charts/platform-chart.tsx#L1-L68)
|
||||
- [frontend/lib/platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
|
||||
**章节来源**
|
||||
- [frontend/app/layout.tsx:1-37](file://frontend/app/layout.tsx#L1-L37)
|
||||
- [frontend/components/charts/platform-chart.tsx:1-68](file://frontend/components/charts/platform-chart.tsx#L1-L68)
|
||||
- [frontend/lib/platforms.ts:1-18](file://frontend/lib/platforms.ts#L1-L18)
|
||||
|
||||
## 依赖分析
|
||||
- 技术栈选择理由
|
||||
- 后端:FastAPI提供高性能异步API与自动生成文档;SQLAlchemy与异步驱动支持高并发;APScheduler与Redis满足任务调度与缓存需求
|
||||
- 前端:Next.js提供SSR/CSR混合模式与良好开发体验;Radix UI与Tailwind提供一致的UI组件与样式;Recharts用于数据可视化
|
||||
- 数据与基础设施:PostgreSQL适合结构化数据与复杂查询;Redis适合任务调度与会话缓存
|
||||
- 外部依赖与集成点
|
||||
- 平台适配器:通过抽象基类统一不同AI平台的查询接口
|
||||
- CORS:允许前端本地开发端口访问后端
|
||||
- 环境变量:数据库、缓存、密钥与浏览器自动化路径通过配置集中管理
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "后端依赖"
|
||||
FAST["FastAPI"]
|
||||
SQL["SQLAlchemy + asyncpg"]
|
||||
REDIS["Redis"]
|
||||
APS["APScheduler"]
|
||||
PW["Playwright"]
|
||||
end
|
||||
subgraph "前端依赖"
|
||||
NEXT["Next.js"]
|
||||
RADIX["Radix UI"]
|
||||
RECHARTS["Recharts"]
|
||||
TAILWIND["TailwindCSS"]
|
||||
end
|
||||
FAST --> SQL
|
||||
FAST --> REDIS
|
||||
FAST --> APS
|
||||
FAST --> PW
|
||||
NEXT --> RADIX
|
||||
NEXT --> RECHARTS
|
||||
NEXT --> TAILWIND
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
|
||||
**章节来源**
|
||||
- [backend/requirements.txt:1-35](file://backend/requirements.txt#L1-L35)
|
||||
- [frontend/package.json:1-40](file://frontend/package.json#L1-L40)
|
||||
|
||||
## 性能考虑
|
||||
- 异步化与并发
|
||||
- 后端采用异步数据库会话与异步调度器,提升I/O密集型场景下的吞吐量
|
||||
- 索引优化
|
||||
- 查询与引用记录模型建立复合索引,加速定时任务扫描与报表查询
|
||||
- 缓存与任务解耦
|
||||
- Redis承担任务调度与会话缓存,避免阻塞主业务线程
|
||||
- 可视化性能
|
||||
- 图表组件使用响应式容器与轻量级渲染,减少重绘开销
|
||||
|
||||
[本节为通用性能建议,无需特定文件引用]
|
||||
|
||||
## 故障排查指南
|
||||
- 健康检查
|
||||
- 后端提供健康检查端点,用于确认服务可用性
|
||||
- 日志与错误处理
|
||||
- 调度器与引擎在执行过程中记录错误日志,便于定位失败原因
|
||||
- 常见问题
|
||||
- 数据库连接失败:检查PostgreSQL容器健康状态与连接字符串
|
||||
- 任务未执行:确认Redis可用与调度器已启动
|
||||
- 平台适配器异常:检查对应平台的API密钥与网络连通性
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/main.py:45-48](file://backend/app/main.py#L45-L48)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/config.py:1-17](file://backend/app/config.py#L1-L17)
|
||||
|
||||
## 结论
|
||||
GEO平台通过“多AI平台集成 + 定时查询任务调度 + 数据可视化”的组合,为用户提供从查询到洞察的一体化能力。其分层清晰、模块解耦的设计便于后续扩展更多平台与功能;容器化部署降低了运维复杂度。对于初学者,平台提供了直观的可视化与简洁的API;对于开发者,平台展示了异步架构、任务调度与领域建模的最佳实践。
|
||||
|
||||
[本节为总结性内容,无需特定文件引用]
|
||||
|
||||
## 附录
|
||||
|
||||
### 系统边界与核心组件关系图
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph "外部系统"
|
||||
Users["用户"]
|
||||
AIs["多家AI平台"]
|
||||
end
|
||||
subgraph "GEO平台"
|
||||
Frontend["前端应用"]
|
||||
Backend["后端API"]
|
||||
Scheduler["调度器"]
|
||||
Engine["引用引擎"]
|
||||
DB[("PostgreSQL")]
|
||||
Cache[("Redis")]
|
||||
end
|
||||
Users --> Frontend
|
||||
Frontend --> Backend
|
||||
Backend --> DB
|
||||
Backend --> Cache
|
||||
Scheduler --> Engine
|
||||
Engine --> AIs
|
||||
Engine --> DB
|
||||
Scheduler --> DB
|
||||
Scheduler --> Cache
|
||||
```
|
||||
|
||||
**图表来源**
|
||||
- [backend/app/main.py:1-48](file://backend/app/main.py#L1-L48)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [docker-compose.yml:1-71](file://docker-compose.yml#L1-L71)
|
||||
|
||||
### 数据流向说明
|
||||
- 用户在前端发起查询管理与报表查看请求
|
||||
- 后端API接收请求并访问数据库与缓存
|
||||
- 调度器周期性触发引用引擎执行跨平台查询
|
||||
- 引擎对平台返回的原始文本进行品牌匹配与竞争品牌检测
|
||||
- 结果写入数据库并供前端图表组件消费
|
||||
|
||||
**章节来源**
|
||||
- [backend/app/api/queries.py:1-86](file://backend/app/api/queries.py#L1-L86)
|
||||
- [backend/app/workers/scheduler.py:1-95](file://backend/app/workers/scheduler.py#L1-L95)
|
||||
- [backend/app/workers/citation_engine.py:1-309](file://backend/app/workers/citation_engine.py#L1-L309)
|
||||
- [frontend/components/charts/platform-chart.tsx:1-68](file://frontend/components/charts/platform-chart.tsx#L1-L68)
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1,40 @@
|
|||
FROM python:3.11-slim

WORKDIR /app

# System libraries required by the Chromium build that Playwright drives.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    wget \
    gnupg \
    libglib2.0-0 \
    libnss3 \
    libnspr4 \
    libatk1.0-0 \
    libatk-bridge2.0-0 \
    libcups2 \
    libdrm2 \
    libxkbcommon0 \
    libxcomposite1 \
    libxdamage1 \
    libxrandr2 \
    libgbm1 \
    libpango-1.0-0 \
    libcairo2 \
    libasound2 \
    && rm -rf /var/lib/apt/lists/*

# Copy the dependency manifest first so the pip layer is cached until it changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Pin the browser location at build time. The project's environment template
# sets PLAYWRIGHT_BROWSERS_PATH=/ms-playwright for the running service; without
# the same value here the browser would be downloaded to the default per-user
# cache and would not be found at runtime.
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright

# Install Chromium together with any remaining OS dependencies in one layer,
# then drop the apt lists that the dependency step leaves behind.
RUN playwright install --with-deps chromium \
    && rm -rf /var/lib/apt/lists/*

# Copy the application code last so code edits do not invalidate the layers above.
COPY . .

EXPOSE 8000

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
|
|
@ -0,0 +1,149 @@
|
|||
# A generic, single database configuration.
|
||||
|
||||
[alembic]
|
||||
# path to migration scripts.
|
||||
# this is typically a path given in POSIX (e.g. forward slashes)
|
||||
# format, relative to the token %(here)s which refers to the location of this
|
||||
# ini file
|
||||
script_location = %(here)s/alembic
|
||||
|
||||
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
|
||||
# Uncomment the line below if you want the files to be prepended with date and time
|
||||
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
|
||||
# for all available tokens
|
||||
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
|
||||
# Or organize into date-based subdirectories (requires recursive_version_locations = true)
|
||||
# file_template = %%(year)d/%%(month).2d/%%(day).2d_%%(hour).2d%%(minute).2d_%%(second).2d_%%(rev)s_%%(slug)s
|
||||
|
||||
# sys.path path, will be prepended to sys.path if present.
|
||||
# defaults to the current working directory. for multiple paths, the path separator
|
||||
# is defined by "path_separator" below.
|
||||
prepend_sys_path = .
|
||||
|
||||
|
||||
# timezone to use when rendering the date within the migration file
|
||||
# as well as the filename.
|
||||
# If specified, requires the tzdata library which can be installed by adding
|
||||
# `alembic[tz]` to the pip requirements.
|
||||
# string value is passed to ZoneInfo()
|
||||
# leave blank for localtime
|
||||
# timezone =
|
||||
|
||||
# max length of characters to apply to the "slug" field
|
||||
# truncate_slug_length = 40
|
||||
|
||||
# set to 'true' to run the environment during
|
||||
# the 'revision' command, regardless of autogenerate
|
||||
# revision_environment = false
|
||||
|
||||
# set to 'true' to allow .pyc and .pyo files without
|
||||
# a source .py file to be detected as revisions in the
|
||||
# versions/ directory
|
||||
# sourceless = false
|
||||
|
||||
# version location specification; This defaults
|
||||
# to <script_location>/versions. When using multiple version
|
||||
# directories, initial revisions must be specified with --version-path.
|
||||
# The path separator used here should be the separator specified by "path_separator"
|
||||
# below.
|
||||
# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
|
||||
|
||||
# path_separator; This indicates what character is used to split lists of file
|
||||
# paths, including version_locations and prepend_sys_path within configparser
|
||||
# files such as alembic.ini.
|
||||
# The default rendered in new alembic.ini files is "os", which uses os.pathsep
|
||||
# to provide os-dependent path splitting.
|
||||
#
|
||||
# Note that in order to support legacy alembic.ini files, this default does NOT
|
||||
# take place if path_separator is not present in alembic.ini. If this
|
||||
# option is omitted entirely, fallback logic is as follows:
|
||||
#
|
||||
# 1. Parsing of the version_locations option falls back to using the legacy
|
||||
# "version_path_separator" key, which if absent then falls back to the legacy
|
||||
# behavior of splitting on spaces and/or commas.
|
||||
# 2. Parsing of the prepend_sys_path option falls back to the legacy
|
||||
# behavior of splitting on spaces, commas, or colons.
|
||||
#
|
||||
# Valid values for path_separator are:
|
||||
#
|
||||
# path_separator = :
|
||||
# path_separator = ;
|
||||
# path_separator = space
|
||||
# path_separator = newline
|
||||
#
|
||||
# Use os.pathsep. Default configuration used for new projects.
|
||||
path_separator = os
|
||||
|
||||
# set to 'true' to search source files recursively
|
||||
# in each "version_locations" directory
|
||||
# new in Alembic version 1.10
|
||||
# recursive_version_locations = false
|
||||
|
||||
# the output encoding used when revision files
|
||||
# are written from script.py.mako
|
||||
# output_encoding = utf-8
|
||||
|
||||
# database URL. This is consumed by the user-maintained env.py script only.
|
||||
# other means of configuring database URLs may be customized within the env.py
|
||||
# file.
|
||||
sqlalchemy.url = postgresql+asyncpg://postgres:postgres123@db:5432/geo_platform
|
||||
|
||||
|
||||
[post_write_hooks]
|
||||
# post_write_hooks defines scripts or Python functions that are run
|
||||
# on newly generated revision scripts. See the documentation for further
|
||||
# detail and examples
|
||||
|
||||
# format using "black" - use the console_scripts runner, against the "black" entrypoint
|
||||
# hooks = black
|
||||
# black.type = console_scripts
|
||||
# black.entrypoint = black
|
||||
# black.options = -l 79 REVISION_SCRIPT_FILENAME
|
||||
|
||||
# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module
|
||||
# hooks = ruff
|
||||
# ruff.type = module
|
||||
# ruff.module = ruff
|
||||
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
|
||||
|
||||
# Alternatively, use the exec runner to execute a binary found on your PATH
|
||||
# hooks = ruff
|
||||
# ruff.type = exec
|
||||
# ruff.executable = ruff
|
||||
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
|
||||
|
||||
# Logging configuration. This is also consumed by the user-maintained
|
||||
# env.py script only.
|
||||
[loggers]
|
||||
keys = root,sqlalchemy,alembic
|
||||
|
||||
[handlers]
|
||||
keys = console
|
||||
|
||||
[formatters]
|
||||
keys = generic
|
||||
|
||||
[logger_root]
|
||||
level = WARNING
|
||||
handlers = console
|
||||
qualname =
|
||||
|
||||
[logger_sqlalchemy]
|
||||
level = WARNING
|
||||
handlers =
|
||||
qualname = sqlalchemy.engine
|
||||
|
||||
[logger_alembic]
|
||||
level = INFO
|
||||
handlers =
|
||||
qualname = alembic
|
||||
|
||||
[handler_console]
|
||||
class = StreamHandler
|
||||
args = (sys.stderr,)
|
||||
level = NOTSET
|
||||
formatter = generic
|
||||
|
||||
[formatter_generic]
|
||||
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||
datefmt = %H:%M:%S
|
||||
|
|
@ -0,0 +1 @@
|
|||
Generic single-database configuration.
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
import asyncio
|
||||
from logging.config import fileConfig
|
||||
|
||||
from sqlalchemy import pool
|
||||
from sqlalchemy.engine import Connection
|
||||
from sqlalchemy.ext.asyncio import create_async_engine
|
||||
|
||||
from alembic import context
|
||||
|
||||
from app.config import settings
|
||||
from app.database import Base
|
||||
from app.models import * # noqa: F401, F403
|
||||
|
||||
# this is the Alembic Config object, which provides
|
||||
# access to the values within the .ini file in use.
|
||||
config = context.config
|
||||
|
||||
# Interpret the config file for Python logging.
|
||||
# This line sets up loggers basically.
|
||||
if config.config_file_name is not None:
|
||||
fileConfig(config.config_file_name)
|
||||
|
||||
# add your model's MetaData object here
|
||||
# for 'autogenerate' support
|
||||
target_metadata = Base.metadata
|
||||
|
||||
# other values from the config, defined by the needs of env.py,
|
||||
# can be acquired:
|
||||
# my_important_option = config.get_main_option("my_important_option")
|
||||
# ... etc.
|
||||
|
||||
|
||||
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The Alembic context is configured with the database URL taken from the
    application settings rather than with a live connection, and bound
    parameters are rendered inline (``literal_binds=True``).
    """
    offline_options = {
        "url": settings.DATABASE_URL,
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
|
||||
|
||||
|
||||
def do_run_migrations(connection: Connection) -> None:
    """Configure the Alembic context on ``connection`` and run the migrations.

    Args:
        connection: The synchronous connection handed over by
            ``run_sync`` in ``run_async_migrations``.
    """
    online_options = {
        "connection": connection,
        "target_metadata": target_metadata,
    }
    context.configure(**online_options)

    with context.begin_transaction():
        context.run_migrations()
|
||||
|
||||
|
||||
async def run_async_migrations() -> None:
    """Create an async engine, run the migrations on it, then dispose of it.

    The engine is built from ``settings.DATABASE_URL`` with ``NullPool`` so no
    connections are kept around after the migration run.
    """
    connectable = create_async_engine(
        settings.DATABASE_URL,
        poolclass=pool.NullPool,
    )

    try:
        async with connectable.connect() as connection:
            # Alembic's migration machinery is synchronous; run_sync bridges
            # the async connection to do_run_migrations.
            await connection.run_sync(do_run_migrations)
    finally:
        # Dispose even when a migration raises, so the engine is always
        # released before the error propagates.
        await connectable.dispose()
|
||||
|
||||
|
||||
def run_migrations_online() -> None:
    """Run migrations in 'online' mode by driving the async runner to completion."""
    migration_run = run_async_migrations()
    asyncio.run(migration_run)
|
||||
|
||||
|
||||
# Pick the migration runner that matches the mode reported by the Alembic context.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
"""${message}
|
||||
|
||||
Revision ID: ${up_revision}
|
||||
Revises: ${down_revision | comma,n}
|
||||
Create Date: ${create_date}
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
${imports if imports else ""}
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = ${repr(up_revision)}
|
||||
down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
|
||||
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
|
||||
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Upgrade schema."""
|
||||
${upgrades if upgrades else "pass"}
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Downgrade schema."""
|
||||
${downgrades if downgrades else "pass"}
|
||||
|
|
@ -0,0 +1,127 @@
|
|||
"""Initial migration
|
||||
|
||||
Revision ID: 488d0bd5ab01
|
||||
Revises:
|
||||
Create Date: 2026-04-22 18:06:46.629263
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '488d0bd5ab01'
|
||||
down_revision: Union[str, Sequence[str], None] = None
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def _uuid_pk() -> sa.Column:
    """Build the ``id`` column: a non-null UUID defaulting to ``gen_random_uuid()``."""
    return sa.Column(
        'id',
        postgresql.UUID(as_uuid=True),
        server_default=sa.text('gen_random_uuid()'),
        nullable=False,
    )


def _uuid_ref(name: str) -> sa.Column:
    """Build a non-null UUID column used as a foreign-key reference."""
    return sa.Column(name, postgresql.UUID(as_uuid=True), nullable=False)


def _now_stamp(name: str) -> sa.Column:
    """Build a non-null timezone-aware timestamp column defaulting to ``NOW()``."""
    return sa.Column(
        name,
        sa.DateTime(timezone=True),
        server_default=sa.text('NOW()'),
        nullable=False,
    )


def _nullable_stamp(name: str) -> sa.Column:
    """Build a nullable timezone-aware timestamp column without a default."""
    return sa.Column(name, sa.DateTime(timezone=True), nullable=True)


def _cascade_fk(column: str, target: str) -> sa.ForeignKeyConstraint:
    """Build a single-column foreign key with ``ON DELETE CASCADE``."""
    return sa.ForeignKeyConstraint([column], [target], ondelete='CASCADE')


def upgrade() -> None:
    """Upgrade schema.

    Creates the ``users``, ``queries``, ``citation_records``, ``query_tasks``
    and ``subscriptions`` tables, in that order, each followed by its
    single-column indexes.
    """
    # users table
    op.create_table(
        'users',
        _uuid_pk(),
        sa.Column('email', sa.String(255), nullable=False),
        sa.Column('password_hash', sa.String(255), nullable=False),
        sa.Column('name', sa.String(100), nullable=True),
        sa.Column('plan', sa.String(20), server_default='free', nullable=False),
        sa.Column('max_queries', sa.Integer(), server_default='5', nullable=False),
        sa.Column('is_active', sa.Boolean(), server_default='true', nullable=False),
        _now_stamp('created_at'),
        _now_stamp('updated_at'),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('email'),
    )

    # queries table
    op.create_table(
        'queries',
        _uuid_pk(),
        _uuid_ref('user_id'),
        sa.Column('keyword', sa.String(200), nullable=False),
        sa.Column('target_brand', sa.String(100), nullable=False),
        sa.Column('brand_aliases', postgresql.JSONB(), server_default='[]', nullable=False),
        sa.Column('platforms', postgresql.JSONB(), server_default='["wenxin", "kimi"]', nullable=False),
        sa.Column('frequency', sa.String(20), server_default='weekly', nullable=False),
        sa.Column('status', sa.String(20), server_default='active', nullable=False),
        _nullable_stamp('last_queried_at'),
        _nullable_stamp('next_query_at'),
        _now_stamp('created_at'),
        _now_stamp('updated_at'),
        sa.PrimaryKeyConstraint('id'),
        _cascade_fk('user_id', 'users.id'),
    )
    for index_name, indexed_column in (
        ('idx_queries_user_id', 'user_id'),
        ('idx_queries_status', 'status'),
        ('idx_queries_next_query_at', 'next_query_at'),
    ):
        op.create_index(index_name, 'queries', [indexed_column])

    # citation_records table
    op.create_table(
        'citation_records',
        _uuid_pk(),
        _uuid_ref('query_id'),
        sa.Column('platform', sa.String(50), nullable=False),
        sa.Column('cited', sa.Boolean(), server_default='false', nullable=False),
        sa.Column('citation_position', sa.Integer(), nullable=True),
        sa.Column('citation_text', sa.Text(), nullable=True),
        sa.Column('competitor_brands', postgresql.JSONB(), server_default='[]', nullable=False),
        sa.Column('raw_response', sa.Text(), nullable=True),
        _now_stamp('queried_at'),
        sa.PrimaryKeyConstraint('id'),
        _cascade_fk('query_id', 'queries.id'),
    )
    for index_name, indexed_column in (
        ('idx_citation_records_query_id', 'query_id'),
        ('idx_citation_records_queried_at', 'queried_at'),
        ('idx_citation_records_platform', 'platform'),
    ):
        op.create_index(index_name, 'citation_records', [indexed_column])

    # query_tasks table
    op.create_table(
        'query_tasks',
        _uuid_pk(),
        _uuid_ref('query_id'),
        sa.Column('platform', sa.String(50), nullable=False),
        sa.Column('status', sa.String(20), server_default='pending', nullable=False),
        sa.Column('error_message', sa.Text(), nullable=True),
        _now_stamp('scheduled_at'),
        _nullable_stamp('started_at'),
        _nullable_stamp('completed_at'),
        sa.PrimaryKeyConstraint('id'),
        _cascade_fk('query_id', 'queries.id'),
    )
    op.create_index('idx_query_tasks_status', 'query_tasks', ['status'])

    # subscriptions table
    op.create_table(
        'subscriptions',
        _uuid_pk(),
        _uuid_ref('user_id'),
        sa.Column('plan', sa.String(20), nullable=False),
        sa.Column('status', sa.String(20), server_default='active', nullable=False),
        sa.Column('start_date', sa.Date(), nullable=False),
        sa.Column('end_date', sa.Date(), nullable=False),
        sa.Column('amount', sa.Numeric(10, 2), nullable=True),
        sa.Column('payment_method', sa.String(50), nullable=True),
        sa.Column('payment_id', sa.String(255), nullable=True),
        _now_stamp('created_at'),
        sa.PrimaryKeyConstraint('id'),
        _cascade_fk('user_id', 'users.id'),
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Downgrade schema.

    Drops every table created by ``upgrade`` in reverse order; a table's
    indexes are dropped immediately before the table itself.
    """
    teardown_plan = (
        ('subscriptions', ()),
        ('query_tasks', ('idx_query_tasks_status',)),
        (
            'citation_records',
            (
                'idx_citation_records_platform',
                'idx_citation_records_queried_at',
                'idx_citation_records_query_id',
            ),
        ),
        (
            'queries',
            (
                'idx_queries_next_query_at',
                'idx_queries_status',
                'idx_queries_user_id',
            ),
        ),
        ('users', ()),
    )
    for table, index_names in teardown_plan:
        for index_name in index_names:
            op.drop_index(index_name, table_name=table)
        op.drop_table(table)
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
"""Add confidence and match_type to citation_records
|
||||
|
||||
Revision ID: b2c4d6e8fa10
|
||||
Revises: 488d0bd5ab01
|
||||
Create Date: 2026-04-23 16:10:00.000000
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = 'b2c4d6e8fa10'
|
||||
down_revision: Union[str, Sequence[str], None] = '488d0bd5ab01'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add the nullable ``confidence`` and ``match_type`` columns to ``citation_records``."""
    added_columns = (
        sa.Column('confidence', sa.Float(), nullable=True),
        sa.Column('match_type', sa.String(20), nullable=True),
    )
    for column in added_columns:
        op.add_column('citation_records', column)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop ``match_type`` and then ``confidence`` from ``citation_records``."""
    for column_name in ('match_type', 'confidence'):
        op.drop_column('citation_records', column_name)
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
from app.api.auth import router as auth_router
|
||||
|
||||
__all__ = ["auth_router"]
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.api.deps import get_current_user
|
||||
from app.database import get_db
|
||||
from app.models.user import User
|
||||
from app.schemas.auth import TokenResponse, UserLogin, UserRegister, UserResponse
|
||||
from app.services.auth import authenticate_user, create_access_token, register_user
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/register", response_model=UserResponse, status_code=status.HTTP_201_CREATED)
async def register(user_data: UserRegister, db: AsyncSession = Depends(get_db)):
    """Register a new user and return it.

    A ``ValueError`` from ``register_user`` is translated into HTTP 400; the
    error's own message is used as the detail when it is non-empty.
    """
    try:
        return await register_user(db, user_data)
    except ValueError as exc:
        reason = str(exc) or "邮箱已被注册"
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=reason)
|
||||
|
||||
|
||||
@router.post("/login", response_model=TokenResponse)
async def login(user_data: UserLogin, db: AsyncSession = Depends(get_db)):
    """Authenticate by email and password and return a bearer token with the user.

    Responds with HTTP 401 when ``authenticate_user`` yields a falsy result.
    """
    account = await authenticate_user(db, user_data.email, user_data.password)
    if account:
        # The token subject is the user's id rendered as a string.
        token = create_access_token(data={"sub": str(account.id)})
        return {
            "access_token": token,
            "token_type": "bearer",
            "user": account,
        }

    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="邮箱或密码错误",
        headers={"WWW-Authenticate": "Bearer"},
    )
|
||||
|
||||
|
||||
@router.get("/me", response_model=UserResponse)
async def read_current_user(
    current_user: User = Depends(get_current_user),
):
    """Return the user resolved by the ``get_current_user`` dependency."""
    return current_user
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, status
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.api.deps import get_current_user
|
||||
from app.database import get_db
|
||||
from app.models.user import User
|
||||
from app.schemas.citation import (
|
||||
CitationListResponse,
|
||||
CitationStatsResponse,
|
||||
)
|
||||
from app.services.citation import (
|
||||
get_citation_stats,
|
||||
get_citations,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/", response_model=CitationListResponse)
async def list_citations(
    query_id: uuid.UUID | None = Query(None),
    platform: str | None = Query(None),
    start_date: datetime | None = Query(None),
    end_date: datetime | None = Query(None),
    skip: int = Query(0, ge=0),
    limit: int = Query(20, ge=1, le=100),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """List the current user's citation records with optional filters and paging.

    All filter and paging values are forwarded unchanged to ``get_citations``;
    the response carries the page of items and the total count it returns.
    """
    criteria = {
        "query_id": query_id,
        "platform": platform,
        "start_date": start_date,
        "end_date": end_date,
        "skip": skip,
        "limit": limit,
    }
    records, record_count = await get_citations(db, current_user.id, **criteria)
    return {"items": records, "total": record_count}
|
||||
|
||||
|
||||
@router.get("/stats", response_model=CitationStatsResponse)
async def citation_stats(
    query_id: uuid.UUID | None = Query(None),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return citation statistics for the current user, optionally for one query."""
    return await get_citation_stats(db, current_user.id, query_id=query_id)
|
||||
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
import uuid
|
||||
|
||||
from fastapi import Depends, HTTPException, status
|
||||
from fastapi.security import OAuth2PasswordBearer
|
||||
from jose import JWTError
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.database import get_db
|
||||
from app.models.user import User
|
||||
from app.services.auth import verify_token
|
||||
|
||||
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/v1/auth/login")
|
||||
|
||||
|
||||
async def get_current_user(
    token: str = Depends(oauth2_scheme),
    db: AsyncSession = Depends(get_db),
) -> User:
    """Resolve the bearer token to the ``User`` row it identifies.

    Args:
        token: Bearer token supplied by the OAuth2 scheme dependency.
        db: Database session used to look the user up.

    Returns:
        The ``User`` whose id matches the token's ``sub`` claim.

    Raises:
        HTTPException: 401 when the token fails verification, when ``sub`` is
            missing, not a string or not a valid UUID, or when no user has
            that id.
    """
    credentials_exception = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )

    try:
        payload = verify_token(token)
        subject = payload.get("sub")
        # A missing claim and a claim of the wrong type are rejected the same
        # way. uuid.UUID() raises AttributeError/TypeError (not ValueError)
        # for non-string input, which would otherwise escape as a 500.
        if not isinstance(subject, str):
            raise credentials_exception
        user_uuid = uuid.UUID(subject)
    except (JWTError, ValueError) as exc:
        raise credentials_exception from exc

    stmt = select(User).where(User.id == user_uuid)
    result = await db.execute(stmt)
    user = result.scalar_one_or_none()

    # NOTE(review): the users table defines an is_active flag, but it is not
    # checked here - confirm whether deactivated accounts should be rejected.
    if user is None:
        raise credentials_exception

    return user
|
||||
|
|
@ -0,0 +1,108 @@
|
|||
import uuid
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, status
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.api.deps import get_current_user
|
||||
from app.database import get_db
|
||||
from app.models.user import User
|
||||
from app.schemas.citation import RunNowResponse
|
||||
from app.schemas.query import QueryCreate, QueryListResponse, QueryResponse, QueryUpdate
|
||||
from app.services.citation import trigger_query_now
|
||||
from app.services.query import create_query, delete_query, get_queries, get_query, update_query
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/", response_model=QueryListResponse)
async def list_queries(
    skip: int = Query(0, ge=0),
    limit: int = Query(20, ge=1, le=100),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Paginated listing of the authenticated user's queries; `skip`/`limit`
    # are forwarded unchanged to the service layer.
    page, count = await get_queries(db, current_user.id, skip=skip, limit=limit)
    return dict(items=page, total=count)
|
||||
|
||||
|
||||
@router.post("/", response_model=QueryResponse, status_code=status.HTTP_201_CREATED)
async def create_new_query(
    query_data: QueryCreate,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Create a query for the caller. A PermissionError from the service is
    # reported as 403, using a default message when the error carries none.
    try:
        created = await create_query(db, current_user.id, query_data)
    except PermissionError as exc:
        reason = str(exc) or "查询词数量已达上限"
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=reason)
    return created
|
||||
|
||||
|
||||
@router.get("/{query_id}", response_model=QueryResponse)
async def retrieve_query(
    query_id: uuid.UUID,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Fetch one query scoped to the caller; a missing result becomes 404.
    found = await get_query(db, query_id, current_user.id)
    if found is not None:
        return found
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="查询词不存在",
    )
|
||||
|
||||
|
||||
@router.put("/{query_id}", response_model=QueryResponse)
async def modify_query(
    query_id: uuid.UUID,
    update_data: QueryUpdate,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Apply the update through the service; `None` from the service means the
    # query was not found for this user and is reported as 404.
    updated = await update_query(db, query_id, current_user.id, update_data)
    if updated is not None:
        return updated
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="查询词不存在",
    )
|
||||
|
||||
|
||||
@router.delete("/{query_id}", status_code=status.HTTP_204_NO_CONTENT)
async def remove_query(
    query_id: uuid.UUID,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Delete the caller's query; a falsy service result is reported as 404.
    if await delete_query(db, query_id, current_user.id):
        return None
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="查询词不存在",
    )
|
||||
|
||||
|
||||
@router.post("/{query_id}/run-now", response_model=RunNowResponse, status_code=status.HTTP_202_ACCEPTED)
async def run_query_now(
    query_id: uuid.UUID,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Queue an immediate run of the query and answer 202 with the task handle.
    # NOTE(review): every ValueError from trigger_query_now is mapped to 404,
    # including "Query is not active" and "No platforms configured for this
    # query"; confirm whether those should be a 4xx other than 404.
    try:
        queued = await trigger_query_now(db, current_user.id, query_id)
    except ValueError as exc:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(exc))

    return dict(
        task_id=queued.id,
        status=queued.status,
        message="查询任务已加入队列",
    )
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, status
|
||||
from fastapi.responses import StreamingResponse
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.api.deps import get_current_user
|
||||
from app.database import get_db
|
||||
from app.models.user import User
|
||||
from app.services.citation import export_citations_csv
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/export/csv")
async def export_report(
    query_id: uuid.UUID = Query(...),
    format: str = Query("csv"),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Download the citations of one query as a CSV attachment.
    # Only format=csv is accepted (400 otherwise); a ValueError from the
    # service is reported as 404.
    if format != "csv":
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Only CSV format is supported",
        )

    try:
        body = await export_citations_csv(db, current_user.id, query_id)
    except ValueError as exc:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(exc))

    # File name carries the current local date as YYYYMMDD.
    date_str = datetime.now().strftime("%Y%m%d")
    filename = f"geo-report-{date_str}.csv"
    response_headers = {"Content-Disposition": f'attachment; filename="{filename}"'}

    return StreamingResponse(
        iter([body]),
        media_type="text/csv",
        headers=response_headers,
    )
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
from pathlib import Path
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
_env_path = Path(__file__).resolve().parent.parent.parent / ".env"
|
||||
if not _env_path.exists():
|
||||
_env_path = Path(".env")
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    # Application configuration. Values are read from the environment and the
    # resolved .env file; the literals below are the fallbacks. Keys that are
    # not declared here are ignored (extra="ignore").
    model_config = SettingsConfigDict(
        env_file=str(_env_path),
        extra="ignore",
    )

    # --- storage / broker endpoints ---
    DATABASE_URL: str = "postgresql+asyncpg://postgres:postgres123@db:5432/geo_platform"
    REDIS_URL: str = "redis://redis:6379/0"

    # --- token signing ---
    # NOTE(review): the default secret is a placeholder; a deployment that
    # does not override JWT_SECRET signs tokens with a publicly known key.
    JWT_SECRET: str = "your-secret-key-change-in-production"
    JWT_EXPIRE_HOURS: int = 24

    # --- browser automation ---
    PLAYWRIGHT_BROWSERS_PATH: str = "/ms-playwright"

    # --- optional model-provider API keys (empty when unset) ---
    ZHIPU_API_KEY: str = ""
    TONGYI_API_KEY: str = ""

    # Comma-separated list of allowed CORS origins.
    CORS_ORIGINS: str = "http://localhost:3000"
|
||||
|
||||
|
||||
settings = Settings()
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession
|
||||
from sqlalchemy.orm import declarative_base
|
||||
|
||||
from app.config import settings
|
||||
|
||||
engine = create_async_engine(
|
||||
settings.DATABASE_URL,
|
||||
echo=False,
|
||||
future=True,
|
||||
)
|
||||
|
||||
AsyncSessionLocal = async_sessionmaker(
|
||||
engine,
|
||||
class_=AsyncSession,
|
||||
expire_on_commit=False,
|
||||
autoflush=False,
|
||||
autocommit=False,
|
||||
)
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
|
||||
async def get_db() -> AsyncSession:
    """Yield one ``AsyncSession`` per consumer and close it afterwards.

    The ``async with`` block closes the session when the generator is resumed
    or when an exception is thrown into it, so no additional
    ``try/finally`` with an explicit ``close()`` is needed.

    Yields:
        A session created by ``AsyncSessionLocal``.
    """
    # NOTE(review): this is an async generator, so the precise return type is
    # AsyncGenerator[AsyncSession, None]; the annotation is left as-is to
    # avoid adding an import to the module.
    async with AsyncSessionLocal() as session:
        yield session
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from app.api.auth import router as auth_router
|
||||
from app.api.citations import router as citations_router
|
||||
from app.api.queries import router as queries_router
|
||||
from app.api.reports import router as reports_router
|
||||
from app.config import settings
|
||||
from app.database import engine, Base
|
||||
from app.workers.scheduler import query_scheduler
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: create tables and run the query scheduler.

    Startup imports the models package, creates all tables known to
    ``Base.metadata`` and starts ``query_scheduler``. Shutdown stops the
    scheduler, also when an exception is thrown into the generator at the
    ``yield`` point.
    """
    # Importing the package pulls in every model module so the tables are
    # registered on Base.metadata. ``from app import models`` is used rather
    # than ``import app.models`` because the latter rebinds the local name
    # ``app`` and would shadow the FastAPI instance passed as parameter.
    from app import models  # noqa: F401

    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)

    query_scheduler.start()
    try:
        yield
    finally:
        # Always stop the scheduler once it has been started.
        await query_scheduler.shutdown()
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
title="GEO Platform API",
|
||||
version="1.0.0",
|
||||
lifespan=lifespan,
|
||||
)
|
||||
|
||||
_allow_origins = [origin.strip() for origin in settings.CORS_ORIGINS.split(",") if origin.strip()]
|
||||
if not _allow_origins:
|
||||
_allow_origins = ["http://localhost:3000"]
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=_allow_origins,
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
app.include_router(auth_router, prefix="/api/v1/auth", tags=["认证"])
|
||||
app.include_router(queries_router, prefix="/api/v1/queries", tags=["查询词"])
|
||||
app.include_router(citations_router, prefix="/api/v1/citations", tags=["引用数据"])
|
||||
app.include_router(reports_router, prefix="/api/v1/reports", tags=["报告"])
|
||||
|
||||
|
||||
@app.get("/health")
async def health_check():
    # Liveness probe: always answers with a constant payload.
    return dict(status="ok")
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
from app.models.user import User
|
||||
from app.models.query import Query
|
||||
from app.models.citation_record import CitationRecord
|
||||
from app.models.query_task import QueryTask
|
||||
from app.models.subscription import Subscription
|
||||
|
||||
__all__ = [
|
||||
"User",
|
||||
"Query",
|
||||
"CitationRecord",
|
||||
"QueryTask",
|
||||
"Subscription",
|
||||
]
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import String, Boolean, Integer, Float, ForeignKey, Index, func, Text
|
||||
from sqlalchemy import Uuid, JSON
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class CitationRecord(Base):
    """ORM mapping for the ``citation_records`` table.

    Each row belongs to one ``Query`` (``query_id``, deleted with it via
    ``ON DELETE CASCADE``) and is tagged with a ``platform`` name.
    """

    __tablename__ = "citation_records"
    __table_args__ = (
        Index("idx_citation_records_query_id", "query_id"),
        Index("idx_citation_records_queried_at", "queried_at"),
        Index("idx_citation_records_platform", "platform"),
    )

    # Primary key generated client-side.
    id: Mapped[uuid.UUID] = mapped_column(Uuid(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # Owning query.
    query_id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        ForeignKey("queries.id", ondelete="CASCADE"),
        nullable=False,
    )
    platform: Mapped[str] = mapped_column(String(50), nullable=False)

    # Citation outcome; position and text are optional.
    cited: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
    citation_position: Mapped[int | None] = mapped_column(Integer, nullable=True)
    citation_text: Mapped[str | None] = mapped_column(Text, nullable=True)
    competitor_brands: Mapped[list] = mapped_column(JSON, default=list)
    raw_response: Mapped[str | None] = mapped_column(Text, nullable=True)
    confidence: Mapped[float | None] = mapped_column(Float, nullable=True)
    match_type: Mapped[str | None] = mapped_column(String(20), nullable=True)

    # Timestamp assigned by the database at insert time.
    queried_at: Mapped[datetime] = mapped_column(server_default=func.now(), nullable=False)

    query: Mapped["Query"] = relationship("Query", back_populates="citation_records")
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import String, ForeignKey, Index, func
|
||||
from sqlalchemy import Uuid, JSON
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class Query(Base):
    """ORM mapping for the ``queries`` table.

    A row belongs to one ``User`` and owns its ``CitationRecord`` and
    ``QueryTask`` rows (both relationships cascade ``all, delete-orphan``).
    """

    __tablename__ = "queries"
    __table_args__ = (
        Index("idx_queries_user_id", "user_id"),
        Index("idx_queries_status", "status"),
        Index("idx_queries_next_query_at", "next_query_at"),
    )

    # Primary key generated client-side.
    id: Mapped[uuid.UUID] = mapped_column(Uuid(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # Owning user.
    user_id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
    )

    keyword: Mapped[str] = mapped_column(String(200), nullable=False)
    target_brand: Mapped[str] = mapped_column(String(100), nullable=False)
    brand_aliases: Mapped[list] = mapped_column(JSON, default=list)
    # A fresh two-element list is produced for every new row.
    platforms: Mapped[list] = mapped_column(JSON, nullable=False, default=lambda: ["wenxin", "kimi"])
    frequency: Mapped[str] = mapped_column(String(20), default="weekly")
    status: Mapped[str] = mapped_column(String(20), default="active")

    # Scheduling bookkeeping; both are nullable.
    last_queried_at: Mapped[datetime | None] = mapped_column(nullable=True)
    next_query_at: Mapped[datetime | None] = mapped_column(nullable=True)

    # Audit timestamps maintained by the database.
    created_at: Mapped[datetime] = mapped_column(server_default=func.now(), nullable=False)
    updated_at: Mapped[datetime] = mapped_column(
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

    user: Mapped["User"] = relationship("User", back_populates="queries")
    citation_records: Mapped[list["CitationRecord"]] = relationship(
        "CitationRecord",
        back_populates="query",
        cascade="all, delete-orphan",
    )
    query_tasks: Mapped[list["QueryTask"]] = relationship(
        "QueryTask",
        back_populates="query",
        cascade="all, delete-orphan",
    )
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import String, ForeignKey, Index, func, Text
|
||||
from sqlalchemy import Uuid
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class QueryTask(Base):
    """ORM mapping for the ``query_tasks`` table.

    One row per (query, platform) execution request; ``status`` defaults to
    ``"pending"`` and is indexed.
    """

    __tablename__ = "query_tasks"
    __table_args__ = (Index("idx_query_tasks_status", "status"),)

    # Primary key generated client-side.
    id: Mapped[uuid.UUID] = mapped_column(Uuid(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # Query this task runs for; removed together with it.
    query_id: Mapped[uuid.UUID] = mapped_column(
        Uuid(as_uuid=True),
        ForeignKey("queries.id", ondelete="CASCADE"),
        nullable=False,
    )
    platform: Mapped[str] = mapped_column(String(50), nullable=False)
    status: Mapped[str] = mapped_column(String(20), default="pending")
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)

    # scheduled_at is filled in by the database; the other two are nullable.
    scheduled_at: Mapped[datetime] = mapped_column(server_default=func.now(), nullable=False)
    started_at: Mapped[datetime | None] = mapped_column(nullable=True)
    completed_at: Mapped[datetime | None] = mapped_column(nullable=True)

    query: Mapped["Query"] = relationship("Query", back_populates="query_tasks")
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
import uuid
|
||||
from datetime import datetime, date
|
||||
|
||||
from sqlalchemy import String, ForeignKey, Numeric, func
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class Subscription(Base):
    """ORM mapping for the ``subscriptions`` table (one row per user plan period)."""

    __tablename__ = "subscriptions"

    # NOTE(review): this model uses the PostgreSQL-specific UUID type, whereas
    # the other models in this package use the generic ``sqlalchemy.Uuid``;
    # confirm whether that difference is intended.
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    user_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
    )

    plan: Mapped[str] = mapped_column(String(20), nullable=False)
    status: Mapped[str] = mapped_column(String(20), default="active")
    start_date: Mapped[date] = mapped_column(nullable=False)
    end_date: Mapped[date] = mapped_column(nullable=False)

    # Payment details, all optional.
    # NOTE(review): the column is Numeric(10, 2) while the annotation says
    # float; verify which Python type callers actually receive.
    amount: Mapped[float | None] = mapped_column(Numeric(10, 2), nullable=True)
    payment_method: Mapped[str | None] = mapped_column(String(50), nullable=True)
    payment_id: Mapped[str | None] = mapped_column(String(255), nullable=True)

    # Insert timestamp assigned by the database.
    created_at: Mapped[datetime] = mapped_column(server_default=func.now(), nullable=False)

    user: Mapped["User"] = relationship("User", back_populates="subscriptions")
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from sqlalchemy import String, Boolean, Integer, func
|
||||
from sqlalchemy import Uuid
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class User(Base):
    """ORM mapping for the ``users`` table.

    A user owns ``Query`` and ``Subscription`` rows; both relationships
    cascade ``all, delete-orphan``.
    """

    __tablename__ = "users"

    # Primary key generated client-side.
    id: Mapped[uuid.UUID] = mapped_column(Uuid(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Credentials / profile; email is unique.
    email: Mapped[str] = mapped_column(String(255), unique=True, nullable=False)
    password_hash: Mapped[str] = mapped_column(String(255), nullable=False)
    name: Mapped[str | None] = mapped_column(String(100), nullable=True)

    # Plan settings with their defaults for new rows.
    plan: Mapped[str] = mapped_column(String(20), default="free")
    max_queries: Mapped[int] = mapped_column(Integer, default=5)
    is_active: Mapped[bool] = mapped_column(Boolean, default=True)

    # Audit timestamps maintained by the database.
    created_at: Mapped[datetime] = mapped_column(server_default=func.now(), nullable=False)
    updated_at: Mapped[datetime] = mapped_column(
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

    queries: Mapped[list["Query"]] = relationship(
        "Query",
        back_populates="user",
        cascade="all, delete-orphan",
    )
    subscriptions: Mapped[list["Subscription"]] = relationship(
        "Subscription",
        back_populates="user",
        cascade="all, delete-orphan",
    )
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel, EmailStr, Field
|
||||
|
||||
|
||||
class UserRegister(BaseModel):
    # Registration payload: validated e-mail, password of at least 8
    # characters, display name of 1-100 characters.
    email: EmailStr
    password: str = Field(min_length=8)
    name: str = Field(min_length=1, max_length=100)
|
||||
|
||||
|
||||
class UserLogin(BaseModel):
    # Login payload; the password carries no length constraint here.
    email: EmailStr
    password: str
|
||||
|
||||
|
||||
class UserResponse(BaseModel):
    # Public view of a user; can be populated from object attributes
    # (from_attributes=True).
    model_config = {"from_attributes": True}

    id: uuid.UUID
    email: str
    name: str | None
    plan: str
    max_queries: int
    is_active: bool
    created_at: datetime
|
||||
|
||||
|
||||
class TokenResponse(BaseModel):
    # Token payload: the access token, its type and the user it was issued to.
    access_token: str
    token_type: str
    user: UserResponse
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class CitationResponse(BaseModel):
    # API view of one citation record; can be populated from object
    # attributes (from_attributes=True).
    model_config = {"from_attributes": True}

    id: uuid.UUID
    query_id: uuid.UUID
    platform: str
    cited: bool
    citation_position: int | None
    citation_text: str | None
    competitor_brands: list[str]
    confidence: float | None
    match_type: str | None
    queried_at: datetime
|
||||
|
||||
|
||||
class CitationListResponse(BaseModel):
    # One page of citation records plus the total count.
    items: list[CitationResponse]
    total: int
|
||||
|
||||
|
||||
class PlatformStats(BaseModel):
    # Aggregated numbers for a single platform; avg_position may be absent.
    queries: int
    citations: int
    rate: float
    avg_position: float | None
|
||||
|
||||
|
||||
class TrendItem(BaseModel):
    # One point of the trend series: a date label and a citation count.
    date: str
    citations: int
|
||||
|
||||
|
||||
class CitationStatsResponse(BaseModel):
    # Overall totals, a per-platform breakdown keyed by platform name, and
    # the trend series.
    total_queries: int
    total_citations: int
    citation_rate: float
    avg_position: float | None
    by_platform: dict[str, PlatformStats]
    trend: list[TrendItem]
|
||||
|
||||
|
||||
class RunNowResponse(BaseModel):
    # Acknowledgement for a "run now" request: task id, its status and a
    # human-readable message.
    task_id: uuid.UUID
    status: str
    message: str
|
||||
|
|
@ -0,0 +1,93 @@
|
|||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
VALID_PLATFORMS = {"wenxin", "kimi", "tongyi", "baidu_ai", "yuanbao", "qingyan", "doubao", "tiangong", "xinghuo"}
|
||||
VALID_FREQUENCIES = {"daily", "weekly"}
|
||||
VALID_STATUSES = {"active", "paused", "disabled"}
|
||||
|
||||
|
||||
class QueryCreate(BaseModel):
    # Request body for creating a query.
    # `platforms` must be a non-empty list drawn from VALID_PLATFORMS and
    # `frequency` must be one of VALID_FREQUENCIES.
    keyword: str = Field(min_length=1, max_length=200)
    target_brand: str = Field(min_length=1, max_length=100)
    brand_aliases: list[str] | None = None
    platforms: list[str]
    frequency: str = "weekly"

    @field_validator("platforms")
    @classmethod
    def validate_platforms(cls, v: list[str]) -> list[str]:
        # Reject empty lists and unknown names; return the list without
        # duplicates, keeping first-seen order.
        if not v:
            raise ValueError("platforms cannot be empty")
        invalid = set(v) - VALID_PLATFORMS
        if invalid:
            # Sorted so the message does not depend on set iteration order.
            raise ValueError(f"invalid platforms: {', '.join(sorted(invalid))}")
        # A repeated platform would otherwise be stored (and scheduled) twice.
        return list(dict.fromkeys(v))

    @field_validator("frequency")
    @classmethod
    def validate_frequency(cls, v: str) -> str:
        # Only values listed in VALID_FREQUENCIES are accepted.
        if v not in VALID_FREQUENCIES:
            # Sorted so the message is stable across processes.
            raise ValueError(f"frequency must be one of {sorted(VALID_FREQUENCIES)}")
        return v
|
||||
|
||||
|
||||
class QueryUpdate(BaseModel):
    # Partial update body: every field is optional and `None` means
    # "not provided". Provided values are validated against VALID_PLATFORMS,
    # VALID_FREQUENCIES and VALID_STATUSES respectively.
    keyword: str | None = Field(default=None, min_length=1, max_length=200)
    target_brand: str | None = Field(default=None, min_length=1, max_length=100)
    brand_aliases: list[str] | None = None
    platforms: list[str] | None = None
    frequency: str | None = None
    status: str | None = None

    @field_validator("platforms")
    @classmethod
    def validate_platforms(cls, v: list[str] | None) -> list[str] | None:
        # None passes through; otherwise the list must be non-empty and known.
        if v is None:
            return v
        if not v:
            raise ValueError("platforms cannot be empty")
        invalid = set(v) - VALID_PLATFORMS
        if invalid:
            # Sorted so the message does not depend on set iteration order.
            raise ValueError(f"invalid platforms: {', '.join(sorted(invalid))}")
        # Drop duplicates while keeping first-seen order.
        return list(dict.fromkeys(v))

    @field_validator("frequency")
    @classmethod
    def validate_frequency(cls, v: str | None) -> str | None:
        # None passes through; otherwise must be in VALID_FREQUENCIES.
        if v is None:
            return v
        if v not in VALID_FREQUENCIES:
            raise ValueError(f"frequency must be one of {sorted(VALID_FREQUENCIES)}")
        return v

    @field_validator("status")
    @classmethod
    def validate_status(cls, v: str | None) -> str | None:
        # None passes through; otherwise must be in VALID_STATUSES.
        if v is None:
            return v
        if v not in VALID_STATUSES:
            raise ValueError(f"status must be one of {sorted(VALID_STATUSES)}")
        return v
|
||||
|
||||
|
||||
class QueryResponse(BaseModel):
    # API view of a query; can be populated from object attributes
    # (from_attributes=True).
    model_config = {"from_attributes": True}

    id: uuid.UUID
    keyword: str
    target_brand: str
    brand_aliases: list
    platforms: list
    frequency: str
    status: str
    last_queried_at: datetime | None
    next_query_at: datetime | None
    created_at: datetime
    updated_at: datetime
|
||||
|
||||
|
||||
class QueryListResponse(BaseModel):
    # One page of queries plus the total count.
    items: list[QueryResponse]
    total: int
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
import uuid
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from jose import jwt, JWTError
|
||||
from passlib.context import CryptContext
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.config import settings
|
||||
from app.models.user import User
|
||||
from app.schemas.auth import UserRegister
|
||||
|
||||
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
|
||||
|
||||
|
||||
def hash_password(password: str) -> str:
    """Return the hash of ``password`` produced by the module's ``pwd_context``."""
    hashed = pwd_context.hash(password)
    return hashed
|
||||
|
||||
|
||||
def verify_password(plain_password: str, hashed_password: str) -> bool:
    """Check ``plain_password`` against ``hashed_password`` via ``pwd_context``."""
    matches = pwd_context.verify(plain_password, hashed_password)
    return matches
|
||||
|
||||
|
||||
def create_access_token(data: dict) -> str:
    """Encode ``data`` plus an ``exp`` claim as an HS256-signed JWT.

    The expiry is the current UTC time plus ``settings.JWT_EXPIRE_HOURS``
    hours; the input dict is not modified.
    """
    lifetime = timedelta(hours=settings.JWT_EXPIRE_HOURS)
    claims = data.copy()
    claims["exp"] = datetime.now(timezone.utc) + lifetime
    return jwt.encode(claims, settings.JWT_SECRET, algorithm="HS256")
|
||||
|
||||
|
||||
def verify_token(token: str) -> dict:
    """Decode ``token`` with the configured secret (HS256 only) and return its claims."""
    return jwt.decode(token, settings.JWT_SECRET, algorithms=["HS256"])
|
||||
|
||||
|
||||
async def register_user(db: AsyncSession, user_data: UserRegister) -> User:
    """Create and persist a new user.

    Args:
        db: Session used for the lookup and the insert.
        user_data: Validated registration payload.

    Returns:
        The stored ``User``, refreshed from the database.

    Raises:
        ValueError: When the e-mail is already registered, whether detected
            by the lookup or by the database on commit.
    """
    from sqlalchemy.exc import IntegrityError

    lookup = await db.execute(select(User).where(User.email == user_data.email))
    if lookup.scalar_one_or_none():
        raise ValueError("邮箱已被注册")

    user = User(
        email=user_data.email,
        password_hash=hash_password(user_data.password),
        name=user_data.name,
    )
    db.add(user)
    try:
        await db.commit()
    except IntegrityError as exc:
        # Two concurrent registrations can both pass the lookup above; the
        # loser fails here on commit. Presumably this is the unique e-mail
        # constraint, so report it the same way as the lookup does.
        await db.rollback()
        raise ValueError("邮箱已被注册") from exc

    await db.refresh(user)
    return user
|
||||
|
||||
|
||||
async def authenticate_user(
    db: AsyncSession, email: str, password: str
) -> User | None:
    """Return the user matching ``email`` when ``password`` verifies, else ``None``."""
    lookup = await db.execute(select(User).where(User.email == email))
    candidate = lookup.scalar_one_or_none()

    # The password is only checked when a row was found.
    if candidate and verify_password(password, candidate.password_hash):
        return candidate
    return None
|
||||
|
|
@ -0,0 +1,428 @@
|
|||
import asyncio
|
||||
import csv
|
||||
import io
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from sqlalchemy import func, select, and_, cast, Integer
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.citation_record import CitationRecord
|
||||
from app.models.query import Query
|
||||
from app.models.query_task import QueryTask
|
||||
from app.workers.citation_engine import CitationEngine
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def _verify_query_ownership(
    db: AsyncSession,
    query_id: uuid.UUID,
    user_id: uuid.UUID,
) -> Query | None:
    """Return the query with ``query_id`` if it belongs to ``user_id``, else ``None``."""
    owned_query = select(Query).where(
        Query.id == query_id,
        Query.user_id == user_id,
    )
    return (await db.execute(owned_query)).scalar_one_or_none()
|
||||
|
||||
|
||||
async def get_citations(
    db: AsyncSession,
    user_id: uuid.UUID,
    query_id: uuid.UUID | None = None,
    platform: str | None = None,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
    skip: int = 0,
    limit: int = 20,
) -> tuple[list[CitationRecord], int]:
    """List citation records of the user's queries, newest first.

    Optional filters narrow by query, platform and an inclusive
    ``queried_at`` range. Returns the requested page and the total number of
    matching rows; an unknown or foreign ``query_id`` yields ``([], 0)``.
    """
    # Explicit ownership check up front when a specific query is requested.
    if query_id is not None:
        if await _verify_query_ownership(db, query_id, user_id) is None:
            return [], 0

    # Every record must belong to one of the user's queries.
    filters = [Query.user_id == user_id]
    if query_id is not None:
        filters.append(CitationRecord.query_id == query_id)
    if platform is not None:
        filters.append(CitationRecord.platform == platform)
    if start_date is not None:
        filters.append(CitationRecord.queried_at >= start_date)
    if end_date is not None:
        filters.append(CitationRecord.queried_at <= end_date)

    def scoped(statement):
        # Join to the owning query and apply the shared filter set.
        return statement.join(Query, CitationRecord.query_id == Query.id).where(and_(*filters))

    page_stmt = (
        scoped(select(CitationRecord))
        .order_by(CitationRecord.queried_at.desc())
        .offset(skip)
        .limit(limit)
    )
    records = (await db.execute(page_stmt)).scalars().all()

    total_stmt = scoped(select(func.count()).select_from(CitationRecord))
    total = (await db.execute(total_stmt)).scalar_one()

    return list(records), total
|
||||
|
||||
|
||||
async def get_citation_stats(
    db: AsyncSession,
    user_id: uuid.UUID,
    query_id: uuid.UUID | None = None,
) -> dict:
    """Aggregate citation statistics for a user, optionally for one query.

    Args:
        db: Session used for all aggregate queries.
        user_id: Owner whose citation records are aggregated.
        query_id: When given, restricts the statistics to that query; an
            unknown or foreign id yields the all-zero result.

    Returns:
        A dict with ``total_queries`` (number of citation records),
        ``total_citations`` (records with ``cited`` true), ``citation_rate``
        (rounded to 2 places), ``avg_position`` (rounded to 1 place or
        ``None``), ``by_platform`` (same figures per platform) and ``trend``
        (cited count per week over the last 30 days).
    """

    def _avg(value):
        # AVG() may come back as a non-float numeric (e.g. Decimal) depending
        # on the driver; round first, then hand callers a plain float.
        return float(round(value, 1)) if value is not None else None

    # Verify ownership if query_id provided
    if query_id is not None:
        query = await _verify_query_ownership(db, query_id, user_id)
        if query is None:
            return {
                "total_queries": 0,
                "total_citations": 0,
                "citation_rate": 0.0,
                "avg_position": None,
                "by_platform": {},
                "trend": [],
            }

    # Base filter shared by every aggregate below
    base_conditions = [Query.user_id == user_id]
    if query_id is not None:
        base_conditions.append(CitationRecord.query_id == query_id)
    base_where = and_(*base_conditions)

    # Total records and cited records
    total_queries_stmt = (
        select(func.count())
        .select_from(CitationRecord)
        .join(Query, CitationRecord.query_id == Query.id)
        .where(base_where)
    )
    total_queries = (await db.execute(total_queries_stmt)).scalar_one()

    total_citations_stmt = (
        select(func.count())
        .select_from(CitationRecord)
        .join(Query, CitationRecord.query_id == Query.id)
        .where(base_where, CitationRecord.cited.is_(True))
    )
    total_citations = (await db.execute(total_citations_stmt)).scalar_one()

    citation_rate = total_citations / total_queries if total_queries > 0 else 0.0

    # Average position (only for cited records with a position)
    avg_pos_stmt = (
        select(func.avg(CitationRecord.citation_position))
        .join(Query, CitationRecord.query_id == Query.id)
        .where(
            base_where,
            CitationRecord.cited.is_(True),
            CitationRecord.citation_position.isnot(None),
        )
    )
    avg_position = _avg((await db.execute(avg_pos_stmt)).scalar_one())

    # By platform stats
    platform_stmt = (
        select(
            CitationRecord.platform,
            func.count().label("queries"),
            func.sum(cast(CitationRecord.cited, Integer)).label("citations"),
            func.avg(CitationRecord.citation_position).label("avg_position"),
        )
        .join(Query, CitationRecord.query_id == Query.id)
        .where(base_where)
        .group_by(CitationRecord.platform)
    )
    by_platform = {}
    for row in (await db.execute(platform_stmt)).all():
        queries = row.queries
        # Same int coercion as used for the trend series below.
        citations = int(row.citations or 0)
        rate = citations / queries if queries > 0 else 0.0
        by_platform[row.platform] = {
            "queries": queries,
            "citations": citations,
            "rate": round(rate, 2),
            "avg_position": _avg(row.avg_position),
        }

    # Trend: past 30 days grouped by week.
    # The comparison value is kept naive (UTC wall clock), as before;
    # datetime.utcnow() is deprecated, so derive it from an aware "now".
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    thirty_days_ago = now - timedelta(days=30)

    # Cross-database week grouping expression
    dialect = db.bind.dialect.name if db.bind else "postgresql"
    if dialect == "postgresql":
        week_expr = func.date_trunc("week", CitationRecord.queried_at)
    else:
        # SQLite compatible week grouping (YYYY-WW format)
        week_expr = func.strftime("%Y-%W", CitationRecord.queried_at)

    trend_stmt = (
        select(
            week_expr.label("week_start"),
            func.sum(cast(CitationRecord.cited, Integer)).label("citations"),
        )
        .join(Query, CitationRecord.query_id == Query.id)
        .where(
            base_where,
            CitationRecord.queried_at >= thirty_days_ago,
        )
        .group_by(week_expr)
        .order_by(week_expr)
    )
    trend = []
    for row in (await db.execute(trend_stmt)).all():
        week_start = row.week_start
        # date_trunc yields a datetime; the strftime branch yields a string.
        if isinstance(week_start, datetime):
            date_str = week_start.date().isoformat()
        else:
            date_str = str(week_start)
        trend.append({
            "date": date_str,
            "citations": int(row.citations or 0),
        })

    return {
        "total_queries": total_queries,
        "total_citations": total_citations,
        "citation_rate": round(citation_rate, 2),
        "avg_position": avg_position,
        "by_platform": by_platform,
        "trend": trend,
    }
|
||||
|
||||
|
||||
# Strong references to fire-and-forget jobs. The event loop only keeps weak
# references to tasks, so a task nobody else references may be garbage
# collected before it finishes.
_background_tasks: set[asyncio.Task] = set()


async def trigger_query_now(
    db: AsyncSession,
    user_id: uuid.UUID,
    query_id: uuid.UUID,
) -> QueryTask:
    """Queue one pending task per configured platform and start them immediately.

    Args:
        db: Session used to verify ownership and persist the new tasks.
        user_id: Owner the query must belong to.
        query_id: Query to run.

    Returns:
        The ``QueryTask`` created for the first configured platform.

    Raises:
        ValueError: If the query is not found for this user, is not active,
            or has no platforms configured.
    """
    query = await _verify_query_ownership(db, query_id, user_id)
    if query is None:
        raise ValueError("Query not found")

    if query.status != "active":
        raise ValueError("Query is not active")

    platforms = list(query.platforms or [])
    if not platforms:
        raise ValueError("No platforms configured for this query")

    # Read everything the background job needs before committing: depending on
    # the session configuration a commit can expire loaded attributes, and
    # reloading them lazily is not possible on an AsyncSession.
    keyword = query.keyword
    target_brand = query.target_brand
    brand_aliases = query.brand_aliases or []

    tasks = [
        QueryTask(query_id=query_id, platform=platform, status="pending")
        for platform in platforms
    ]
    for task in tasks:
        db.add(task)

    await db.commit()
    first_task = tasks[0]
    await db.refresh(first_task)

    # Run the queued tasks in the background without blocking the caller.
    background = asyncio.create_task(
        _execute_query_tasks(
            query_id=query_id,
            platforms=platforms,
            keyword=keyword,
            target_brand=target_brand,
            brand_aliases=brand_aliases,
        )
    )
    _background_tasks.add(background)
    background.add_done_callback(_background_tasks.discard)

    return first_task
|
||||
|
||||
|
||||
async def _execute_query_tasks(
    query_id: uuid.UUID,
    platforms: list,
    keyword: str,
    target_brand: str,
    brand_aliases: list,
):
    """Run every pending task of a query in the background.

    Opens its own session, loads the pending ``QueryTask`` rows for the given
    platforms, and for each one runs the citation engine, stores a
    ``CitationRecord`` and marks the task ``success`` or ``failed``. A failure
    on one platform does not stop the remaining ones. Errors are logged, never
    raised to the caller.

    Args:
        query_id: Query whose pending tasks should run.
        platforms: Platform keys to process.
        keyword: Search keyword of the query.
        target_brand: Brand to look for in the platform response.
        brand_aliases: Alternative names of the brand.
    """
    engine = CitationEngine()
    try:
        async with AsyncSessionLocal() as db:
            stmt = select(QueryTask).where(
                QueryTask.query_id == query_id,
                QueryTask.status == "pending",
                QueryTask.platform.in_(platforms),
            )
            result = await db.execute(stmt)
            # Snapshot id/platform while the rows are freshly loaded: a later
            # rollback expires every instance in the session, and reading an
            # expired attribute would need implicit IO.
            pending = [(task, task.id, task.platform) for task in result.scalars().all()]

            for task, task_id, platform in pending:
                try:
                    task.status = "running"
                    task.started_at = datetime.utcnow()
                    task.error_message = None
                    await db.commit()

                    citation_result = await engine.execute_single_platform(
                        keyword=keyword,
                        platform=platform,
                        target_brand=target_brand,
                        brand_aliases=brand_aliases or [],
                    )

                    if citation_result:
                        record = CitationRecord(
                            query_id=query_id,
                            platform=platform,
                            cited=citation_result.get("cited", False),
                            citation_position=citation_result.get("position"),
                            citation_text=citation_result.get("citation_text"),
                            competitor_brands=citation_result.get("competitor_brands", []),
                            raw_response=citation_result.get("raw_response", ""),
                            confidence=citation_result.get("confidence"),
                            match_type=citation_result.get("match_type"),
                        )
                        db.add(record)

                    task.status = "success"
                    task.completed_at = datetime.utcnow()
                    await db.commit()

                except Exception as e:
                    logger.error(f"查询任务执行失败: {task_id}, 错误: {e}")
                    # Recording the failure is itself best-effort, so that a
                    # problem here does not abort the remaining platforms.
                    try:
                        await db.rollback()
                        task.status = "failed"
                        task.error_message = str(e)
                        task.completed_at = datetime.utcnow()
                        await db.commit()
                    except Exception as record_error:
                        await db.rollback()
                        logger.error(f"查询任务状态更新失败: {task_id}, 错误: {record_error}")

    except Exception as e:
        logger.error(f"查询引擎执行失败: {e}")
    finally:
        await engine.close()
|
||||
|
||||
|
||||
# Display names for platform keys, used when rendering exported reports.
PLATFORM_NAMES = dict(
    wenxin="文心一言",
    kimi="Kimi",
    tongyi="通义千问",
    doubao="豆包",
    qingyan="智谱清言",
    tiangong="天工AI",
    xinghuo="讯飞星火",
    baidu_ai="百度AI搜索",
    yuanbao="腾讯元宝",
)
|
||||
|
||||
|
||||
# Labels written to the CSV for each CitationRecord.match_type value.
_MATCH_TYPE_LABELS = {
    "exact": "精确匹配",
    "alias": "别名匹配",
    "fuzzy": "模糊匹配",
}

# Leading characters that spreadsheet applications treat as the start of a formula.
_CSV_FORMULA_PREFIXES = ("=", "+", "-", "@", "\t", "\r")


def _csv_safe(value: str | None) -> str:
    """Return *value* as a CSV cell that a spreadsheet will not evaluate as a formula."""
    if not value:
        return ""
    if value.startswith(_CSV_FORMULA_PREFIXES):
        # A leading apostrophe makes spreadsheet applications treat the cell as text.
        return "'" + value
    return value


async def export_citations_csv(
    db: AsyncSession,
    user_id: uuid.UUID,
    query_id: uuid.UUID,
) -> str:
    """Render all citation records of a query as CSV text.

    The output has a header row, one row per record (newest first), then a
    blank row and a summary section with totals, citation rate, average
    citation position and the report generation time.

    Free-text cells (keyword, brand, citation text, competitor brands) come
    from users or from external platform responses, so they are neutralised
    against spreadsheet formula injection.

    Args:
        db: Session used for the ownership check and the record query.
        user_id: Owner the query must belong to.
        query_id: Query whose records are exported.

    Returns:
        The CSV document as a string.

    Raises:
        ValueError: If the query is not found for this user.
    """
    query = await _verify_query_ownership(db, query_id, user_id)
    if query is None:
        raise ValueError("Query not found")

    stmt = (
        select(CitationRecord)
        .where(CitationRecord.query_id == query_id)
        .order_by(CitationRecord.queried_at.desc())
    )
    result = await db.execute(stmt)
    records = result.scalars().all()

    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow([
        "查询关键词",
        "目标品牌",
        "查询日期",
        "查询平台",
        "是否引用",
        "引用位置",
        "引用文本",
        "匹配置信度",
        "匹配类型",
        "竞争品牌",
    ])

    total_queries = len(records)
    total_citations = 0
    total_position = 0
    position_count = 0

    keyword_cell = _csv_safe(query.keyword)
    brand_cell = _csv_safe(query.target_brand)

    for record in records:
        if record.cited:
            total_citations += 1
            # Positions only count towards the average for cited records.
            if record.citation_position is not None:
                total_position += record.citation_position
                position_count += 1

        date_str = ""
        if record.queried_at:
            date_str = record.queried_at.strftime("%Y-%m-%d %H:%M:%S")

        confidence_str = ""
        if record.confidence is not None:
            confidence_str = f"{record.confidence:.2f}"

        competitors = ", ".join(record.competitor_brands) if record.competitor_brands else ""

        writer.writerow([
            keyword_cell,
            brand_cell,
            date_str,
            PLATFORM_NAMES.get(record.platform, record.platform),
            "是" if record.cited else "否",
            record.citation_position if record.citation_position is not None else "",
            _csv_safe(record.citation_text),
            confidence_str,
            _MATCH_TYPE_LABELS.get(record.match_type, ""),
            _csv_safe(competitors),
        ])

    # Summary section
    writer.writerow([])
    writer.writerow(["汇总统计"])
    writer.writerow(["总查询次数", total_queries])
    writer.writerow(["引用次数", total_citations])
    citation_rate = (total_citations / total_queries * 100) if total_queries > 0 else 0.0
    writer.writerow(["引用率", f"{citation_rate:.1f}%"])
    avg_position = (total_position / position_count) if position_count > 0 else 0.0
    writer.writerow(["平均引用位置", f"{avg_position:.1f}"])
    writer.writerow(["报告生成时间", datetime.now().strftime("%Y-%m-%d %H:%M:%S")])

    return output.getvalue()
|
||||
|
|
@ -0,0 +1,122 @@
|
|||
import uuid
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from sqlalchemy import delete, func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.models.query import Query
|
||||
from app.models.user import User
|
||||
from app.schemas.query import QueryCreate, QueryUpdate
|
||||
|
||||
|
||||
async def get_queries(
    db: AsyncSession,
    user_id: uuid.UUID,
    skip: int = 0,
    limit: int = 20,
) -> tuple[list[Query], int]:
    """Return one page of a user's queries plus the user's total query count.

    Args:
        db: Session to run the statements on.
        user_id: Owner whose queries are listed.
        skip: Number of rows to skip.
        limit: Maximum number of rows to return.

    Returns:
        ``(items, total)`` where ``items`` is ordered newest first and
        ``total`` counts all queries of the user, ignoring pagination.
    """
    owned_by_user = Query.user_id == user_id

    page_stmt = (
        select(Query)
        .where(owned_by_user)
        .order_by(Query.created_at.desc())
        .offset(skip)
        .limit(limit)
    )
    page = (await db.execute(page_stmt)).scalars().all()

    total_stmt = select(func.count()).select_from(Query).where(owned_by_user)
    total = (await db.execute(total_stmt)).scalar_one()

    return list(page), total
|
||||
|
||||
|
||||
async def get_query(
    db: AsyncSession,
    query_id: uuid.UUID,
    user_id: uuid.UUID,
) -> Query | None:
    """Fetch a single query by id, restricted to the given owner.

    Returns:
        The matching ``Query``, or ``None`` when no row has this id and owner.
    """
    ownership_filter = (Query.id == query_id, Query.user_id == user_id)
    rows = await db.execute(select(Query).where(*ownership_filter))
    return rows.scalar_one_or_none()
|
||||
|
||||
|
||||
# Interval between two scheduled runs for each supported frequency. Kept in
# line with the frequencies the citation engine schedules ("daily", "weekly",
# "monthly").
_FREQUENCY_INTERVALS = {
    "daily": timedelta(days=1),
    "weekly": timedelta(days=7),
    "monthly": timedelta(days=30),
}


def _next_query_at(frequency: str | None) -> datetime:
    """Return the next scheduled run time for *frequency*, counted from now.

    Unknown or missing frequencies fall back to the weekly interval. The value
    is a naive UTC datetime for DB compatibility.
    """
    interval = _FREQUENCY_INTERVALS.get(frequency, timedelta(days=7))
    return datetime.utcnow() + interval


async def create_query(
    db: AsyncSession,
    user_id: uuid.UUID,
    query_data: QueryCreate,
) -> Query:
    """Create a query for a user, enforcing the user's ``max_queries`` limit.

    Args:
        db: Session to run the statements on.
        user_id: Owner of the new query.
        query_data: Validated creation payload.

    Returns:
        The persisted and refreshed ``Query``.

    Raises:
        PermissionError: If the user already has ``max_queries`` queries.
    """
    # Compare the user's current number of queries with their quota.
    count_stmt = select(func.count()).select_from(Query).where(Query.user_id == user_id)
    count_result = await db.execute(count_stmt)
    current_count = count_result.scalar_one()

    user_stmt = select(User).where(User.id == user_id)
    user_result = await db.execute(user_stmt)
    user = user_result.scalar_one()

    if current_count >= user.max_queries:
        raise PermissionError("Query limit exceeded")

    query = Query(
        user_id=user_id,
        keyword=query_data.keyword,
        target_brand=query_data.target_brand,
        brand_aliases=query_data.brand_aliases or [],
        platforms=query_data.platforms,
        frequency=query_data.frequency,
        next_query_at=_next_query_at(query_data.frequency),
    )
    db.add(query)
    await db.commit()
    await db.refresh(query)
    return query


async def update_query(
    db: AsyncSession,
    query_id: uuid.UUID,
    user_id: uuid.UUID,
    update_data: QueryUpdate,
) -> Query | None:
    """Apply a partial update to a query owned by the given user.

    Only fields explicitly set in ``update_data`` are written. When the
    frequency is part of the update, ``next_query_at`` is rescheduled from
    the current time.

    Returns:
        The refreshed ``Query``, or ``None`` when no row has this id and owner.
    """
    stmt = select(Query).where(Query.id == query_id, Query.user_id == user_id)
    result = await db.execute(stmt)
    query = result.scalar_one_or_none()
    if query is None:
        return None

    update_dict = update_data.model_dump(exclude_unset=True)

    # A new frequency restarts the schedule.
    if "frequency" in update_dict:
        query.next_query_at = _next_query_at(update_dict["frequency"])

    for field, value in update_dict.items():
        setattr(query, field, value)

    await db.commit()
    await db.refresh(query)
    return query
|
||||
|
||||
|
||||
async def delete_query(
    db: AsyncSession,
    query_id: uuid.UUID,
    user_id: uuid.UUID,
) -> bool:
    """Delete a query if it belongs to the given user.

    Returns:
        ``True`` when a row was deleted, ``False`` when nothing matched.
    """
    removal = delete(Query).where(Query.id == query_id, Query.user_id == user_id)
    outcome = await db.execute(removal)
    await db.commit()
    return outcome.rowcount > 0
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
from app.workers.citation_engine import CitationEngine
|
||||
from app.workers.platforms.kimi import KimiAdapter
|
||||
from app.workers.platforms.wenxin import WenxinAdapter
|
||||
from app.workers.scheduler import QueryScheduler, query_scheduler
|
||||
|
||||
__all__ = [
|
||||
"CitationEngine",
|
||||
"KimiAdapter",
|
||||
"WenxinAdapter",
|
||||
"QueryScheduler",
|
||||
"query_scheduler",
|
||||
]
|
||||
|
|
@ -0,0 +1,329 @@
|
|||
import difflib
|
||||
import logging
|
||||
import re
|
||||
import uuid
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
|
||||
|
||||
def _sanitize_raw_response(text: str | None) -> str:
|
||||
"""清理原始响应中的无效控制字符,避免 PostgreSQL UTF-8 插入失败"""
|
||||
if not text:
|
||||
return ""
|
||||
# 移除 NULL 字节及其他非法控制字符,保留 \n \t \r
|
||||
return re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f]", "", text)
|
||||
|
||||
from app.models.citation_record import CitationRecord
|
||||
from app.models.query import Query
|
||||
from app.models.query_task import QueryTask
|
||||
from app.workers.platforms.kimi import KimiAdapter
|
||||
from app.workers.platforms.wenxin import WenxinAdapter
|
||||
from app.workers.platforms.tongyi import TongyiAdapter
|
||||
from app.workers.platforms.doubao import DoubaoAdapter
|
||||
from app.workers.platforms.qingyan import QingyanAdapter
|
||||
from app.workers.platforms.tiangong import TiangongAdapter
|
||||
from app.workers.platforms.xinghuo import XinghuoAdapter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BrandMatcher:
|
||||
"""品牌匹配器:检测文本中是否引用了目标品牌"""
|
||||
|
||||
def __init__(self, target_brand: str, brand_aliases: list[str] | None = None):
|
||||
self.target_brand = target_brand
|
||||
self.brand_aliases = brand_aliases or []
|
||||
|
||||
def match(self, text: str) -> dict:
|
||||
"""
|
||||
返回: {
|
||||
"cited": bool,
|
||||
"confidence": float, # 0.0-1.0
|
||||
"match_type": str, # "exact"/"alias"/"fuzzy"/None
|
||||
"position": int|None, # 在文本段落中的位置(第几段提到,1-based)
|
||||
"citation_text": str|None, # 被引用的上下文片段
|
||||
}
|
||||
"""
|
||||
if not text:
|
||||
return {
|
||||
"cited": False,
|
||||
"confidence": 0.0,
|
||||
"match_type": None,
|
||||
"position": None,
|
||||
"citation_text": None,
|
||||
}
|
||||
|
||||
# 1. 精确匹配
|
||||
if self.target_brand in text:
|
||||
position, citation_text = self._extract_position_and_context(text, self.target_brand)
|
||||
return {
|
||||
"cited": True,
|
||||
"confidence": 1.0,
|
||||
"match_type": "exact",
|
||||
"position": position,
|
||||
"citation_text": citation_text,
|
||||
}
|
||||
|
||||
# 2. 别名匹配
|
||||
for alias in self.brand_aliases:
|
||||
if alias in text:
|
||||
position, citation_text = self._extract_position_and_context(text, alias)
|
||||
return {
|
||||
"cited": True,
|
||||
"confidence": 0.9,
|
||||
"match_type": "alias",
|
||||
"position": position,
|
||||
"citation_text": citation_text,
|
||||
}
|
||||
|
||||
# 3. 模糊匹配
|
||||
best_ratio = 0.0
|
||||
best_match = None
|
||||
for word in self._extract_candidates(text):
|
||||
ratio = difflib.SequenceMatcher(None, self.target_brand, word).ratio()
|
||||
if ratio > best_ratio:
|
||||
best_ratio = ratio
|
||||
best_match = word
|
||||
|
||||
for alias in self.brand_aliases:
|
||||
for word in self._extract_candidates(text):
|
||||
ratio = difflib.SequenceMatcher(None, alias, word).ratio()
|
||||
if ratio > best_ratio:
|
||||
best_ratio = ratio
|
||||
best_match = word
|
||||
|
||||
if best_ratio > 0.4 and best_match:
|
||||
position, citation_text = self._extract_position_and_context(text, best_match)
|
||||
return {
|
||||
"cited": True,
|
||||
"confidence": round(best_ratio, 2),
|
||||
"match_type": "fuzzy",
|
||||
"position": position,
|
||||
"citation_text": citation_text,
|
||||
}
|
||||
|
||||
return {
|
||||
"cited": False,
|
||||
"confidence": 0.0,
|
||||
"match_type": None,
|
||||
"position": None,
|
||||
"citation_text": None,
|
||||
}
|
||||
|
||||
def _extract_candidates(self, text: str) -> list[str]:
|
||||
"""从文本中提取候选词(按非文字字符分割)"""
|
||||
# 匹配中文词组、英文单词等
|
||||
return [w for w in re.split(r'[^\w\u4e00-\u9fff]+', text) if len(w) >= 2]
|
||||
|
||||
def _extract_position_and_context(self, text: str, keyword: str) -> tuple[int | None, str | None]:
|
||||
"""提取品牌首次出现的段落位置(1-based)和上下文片段"""
|
||||
paragraphs = [p.strip() for p in text.split('\n') if p.strip()]
|
||||
if not paragraphs:
|
||||
paragraphs = [text]
|
||||
|
||||
for idx, paragraph in enumerate(paragraphs, start=1):
|
||||
if keyword in paragraph:
|
||||
# 截取前200字符
|
||||
snippet = paragraph[:200]
|
||||
return idx, snippet
|
||||
|
||||
return None, None
|
||||
|
||||
|
||||
class CompetitorDetector:
    """Find mentions of known competitor brands in a text."""

    # Built-in brand lists, grouped by industry.
    KNOWN_BRANDS = {
        "保险": ["中国平安", "中国人寿", "太平洋保险", "新华保险", "泰康保险", "中国人保", "友邦保险"],
        "金融": ["工商银行", "建设银行", "农业银行", "中国银行", "招商银行", "交通银行"],
        "科技": ["华为", "腾讯", "阿里巴巴", "百度", "字节跳动", "小米", "京东"],
    }

    def detect(self, text: str, target_brand: str) -> list[str]:
        """Return the sorted known brands that occur in *text*, excluding *target_brand*."""
        if not text:
            return []

        found = {
            brand
            for brands in self.KNOWN_BRANDS.values()
            for brand in brands
            if brand != target_brand and brand in text
        }
        return sorted(found)
|
||||
|
||||
|
||||
class CitationEngine:
    """Core of citation detection: queries platforms and checks responses for the brand."""

    def __init__(self):
        # One adapter instance per supported platform key.
        self.platforms = {
            "wenxin": WenxinAdapter(),
            "kimi": KimiAdapter(),
            "tongyi": TongyiAdapter(),
            "doubao": DoubaoAdapter(),
            "qingyan": QingyanAdapter(),
            "tiangong": TiangongAdapter(),
            "xinghuo": XinghuoAdapter(),
        }
        self.matcher = None
        self.competitor_detector = CompetitorDetector()

    async def execute_query(self, query: Query, db: AsyncSession) -> list[CitationRecord]:
        """Run a query on each of its platforms and persist the outcome.

        For every platform (defaulting to wenxin and kimi when none are
        configured) the matching ``QueryTask`` is marked running, the platform
        is queried, and a ``CitationRecord`` is stored. When a platform fails,
        the task is marked failed and a ``cited=False`` placeholder record
        carrying the error text is stored instead. Finally the query's
        ``last_queried_at`` and ``next_query_at`` are updated.

        Returns:
            The citation records created, one per platform.
        """
        # Snapshot plain values up front. The failure path rolls the session
        # back, which expires loaded instances, and expired attributes cannot
        # be lazily reloaded on an AsyncSession.
        query_id = query.id
        keyword = query.keyword
        target_brand = query.target_brand
        brand_aliases = query.brand_aliases or []
        frequency = query.frequency
        platforms = query.platforms or ["wenxin", "kimi"]

        self.matcher = BrandMatcher(
            target_brand=target_brand,
            brand_aliases=brand_aliases,
        )

        records: list[CitationRecord] = []

        for platform_name in platforms:
            task = await self._get_or_create_task(db, query_id, platform_name)

            task.status = "running"
            task.started_at = datetime.utcnow()
            task.error_message = None
            await db.commit()

            try:
                result = await self.execute_single_platform(
                    keyword=keyword,
                    platform=platform_name,
                    target_brand=target_brand,
                    brand_aliases=brand_aliases,
                )

                record = CitationRecord(
                    query_id=query_id,
                    platform=platform_name,
                    cited=result["cited"],
                    citation_position=result.get("position"),
                    citation_text=result.get("citation_text"),
                    competitor_brands=result.get("competitor_brands", []),
                    raw_response=_sanitize_raw_response(result.get("raw_response", "")),
                    confidence=result.get("confidence"),
                    match_type=result.get("match_type"),
                )
                db.add(record)

                task.status = "success"
                task.completed_at = datetime.utcnow()
                await db.commit()
                # Report the record only once it has actually been persisted.
                records.append(record)

            except Exception as e:
                logger.error(f"平台 {platform_name} 查询失败: {e}")
                error_msg = str(e)
                # The error may come from a failed flush/commit, after which
                # the session refuses further work until it is rolled back.
                await db.rollback()

                task.status = "failed"
                task.error_message = error_msg
                task.completed_at = datetime.utcnow()

                # Placeholder record so the failed run still shows up.
                record = CitationRecord(
                    query_id=query_id,
                    platform=platform_name,
                    cited=False,
                    raw_response=_sanitize_raw_response(error_msg),
                )
                db.add(record)
                records.append(record)
                await db.commit()

        query.last_queried_at = datetime.utcnow()
        query.next_query_at = self._calculate_next_query_at(frequency)
        await db.commit()

        return records

    async def execute_single_platform(
        self,
        keyword: str,
        platform: str,
        target_brand: str,
        brand_aliases: list,
    ) -> dict:
        """Query one platform and analyse its response.

        Returns:
            A dict with ``cited``, ``confidence``, ``match_type``,
            ``position``, ``citation_text``, ``competitor_brands`` and the
            sanitised ``raw_response``.

        Raises:
            ValueError: If *platform* has no registered adapter.
        """
        adapter = self.platforms.get(platform)
        if not adapter:
            raise ValueError(f"不支持的平台: {platform}")

        # Search-engine mode: the keyword is combined with the target brand so
        # that the results include brand information.
        search_keyword = f"{keyword} {target_brand}"
        # Sanitise here so every caller stores a response that is safe to
        # insert; sanitising again downstream is a no-op.
        raw_response = _sanitize_raw_response(await adapter.query(search_keyword))

        matcher = BrandMatcher(target_brand=target_brand, brand_aliases=brand_aliases)
        match_result = matcher.match(raw_response)

        competitor_brands = self.competitor_detector.detect(raw_response, target_brand)

        return {
            "cited": match_result["cited"],
            "confidence": match_result["confidence"],
            "match_type": match_result["match_type"],
            "position": match_result["position"],
            "citation_text": match_result["citation_text"],
            "competitor_brands": competitor_brands,
            "raw_response": raw_response,
        }

    async def _get_or_create_task(
        self, db: AsyncSession, query_id: uuid.UUID, platform: str
    ) -> QueryTask:
        """Return a ``QueryTask`` for the query/platform pair, creating one if needed."""
        # More than one task row can exist for the same query and platform
        # (manual triggers add a new row each time), so take the first match
        # instead of requiring uniqueness.
        stmt = (
            select(QueryTask)
            .where(
                QueryTask.query_id == query_id,
                QueryTask.platform == platform,
            )
            .limit(1)
        )
        result = await db.execute(stmt)
        task = result.scalars().first()

        if not task:
            task = QueryTask(
                query_id=query_id,
                platform=platform,
                status="pending",
            )
            db.add(task)
            await db.commit()
            await db.refresh(task)

        return task

    def _calculate_next_query_at(self, frequency: str | None) -> datetime:
        """Return the next run time for *frequency*; unknown values fall back to weekly."""
        now = datetime.utcnow()
        freq_map = {
            "daily": timedelta(days=1),
            "weekly": timedelta(days=7),
            "monthly": timedelta(days=30),
        }
        delta = freq_map.get(frequency or "weekly", timedelta(days=7))
        return now + delta

    async def close(self):
        """Close every platform adapter, logging (not raising) individual failures."""
        for adapter in self.platforms.values():
            try:
                await adapter.close()
            except Exception as e:
                logger.warning(f"关闭适配器 {adapter.platform_name} 时出错: {e}")
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
from app.workers.platforms.base import BasePlatformAdapter
|
||||
from app.workers.platforms.wenxin import WenxinAdapter
|
||||
from app.workers.platforms.kimi import KimiAdapter
|
||||
from app.workers.platforms.tongyi import TongyiAdapter
|
||||
from app.workers.platforms.doubao import DoubaoAdapter
|
||||
from app.workers.platforms.qingyan import QingyanAdapter
|
||||
from app.workers.platforms.tiangong import TiangongAdapter
|
||||
from app.workers.platforms.xinghuo import XinghuoAdapter
|
||||
|
||||
__all__ = [
|
||||
"BasePlatformAdapter",
|
||||
"WenxinAdapter",
|
||||
"KimiAdapter",
|
||||
"TongyiAdapter",
|
||||
"DoubaoAdapter",
|
||||
"QingyanAdapter",
|
||||
"TiangongAdapter",
|
||||
"XinghuoAdapter",
|
||||
]
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class BasePlatformAdapter(ABC):
    """Abstract base class for adapters that query one AI platform."""

    # Platform key identifying the adapter.
    platform_name: str
    # URL of the platform.
    platform_url: str

    @abstractmethod
    async def query(self, keyword: str) -> str:
        """Query the platform for *keyword* and return the raw response text."""

    async def close(self):
        """Release resources held by the adapter; the base implementation does nothing."""
        return None
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
import asyncio
|
||||
import logging
|
||||
|
||||
from app.workers.platforms.base import BasePlatformAdapter
|
||||
from app.workers.platforms.search_engine import fetch_search_content
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DoubaoAdapter(BasePlatformAdapter):
    """Adapter for the Doubao platform (search-engine mode)."""

    platform_name = "doubao"
    platform_url = "https://www.doubao.com/"

    # Total number of attempts per query (the first try plus the retries).
    MAX_ATTEMPTS = 3

    async def query(self, keyword: str) -> str:
        """Query Doubao for *keyword*, retrying with exponential backoff.

        Waits 1s, 2s, ... between attempts and gives up after
        ``MAX_ATTEMPTS`` tries.

        Returns:
            The raw response text.

        Raises:
            Exception: The error of the last failed attempt.
            RuntimeError: If ``MAX_ATTEMPTS`` is configured below 1.
        """
        last_error: Exception | None = None
        for attempt in range(1, self.MAX_ATTEMPTS + 1):
            try:
                return await self._do_query(keyword)
            except Exception as e:
                last_error = e
                logger.warning(f"豆包查询第 {attempt} 次尝试失败: {e}")
                # No point sleeping after the final attempt.
                if attempt < self.MAX_ATTEMPTS:
                    await asyncio.sleep(2 ** (attempt - 1))

        if last_error is None:
            raise RuntimeError("MAX_ATTEMPTS must be at least 1")

        logger.error(f"豆包查询最终失败: {last_error}")
        raise last_error

    async def _do_query(self, keyword: str) -> str:
        """Single attempt: fetch content related to *keyword* through the search engine helper."""
        return await fetch_search_content(self.platform_name, keyword)

    async def close(self):
        """Nothing to release in search-engine mode."""
        pass
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue