From bbbf9cd40a6ddad73ef5e4db6d99d871cce13a5f Mon Sep 17 00:00:00 2001 From: chiguyong Date: Thu, 25 Jun 2026 01:09:59 +0800 Subject: [PATCH] feat(bitable): add bitable companion service with full P0-P2 fixes Bitable is a multi-dimensional table companion service that runs alongside the main AgentKit server. It provides structured data storage with formula fields, views, and ingestion pipelines. Major components: - Domain models (Pydantic v2): Table, Field, Record, View, RecalcTask - SQLAlchemy 2 async ORM with independent bitable PostgreSQL schema - Formula engine: AST parser, DAG, Kahn topological sort, safe eval - RecalcWorker: atomic task claiming (FOR UPDATE SKIP LOCKED), topo-order processing, stale-threshold reaper for crash recovery - REST API (/api/v1/bitable): tables, fields, records, views, files - BitableTool: agent-facing tool with batch chunking (500/batch) - CLI: agentkit bitable subcommands (create, list, import-excel, etc.) - Frontend: Vue 3 + vxe-table grid with field management, views, filters - Ingestion: Excel (openpyxl), database reflection, API collector Security fixes (ce-code-review P0 + ce-debug P1): - SQL injection prevention (field_id validation, parameterized queries) - IDOR protection (_check_table_ownership on all table-level endpoints) - SSRF prevention (URL scheme + private IP validation in parse_excel_url) - OOM prevention (streaming file upload, batch delete, batch insert) - Atomic recalc task claiming (FOR UPDATE SKIP LOCKED) - Formula engine cache invalidation on field changes - Composite cursor pagination for non-id sort orders - Batch upsert (eliminates N+1 queries) - Sync I/O offloaded to thread pool in async contexts - Internal token auth (X-Internal-Token, hmac.compare_digest) - PK unique index enforcement Test coverage: 88 unit tests (95 skipped without Docker) --- .gitignore | 5 + AGENTS.md | 305 +++--- CLAUDE.md | 1 - README.md | 12 +- .../2026-06-24-bitable-module-requirements.md | 214 +++++ 
...-portal-platform-evolution-requirements.md | 182 ++++ ...001-feat-bitable-companion-service-plan.md | 868 ++++++++++++++++++ ...g-horizon-reliability-optimization-plan.md | 521 +++++++++++ ...005-feat-portal-platform-evolution-plan.md | 508 ++++++++++ skills-lock.json | 162 ++++ src/agentkit/bitable/__init__.py | 30 + src/agentkit/bitable/db.py | 321 +++++++ src/agentkit/bitable/formula/__init__.py | 33 + src/agentkit/bitable/formula/engine.py | 293 ++++++ src/agentkit/bitable/formula/functions.py | 101 ++ src/agentkit/bitable/formula/parser.py | 311 +++++++ src/agentkit/bitable/ingestion/__init__.py | 32 + .../bitable/ingestion/api_collector.py | 51 + src/agentkit/bitable/ingestion/database.py | 171 ++++ src/agentkit/bitable/ingestion/excel.py | 249 +++++ src/agentkit/bitable/models.py | 132 +++ src/agentkit/bitable/recalc_worker.py | 266 ++++++ src/agentkit/bitable/repository.py | 803 ++++++++++++++++ src/agentkit/bitable/service.py | 477 ++++++++++ src/agentkit/cli/bitable.py | 251 +++++ src/agentkit/cli/main.py | 4 + src/agentkit/core/middleware.py | 24 +- src/agentkit/core/react.py | 4 + src/agentkit/experts/orchestrator.py | 39 +- src/agentkit/orchestrator/checkpoint.py | 17 +- src/agentkit/server/app.py | 21 + .../server/frontend/src/api/bitable.ts | 327 +++++++ .../src/components/bitable/AttachmentCell.vue | 73 ++ .../src/components/bitable/BitableGrid.vue | 227 +++++ .../components/bitable/FieldConfigForm.vue | 186 ++++ .../components/bitable/FieldManagePanel.vue | 249 +++++ .../src/components/bitable/FilterBuilder.vue | 179 ++++ .../src/components/bitable/ImageCell.vue | 117 +++ .../components/bitable/TableCreateModal.vue | 103 +++ .../src/components/bitable/TableViewList.vue | 121 +++ .../components/bitable/ViewConfigPanel.vue | 175 ++++ .../src/components/bitable/ViewSwitcher.vue | 83 ++ .../server/frontend/src/stores/bitable.ts | 381 ++++++++ .../server/frontend/src/views/BitableView.vue | 294 ++++++ src/agentkit/server/routes/bitable.py | 605 
++++++++++++ src/agentkit/server/routes/tasks.py | 136 +-- src/agentkit/skills/base.py | 2 +- src/agentkit/tools/bitable_tool.py | 486 ++++++++++ tests/unit/bitable/__init__.py | 0 tests/unit/bitable/conftest.py | 143 +++ tests/unit/bitable/test_attachment.py | 322 +++++++ tests/unit/bitable/test_bitable_tool.py | 485 ++++++++++ tests/unit/bitable/test_cli.py | 205 +++++ tests/unit/bitable/test_db.py | 246 +++++ tests/unit/bitable/test_formula_engine.py | 211 +++++ tests/unit/bitable/test_formula_parser.py | 199 ++++ tests/unit/bitable/test_ingestion_excel.py | 182 ++++ tests/unit/bitable/test_models.py | 303 ++++++ tests/unit/bitable/test_recalc.py | 330 +++++++ tests/unit/bitable/test_routes.py | 579 ++++++++++++ tests/unit/bitable/test_service.py | 296 ++++++ tests/unit/test_middleware.py | 4 +- tests/unit/test_pipeline_checkpoint.py | 6 +- tests/unit/test_skill_md.py | 8 +- 64 files changed, 13433 insertions(+), 238 deletions(-) delete mode 120000 CLAUDE.md create mode 100644 docs/brainstorms/2026-06-24-bitable-module-requirements.md create mode 100644 docs/brainstorms/2026-06-24-portal-platform-evolution-requirements.md create mode 100644 docs/plans/2026-06-24-001-feat-bitable-companion-service-plan.md create mode 100644 docs/plans/2026-06-24-004-feat-long-horizon-reliability-optimization-plan.md create mode 100644 docs/plans/2026-06-24-005-feat-portal-platform-evolution-plan.md create mode 100644 src/agentkit/bitable/__init__.py create mode 100644 src/agentkit/bitable/db.py create mode 100644 src/agentkit/bitable/formula/__init__.py create mode 100644 src/agentkit/bitable/formula/engine.py create mode 100644 src/agentkit/bitable/formula/functions.py create mode 100644 src/agentkit/bitable/formula/parser.py create mode 100644 src/agentkit/bitable/ingestion/__init__.py create mode 100644 src/agentkit/bitable/ingestion/api_collector.py create mode 100644 src/agentkit/bitable/ingestion/database.py create mode 100644 src/agentkit/bitable/ingestion/excel.py create 
mode 100644 src/agentkit/bitable/models.py create mode 100644 src/agentkit/bitable/recalc_worker.py create mode 100644 src/agentkit/bitable/repository.py create mode 100644 src/agentkit/bitable/service.py create mode 100644 src/agentkit/cli/bitable.py create mode 100644 src/agentkit/server/frontend/src/api/bitable.ts create mode 100644 src/agentkit/server/frontend/src/components/bitable/AttachmentCell.vue create mode 100644 src/agentkit/server/frontend/src/components/bitable/BitableGrid.vue create mode 100644 src/agentkit/server/frontend/src/components/bitable/FieldConfigForm.vue create mode 100644 src/agentkit/server/frontend/src/components/bitable/FieldManagePanel.vue create mode 100644 src/agentkit/server/frontend/src/components/bitable/FilterBuilder.vue create mode 100644 src/agentkit/server/frontend/src/components/bitable/ImageCell.vue create mode 100644 src/agentkit/server/frontend/src/components/bitable/TableCreateModal.vue create mode 100644 src/agentkit/server/frontend/src/components/bitable/TableViewList.vue create mode 100644 src/agentkit/server/frontend/src/components/bitable/ViewConfigPanel.vue create mode 100644 src/agentkit/server/frontend/src/components/bitable/ViewSwitcher.vue create mode 100644 src/agentkit/server/frontend/src/stores/bitable.ts create mode 100644 src/agentkit/server/frontend/src/views/BitableView.vue create mode 100644 src/agentkit/server/routes/bitable.py create mode 100644 src/agentkit/tools/bitable_tool.py create mode 100644 tests/unit/bitable/__init__.py create mode 100644 tests/unit/bitable/conftest.py create mode 100644 tests/unit/bitable/test_attachment.py create mode 100644 tests/unit/bitable/test_bitable_tool.py create mode 100644 tests/unit/bitable/test_cli.py create mode 100644 tests/unit/bitable/test_db.py create mode 100644 tests/unit/bitable/test_formula_engine.py create mode 100644 tests/unit/bitable/test_formula_parser.py create mode 100644 tests/unit/bitable/test_ingestion_excel.py create mode 100644 
tests/unit/bitable/test_models.py create mode 100644 tests/unit/bitable/test_recalc.py create mode 100644 tests/unit/bitable/test_routes.py create mode 100644 tests/unit/bitable/test_service.py diff --git a/.gitignore b/.gitignore index af2330b..5d55a34 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,8 @@ src/agentkit/server/static/ # Runtime data (auth DB, conversation DB, etc.) data/ + +# Agent skill tooling (local-only, not project code) +.agents/ +.trae/ +.aider* diff --git a/AGENTS.md b/AGENTS.md index 19ee88c..8cbee10 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,203 +1,206 @@ -# Fischer AgentKit — Project Context +# Fischer AgentKit — 项目上下文 -## Rules +## 规则 -- Python >= 3.11, type hints required, `pydantic>=2.0` for all data models -- Ruff for lint + format: `ruff check src/ && ruff format src/` (target py311, line-length 100) -- Tests: `pytest` (asyncio_mode=auto), markers: `integration`, `redis`, `postgres` -- Never use `any` type — use proper Pydantic models or `Unknown` -- API key comparison must use `hmac.compare_digest` (constant-time) -- Expert names validated with `_EXPERT_NAME_RE = re.compile(r"^[a-zA-Z0-9_-]{1,64}$")` -- HandoffTransport queues bounded (`maxsize=1024`), close uses sentinel pattern -- Frontend: Vue 3 + TypeScript + Ant Design Vue, Pinia stores, no `require()` calls -- **Async generator safety**: Never use early `return` before the first `yield` in `async def` — use `return; yield` pattern instead (see `.trae/rules/project_rules.md`) +- Python >= 3.11,必须使用类型注解,所有数据模型使用 `pydantic>=2.0` +- 使用 Ruff 进行 lint 和格式化:`ruff check src/ && ruff format src/`(目标 py311,行宽 100) +- 测试:`pytest`(asyncio\_mode=auto),标记:`integration`、`redis`、`postgres` +- 禁止使用 `any` 类型 — 使用合适的 Pydantic 模型或 `Unknown` +- API Key 比较必须使用 `hmac.compare_digest`(恒定时间比较) +- 专家名称使用 `_EXPERT_NAME_RE = re.compile(r"^[a-zA-Z0-9_-]{1,64}$")` 校验 +- HandoffTransport 队列有界(`maxsize=1024`),关闭使用 sentinel 模式 +- 前端:Vue 3 + TypeScript + Ant Design Vue,Pinia stores,禁止 `require()` 调用 +- 
**异步生成器安全**:在 `async def` 中禁止在第一个 `yield` 之前使用 `return` — 改用 `return; yield` 模式(见 `.trae/rules/project_rules.md`) +- 所有回复必须是中文 -## Tech Stack +## 技术栈 -- **Backend**: Python 3.11+, FastAPI, Uvicorn, Pydantic v2, SQLAlchemy 2 (async) -- **Frontend**: Vue 3, TypeScript, Vite 5, Ant Design Vue 4, Pinia, Vue Router 4 -- **Desktop**: Tauri 2.x (Rust shell + Python sidecar) -- **Infra**: Redis (bus/cache/state), PostgreSQL + pgvector (episodic memory) -- **CLI**: Typer + Rich -- **Exact versions**: see `pyproject.toml` (Python), `package.json` (Node) +- **后端**:Python 3.11+、FastAPI、Uvicorn、Pydantic v2、SQLAlchemy 2(async) +- **前端**:Vue 3、TypeScript、Vite 5、Ant Design Vue 4、Pinia、Vue Router 4 +- **桌面端**:Tauri 2.x(Rust 外壳 + Python sidecar) +- **基础设施**:Redis(总线/缓存/状态)、PostgreSQL + pgvector(情景记忆) +- **CLI**:Typer + Rich +- **精确版本**:见 `pyproject.toml`(Python)、`package.json`(Node) -## Commands +## 命令 ```bash -# Backend -pip install -e ".[dev]" # Install with dev deps -agentkit gui --port 8002 # Web GUI (frontend + API) -agentkit serve --port 8001 # API-only server -agentkit chat # CLI interactive chat -agentkit init # Generate agentkit.yaml -agentkit version / doctor / usage # Utility commands -agentkit task submit/status/list/cancel # Task management -agentkit skill list/load/info # Skill management -agentkit pair --name X # Generate API key for external system -pytest # Run all tests -pytest -m "not integration" # Unit tests only -ruff check src/ && ruff format src/ # Lint + format +# 后端 +pip install -e ".[dev]" # 安装开发依赖 +agentkit gui --port 8002 # Web GUI(前端 + API) +agentkit serve --port 8001 # 仅 API 服务 +agentkit chat # CLI 交互式聊天 +agentkit init # 生成 agentkit.yaml +agentkit version / doctor / usage # 工具命令 +agentkit task submit/status/list/cancel # 任务管理 +agentkit skill list/load/info # 技能管理 +agentkit pair --name X # 为外部系统生成 API Key +pytest # 运行所有测试 +pytest -m "not integration" # 仅单元测试 +ruff check src/ && ruff format src/ # Lint + 格式化 -# Frontend +# 前端 cd 
src/agentkit/server/frontend -npm install # Install deps -npm run dev # Vite dev server (proxy /api -> :8000) -npm run build:frontend # Production build -> ../static -npm run typecheck # TypeScript check +npm install # 安装依赖 +npm run dev # Vite 开发服务器(代理 /api -> :8000) +npm run build:frontend # 生产构建 -> ../static +npm run typecheck # TypeScript 检查 -# Desktop +# 桌面端 cd src/agentkit/server/frontend -npm run tauri dev # Tauri dev mode -npm run tauri build # Tauri production build +npm run tauri dev # Tauri 开发模式 +npm run tauri build # Tauri 生产构建 # Docker docker-compose up -d # AgentKit + Redis + PostgreSQL ``` -## Architecture +## 架构 -### Request Flow +### 请求流程 ``` -User Input - ├─ @board prefix -> BoardRouter (experts/board_router.py) -> BoardOrchestrator (multi-round discussion) - ├─ @team prefix -> ExpertTeamRouter (experts/router.py) -> TeamOrchestrator (pipeline collaboration) - └─ otherwise -> RequestPreprocessor (chat/request_preprocessor.py) - Layer 0: @skill:xxx prefix -> explicit skill selection (SKILL_REACT or skill's configured mode) - Layer 1: Trivial-input regex (~0ms, 0 tokens) -> DIRECT_CHAT - (greetings, identity, factual Q&A, math, translation; guarded by _TOOL_CONTEXT_RE) - Default: -> REACT (LLM decides tool usage autonomously in the agent loop) +用户输入 + ├─ @board 前缀 -> BoardRouter (experts/board_router.py) -> BoardOrchestrator(多轮讨论) + ├─ @team 前缀 -> ExpertTeamRouter (experts/router.py) -> TeamOrchestrator(流水线协作) + └─ 其他 -> RequestPreprocessor (chat/request_preprocessor.py) + Layer 0: @skill:xxx 前缀 -> 显式技能选择(SKILL_REACT 或技能配置的模式) + Layer 1: 琐碎输入正则(~0ms,0 tokens)-> DIRECT_CHAT + (问候、身份、事实问答、数学、翻译;由 _TOOL_CONTEXT_RE 守护) + 默认: -> REACT(LLM 在 agent 循环中自主决定工具使用) -> ExecutionMode: DIRECT_CHAT / REACT / SKILL_REACT / REWOO / REFLEXION / PLAN_EXEC / TEAM_COLLAB - (chat handler currently supports DIRECT_CHAT, REACT, SKILL_REACT; others raise "not yet supported") + (chat handler 当前支持 DIRECT_CHAT、REACT、SKILL_REACT;其余抛出 "not yet supported") ``` -**Note**: The old 
3-layer `CostAwareRouter` (with `RegexRules` / `HeuristicClassifier` / `SemanticRouter` / `Vickrey Auction`) has been replaced by `RequestPreprocessor`. The `IntentRouter` (`router/intent.py`) exists but is not wired into the chat flow. `AuctionHouse` with Vickrey auction lives in `marketplace/auction.py` (marketplace subsystem, not routing). +**注意**:旧的 3 层 `CostAwareRouter`(含 `RegexRules` / `HeuristicClassifier` / `SemanticRouter` / `Vickrey Auction`)已被 `RequestPreprocessor` 替换。`IntentRouter`(`router/intent.py`)存在但未接入 chat 流程。`AuctionHouse`(Vickrey 拍卖)位于 `marketplace/auction.py`(属于 marketplace 子系统,非路由)。 -### Agent Hierarchy +### Agent 层级 ``` -BaseAgent (core/base.py) — abstract, execute() is final - +-- ConfigDrivenAgent (core/config_driven.py) — YAML-driven, 3 task modes +BaseAgent (core/base.py) — 抽象基类,execute() 是 final 方法 + +-- ConfigDrivenAgent (core/config_driven.py) — YAML 驱动,3 种任务模式 +-- ReActEngine (core/react.py) — Think->Act->Observe - +-- ReflexionAgent (core/reflexion.py) — reflection-driven - +-- ReWOOAgent (core/rewoo.py) — plan-without-observation - +-- StandaloneAgent (core/standalone.py) — standalone runner + +-- ReflexionAgent (core/reflexion.py) — 反思驱动 + +-- ReWOOAgent (core/rewoo.py) — 无观察规划 + +-- StandaloneAgent (core/standalone.py) — 独立运行器 ``` -### Expert Team Mode (Pipeline) +### 专家团队模式(流水线) ``` -ExpertConfig (extends AgentConfig) -> Expert (wraps ConfigDrivenAgent via AgentPool) -ExpertTeam: manages experts, shared workspace, team status (FORMING→PLANNING→EXECUTING→SYNTHESIZING→COMPLETED) -TeamOrchestrator: pipeline execution — Lead decomposes task into PlanPhase with depends_on, topological sort, parallel layers -PlanPhase: id, name, assigned_expert, task_description, depends_on, status (PENDING/RUNNING/COMPLETED/FAILED) -TeamPlan: phases with dependencies, topological_sort() returns execution layers (Kahn's algorithm) -ExpertTeamRouter: @team prefix routing, @team:dev_team template expansion, name validation, MAX_EXPERTS=10 
-HandoffTransport: InProcess (asyncio.Queue) + Redis Pub/Sub — used for event broadcasting only +ExpertConfig(继承 AgentConfig)-> Expert(通过 AgentPool 包装 ConfigDrivenAgent) +ExpertTeam:管理专家、共享工作区、团队状态(FORMING→PLANNING→EXECUTING→SYNTHESIZING→COMPLETED) +TeamOrchestrator:流水线执行 — Lead 将任务分解为带 depends_on 的 PlanPhase,拓扑排序,并行分层 +PlanPhase:id、name、assigned_expert、task_description、depends_on、status(PENDING/RUNNING/COMPLETED/FAILED) +TeamPlan:带依赖的阶段,topological_sort() 返回执行层(Kahn 算法) +ExpertTeamRouter:@team 前缀路由、@team:dev_team 模板展开、名称校验、MAX_EXPERTS=10 +HandoffTransport:InProcess(asyncio.Queue)+ Redis Pub/Sub — 仅用于事件广播 ``` -**Pipeline Flow**: -1. `@team` prefix triggers team mode (or `@team:dev_team` for template, `@team:expert1,expert2` for explicit) -2. `ExpertTeam.create_team()` sets status to PLANNING -3. Lead Expert decomposes task into phases via LLM (fallback to single phase on failure) -4. `topological_sort()` arranges phases into layers (same-layer parallel, inter-layer serial) -5. Each phase creates an isolated `ConfigDrivenAgent` via `AgentPool.create_agent` (context isolation, KTD3) -6. Phase outputs passed via `SharedWorkspace` (`{plan_id}/phase/{phase_id}/output`) -7. Lead synthesizes results (BEST strategy) -8. On all-phases-fail: fallback to single agent mode +**流水线流程**: -**Event Sequence**: `team_formed` → `plan_update` → `phase_started` → `expert_step` → `expert_result` → `phase_completed` → `team_synthesis` → `team_dissolved` +1. `@team` 前缀触发团队模式(或 `@team:dev_team` 用模板,`@team:expert1,expert2` 显式指定) +2. `ExpertTeam.create_team()` 将状态置为 PLANNING +3. Lead Expert 通过 LLM 将任务分解为阶段(失败时回退为单阶段) +4. `topological_sort()` 将阶段排成层(同层并行,层间串行) +5. 每个阶段通过 `AgentPool.create_agent` 创建隔离的 `ConfigDrivenAgent`(上下文隔离,KTD3) +6. 阶段输出通过 `SharedWorkspace` 传递(`{plan_id}/phase/{phase_id}/output`) +7. Lead 综合结果(BEST 策略) +8. 
所有阶段都失败时:回退到单 agent 模式 -**Team Templates**: `configs/experts/dev_team.yaml` stores member list in `bound_skills` field (tech_lead, frontend_engineer, backend_engineer, qa_engineer, code_reviewer) +**事件序列**:`team_formed` → `plan_update` → `phase_started` → `expert_step` → `expert_result` → `phase_completed` → `team_synthesis` → `team_dissolved` -Lifecycle: FORMING -> PLANNING -> EXECUTING -> SYNTHESIZING -> COMPLETED -> DISSOLVED -On failure: fallback to single-agent mode (lead or first active expert). +**团队模板**:`configs/experts/dev_team.yaml` 在 `bound_skills` 字段存储成员列表(tech\_lead、frontend\_engineer、backend\_engineer、qa\_engineer、code\_reviewer) -### Module Map +生命周期:FORMING -> PLANNING -> EXECUTING -> SYNTHESIZING -> COMPLETED -> DISSOLVED +失败时:回退到单 agent 模式(lead 或第一个活跃专家)。 -| Layer | Modules | Purpose | -|-------|---------|---------| -| API | `server/`, `cli/` | FastAPI routes + Typer CLI | -| Auth | `server/auth/` | JWT + RBAC + terminal security (6-layer whitelist) | -| Service | `core/`, `chat/`, `skills/`, `experts/` | Agent engine, routing, skills, expert teams | -| Data | `memory/`, `session/`, `bus/` | Persistence, sessions, messaging | -| Utility | `llm/`, `tools/`, `evolution/`, `quality/`, `mcp/` | LLM gateway, tools, self-evolution, quality, MCP | -| Client | `client/` | ConfigSync, RemoteLLMProvider integration | +### 模块映射 -### Key Subsystems +| 层级 | 模块 | 用途 | +| --- | ---------------------------------------------- | ------------------------------- | +| API | `server/`、`cli/` | FastAPI 路由 + Typer CLI | +| 认证 | `server/auth/` | JWT + RBAC + 终端安全(6 层白名单) | +| 服务 | `core/`、`chat/`、`skills/`、`experts/` | Agent 引擎、路由、技能、专家团队 | +| 数据 | `memory/`、`session/`、`bus/` | 持久化、会话、消息 | +| 工具 | `llm/`、`tools/`、`evolution/`、`quality/`、`mcp/` | LLM 网关、工具、自进化、质量、MCP | +| 客户端 | `client/` | ConfigSync、RemoteLLMProvider 集成 | -- **LLM Gateway** (`llm/`): 6 providers (OpenAI/Anthropic/Gemini/Doubao/Wenxin/Yuanbao), fallback, semantic cache, usage tracking, RemoteLLMProvider 
(client→server proxy with 401 refresh retry) -- **Memory** (`memory/`): 4-layer (SOUL/USER/MEMORY/DAILY), WorkingMemory (Redis), EpisodicMemory (PG+pgvector), SemanticMemory (HTTP RAG) -- **Evolution** (`evolution/`): Reflector, PromptOptimizer (genetic), PitfallDetector, ABTester -- **Tools** (`tools/`): 21 built-in + MCP extension, composition (SequentialChain/ParallelFanOut/DynamicSelector) -- **Pipeline** (`orchestrator/`): PipelineEngine, SagaOrchestrator, DynamicPipeline, HandoffManager -- **Bus** (`bus/`): MemoryBus (in-process), RedisBus (distributed) -- **Auth** (`server/auth/`): JWT (access 15min + refresh 7d, HS256), API Key (constant-time compare), 3-level RBAC (member/operator/admin + permission bits), 6-layer terminal security (blocklist→shell-ops→builtin→global→user→session→danger), bcrypt password hashing (rounds=12) +### 关键子系统 -### Server Routes (22 modules) +- **LLM 网关**(`llm/`):6 个 provider(OpenAI/Anthropic/Gemini/Doubao/Wenxin/Yuanbao)、fallback、语义缓存、用量追踪、RemoteLLMProvider(client→server 代理,带 401 刷新重试) +- **记忆**(`memory/`):4 层(SOUL/USER/MEMORY/DAILY)、WorkingMemory(Redis)、EpisodicMemory(PG+pgvector)、SemanticMemory(HTTP RAG) +- **进化**(`evolution/`):Reflector、PromptOptimizer(遗传算法)、PitfallDetector、ABTester +- **工具**(`tools/`):21 个内置 + MCP 扩展,组合(SequentialChain/ParallelFanOut/DynamicSelector) +- **流水线**(`orchestrator/`):PipelineEngine、SagaOrchestrator、DynamicPipeline、HandoffManager +- **总线**(`bus/`):MemoryBus(进程内)、RedisBus(分布式) +- **认证**(`server/auth/`):JWT(access 15min + refresh 7d,HS256)、API Key(恒定时间比较)、3 级 RBAC(member/operator/admin + 权限位)、6 层终端安全(blocklist→shell-ops→builtin→global→user→session→danger)、bcrypt 密码哈希(rounds=12) -| Prefix | Module | Purpose | -|--------|--------|---------| -| `/api/v1/agents` | agents.py | Agent CRUD | -| `/api/v1/tasks` | tasks.py | Task submit/query/cancel | -| `/api/v1/skills` | skills.py | Skill register/list | -| `/api/v1/chat` | chat.py | Chat REST + WebSocket | -| `/api/v1/ws` | ws.py | WebSocket channel | -| 
`/api/v1/llm` | llm.py | LLM usage | -| `/api/v1/llm/chat` | llm_gateway.py | LLM gateway proxy (JWT auth, SSE streaming) | -| `/api/v1/health` | health.py | Health check | -| `/api/v1/metrics` | metrics.py | Metrics | -| `/api/v1/evolution` | evolution.py + evolution_dashboard.py | Self-evolution API | -| `/api/v1/memory` | memory.py | Memory management | -| `/api/v1/portal` | portal.py | Portal | -| `/api/v1/kb` | kb_management.py | Knowledge base | -| `/api/v1/skill-mgmt` | skill_management.py | Skill management | -| `/api/v1/workflows` | workflows.py | Workflows | -| `/api/v1/terminal` | terminal.py | Local terminal (client sidecar PTY) | -| `/api/v1/terminal/server` | terminal_server.py | Server terminal (server PTY + admin approval) | -| `/api/v1/terminal` | terminal_whitelist.py | Whitelist/blocklist/audit-log management | -| `/api/v1/settings` | settings.py | Settings | -| `/api/v1/auth` | auth.py | Login/refresh/logout/me | -| `/api/v1/system` | system.py | System resources (SYSTEM_CONFIG permission) | -| `/api/v1/config` | config_sync.py | Config version + sync (polling) | +### 服务端路由(22 个模块) -### WebSocket Chat Protocol +| 前缀 | 模块 | 用途 | +| ------------------------- | -------------------------------------- | ------------------------- | +| `/api/v1/agents` | agents.py | Agent CRUD | +| `/api/v1/tasks` | tasks.py | 任务提交/查询/取消 | +| `/api/v1/skills` | skills.py | 技能注册/列表 | +| `/api/v1/chat` | chat.py | Chat REST + WebSocket | +| `/api/v1/ws` | ws.py | WebSocket 通道 | +| `/api/v1/llm` | llm.py | LLM 用量 | +| `/api/v1/llm/chat` | llm\_gateway.py | LLM 网关代理(JWT 认证,SSE 流式) | +| `/api/v1/health` | health.py | 健康检查 | +| `/api/v1/metrics` | metrics.py | 指标 | +| `/api/v1/evolution` | evolution.py + evolution\_dashboard.py | 自进化 API | +| `/api/v1/memory` | memory.py | 记忆管理 | +| `/api/v1/portal` | portal.py | Portal | +| `/api/v1/kb` | kb\_management.py | 知识库 | +| `/api/v1/skill-mgmt` | skill\_management.py | 技能管理 | +| `/api/v1/workflows` | workflows.py | 工作流 | +| 
`/api/v1/terminal` | terminal.py | 本地终端(client sidecar PTY) | +| `/api/v1/terminal/server` | terminal\_server.py | 服务端终端(server PTY + 管理员审批) | +| `/api/v1/terminal` | terminal\_whitelist.py | 白名单/黑名单/审计日志管理 | +| `/api/v1/settings` | settings.py | 设置 | +| `/api/v1/auth` | auth.py | 登录/刷新/登出/me | +| `/api/v1/system` | system.py | 系统资源(需 SYSTEM\_CONFIG 权限) | +| `/api/v1/config` | config\_sync.py | 配置版本 + 同步(轮询) | -Client -> Server: `message`, `reply`, `confirmation_reply`, `cancel`, `ping` -Server -> Client: `connected`, `token`, `thinking`, `step`, `final_answer`, `skill_match`, `confirmation_request`, `confirmation_result`, `ask_human`, `error`, `pong` -Expert Team events: `team_formed`, `expert_step`, `expert_result`, `plan_update`, `phase_started`, `phase_completed`, `phase_failed`, `team_synthesis`, `team_dissolved` +### WebSocket Chat 协议 -### Frontend Pages +Client -> Server:`message`、`reply`、`confirmation_reply`、`cancel`、`ping` +Server -> Client:`connected`、`token`、`thinking`、`step`、`final_answer`、`skill_match`、`confirmation_request`、`confirmation_result`、`ask_human`、`error`、`pong` +专家团队事件:`team_formed`、`expert_step`、`expert_result`、`plan_update`、`phase_started`、`phase_completed`、`phase_failed`、`team_synthesis`、`team_dissolved` -- `/agent/chat` — Chat with Expert Team view -- `/agent/code` — Code/workflow -- `/agent/monitor` — Evolution dashboard -- `/computer-use` — Desktop control -- `/login` — Login page (JWT auth) -- Terminal panel — Local + server terminal with whitelist manager +### 前端页面 -### Configuration Priority +- `/agent/chat` — 专家团队聊天视图 +- `/agent/code` — 代码/工作流 +- `/agent/monitor` — 进化看板 +- `/computer-use` — 桌面控制 +- `/login` — 登录页(JWT 认证) +- 终端面板 — 本地 + 服务端终端,含白名单管理器 -CLI args > `agentkit.yaml` > env vars (`${VAR:-default}`) > `.env` > hardcoded defaults +### 配置优先级 -Config search: `--config` path > `./agentkit.yaml` > `~/.agentkit/agentkit.yaml` +CLI 参数 > `agentkit.yaml` > 环境变量(`${VAR:-default}`)> `.env` > 硬编码默认值 -## Conventions +配置查找:`--config` 路径 
> `./agentkit.yaml` > `~/.agentkit/agentkit.yaml` -- Skill configs: `configs/skills/*.yaml` (16 presets, unified as `SkillConfig`) -- Skill categories: `agent_template` (execution engines: react/direct/rewoo/reflexion/plan_exec/goal_driven) vs `business_skill` (domain skills). Classified via `_ENGINE_TEMPLATE_NAMES` in `server/routes/skill_management.py`. Frontend groups by `category` field — `SkillsView` two-column layout, `SkillCard`/`SkillsTab` show type tags (引擎/技能) and category-based icons -- LLM configs: `agentkit.yaml` llm section (unified with server config) -- Pipeline configs: `configs/pipelines/*.yaml` -- Expert templates: `configs/experts/*.yaml` (5 programming experts + dev_team team template), registered via `ExpertTemplateRegistry` -- Team templates: `bound_skills` field stores member list (e.g., `dev_team.yaml` lists tech_lead, frontend_engineer, backend_engineer, qa_engineer, code_reviewer) -- All Pydantic models use `model_config = ConfigDict(...)` not `class Config` -- Test files: `tests/unit/` and `tests/integration/` -- Frontend stores: Pinia, one per domain (chat, team, settings) -- Frontend components: `src/agentkit/server/frontend/src/components/` +## 约定 -## Boundaries +- 技能配置:`configs/skills/*.yaml`(16 个预设,统一为 `SkillConfig`) +- 技能分类:`agent_template`(执行引擎:react/direct/rewoo/reflexion/plan\_exec/goal\_driven)vs `business_skill`(领域技能)。通过 `server/routes/skill_management.py` 中的 `_ENGINE_TEMPLATE_NAMES` 分类。前端按 `category` 字段分组 — `SkillsView` 双栏布局,`SkillCard`/`SkillsTab` 显示类型标签(引擎/技能)和基于分类的图标 +- LLM 配置:`agentkit.yaml` llm 段(与服务端配置统一) +- 流水线配置:`configs/pipelines/*.yaml` +- 专家模板:`configs/experts/*.yaml`(5 个编程专家 + dev\_team 团队模板),通过 `ExpertTemplateRegistry` 注册 +- 团队模板:`bound_skills` 字段存储成员列表(如 `dev_team.yaml` 列出 tech\_lead、frontend\_engineer、backend\_engineer、qa\_engineer、code\_reviewer) +- 所有 Pydantic 模型使用 `model_config = ConfigDict(...)` 而非 `class Config` +- 测试文件:`tests/unit/` 和 `tests/integration/` +- 前端 stores:Pinia,每个领域一个(chat、team、settings) +- 
前端组件:`src/agentkit/server/frontend/src/components/` + +## 边界 + +- 未经明确请求不得修改 `pyproject.toml` 版本 +- 禁止直接推送到 main — 使用 feature 分支 +- 集成测试需要 Docker(Redis + PostgreSQL) +- 桌面端构建需要 Rust 工具链 + PyInstaller -- Never modify `pyproject.toml` version without explicit request -- Never push to main directly — use feature branches -- Integration tests require Docker (Redis + PostgreSQL) -- Desktop builds require Rust toolchain + PyInstaller diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 120000 index 47dc3e3..0000000 --- a/CLAUDE.md +++ /dev/null @@ -1 +0,0 @@ -AGENTS.md \ No newline at end of file diff --git a/README.md b/README.md index 1ad18c8..9921893 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,20 @@ # Fischer AgentKit -统一 AI Agent 开发框架 -- 将 LLM、Tool、Prompt 组装为可执行的 Skill,通过 ReAct 推理引擎自主完成任务,支持记忆持久化、自进化、Pipeline 编排和桌面客户端。 +企业级统一 AI Agent 门户平台 -- 面向企业用户与开发者,将 LLM、Tool、Prompt 组装为可执行的 Skill,通过 ReAct 推理引擎自主完成任务,支持记忆持久化、自进化、Pipeline 编排、专家团队协作和桌面客户端。 ## 项目简介 -AgentKit 解决的核心问题:**从写 150 行 Agent 代码降为 10-20 行 YAML 配置**。 +AgentKit 是企业级统一 AI Agent 门户平台,目标用户覆盖**企业用户**与**开发者**: -传统方式下,每新增一个 Agent 需要编写子类、处理 LLM 调用、管理工具绑定、校验输出质量。AgentKit 将这些能力标准化为可组合模块,开发者只需编写 YAML 配置即可定义一个完整的 Skill(Prompt + Tool + 质量门禁),框架自动完成 ReAct 推理循环、模型路由降级、产出质量检查和标准化输出。 +- **企业用户**:通过 Web GUI / 桌面客户端开箱即用,零代码配置 Skill、专家团队、知识库,直接获得多专家协作、文档生成、桌面操控等能力 +- **开发者**:通过 Python 库 / CLI / HTTP API 深度集成,将 150 行 Agent 代码降为 10-20 行 YAML 配置,框架自动完成 ReAct 推理循环、模型路由降级、产出质量检查和标准化输出 + +AgentKit 将 LLM、Tool、Prompt 标准化为可组合模块,开发者只需编写 YAML 配置即可定义一个完整的 Skill(Prompt + Tool + 质量门禁);企业用户通过门户界面即可编排专家团队、监控自进化、管理知识库与终端安全。 核心定位: -- **配置驱动** -- YAML 定义 Skill,无需写 Agent 子类 +- **门户平台** -- 统一入口聚合 Skill、专家团队、知识库、终端、自进化等能力,企业用户开箱即用 +- **配置驱动** -- YAML 定义 Skill,开发者无需写 Agent 子类 - **生产就绪** -- 内置质量门禁、模型降级、用量统计、级联检测、状态持久化 - **四种使用** -- Python 库引用、CLI 聊天、Web GUI、桌面客户端 - **专家团队** -- Expert Team Mode,多专家协作执行复杂任务,前端以多角色对话流呈现 diff --git a/docs/brainstorms/2026-06-24-bitable-module-requirements.md 
b/docs/brainstorms/2026-06-24-bitable-module-requirements.md new file mode 100644 index 0000000..7f011f3 --- /dev/null +++ b/docs/brainstorms/2026-06-24-bitable-module-requirements.md @@ -0,0 +1,214 @@ +# 多维表格(Bitable)伴生服务需求文档 + +- **日期**:2026-06-24 +- **状态**:已对齐,待规划 +- **范围分级**:Deep — feature +- **后续**:交由 `/ce-plan` 做实现规划 + +--- + +## 1. 问题与机会 + +AgentKit 当前缺少一个**统一的持久化结构化数据落地载体**。当出现以下需求时,没有合适的地方承接数据: + +- 多系统数据汇总(需把多个来源的结构化数据合并到一处) +- 本地 Excel 上传后持久化(当前 Excel 仅能单向导出或解析为文本进 RAG,无法作为可编辑的结构化表留存) +- 外部数据采集(爬虫/API 抓取的结果需要按字段落地为可查询、可视图、可分析的表) + +现状:Excel 导出是单向的(`src/agentkit/documents/renderers/excel_renderer.py`),Excel 解析只转文本进知识库(`src/agentkit/memory/document_loader.py`),`MultiSourceRetriever` 只做读侧多源检索,`SharedWorkspace` 是带 TTL 的临时 KV。**没有任何模块能把异构数据源的结构化数据持久化为可编辑、可视图、可计算的多维表格。** + +机会:引入多维表格伴生服务,作为 AgentKit 异构数据的统一落地载体。Agent 作为数据的主要作者(采集写入),用户在落地后的表上精修、配视图、做分析。这既补齐了结构化数据持久化的缺口,又让 Agent 获得"数据编排者"的战略能力。 + +## 2. 主要使用者与核心价值 + +| 维度 | 决策 | +|------|------| +| **形态** | 混合模式——Agent 采集 + 用户精修 | +| **Agent 角色** | 数据作者:执行三类采集(Excel/数据库/爬虫API),按字段写入多维表格 | +| **用户角色** | 数据精修者与分析者:在落地后的表上编辑用户列、配置视图、做分析 | +| **核心价值** | 异构数据源的统一持久化落地载体,其上承载视图、分析、公式、引用 | + +**三类采集场景**(均为 Agent 驱动): +1. 上传 Excel 或提供在线 Excel 地址 → 读取内容 → 按字段写入多维表格 +2. 指定数据库 → 根据数据表 → 生成多张多维表格数据表 +3. 根据指令执行数据采集(爬虫或具体 API)→ 获取到的数据按字段写入多维表格 + +## 3. 服务架构 + +**多维表格是 AgentKit 的伴生服务**: + +- **逻辑独立**:自有 API/CLI、自有领域模型(表/字段/记录/视图/公式)、自有存储边界 +- **当前共部署**:物理上与 AgentKit 同进程或同部署单元,UI 级集成于 AgentKit 前端 +- **调用边界**:AgentKit ↔ 多维表格走 API/CLI,**不做进程内紧耦合** +- **未来演进**:可零成本抽离为独立服务,只是部署变更,不改代码 +- **存储边界**:当前共享 AgentKit 的 PostgreSQL,使用**独立 schema** 隔离;未来抽离时迁移 + +> 设计含义:所有跨服务交互按"远程调用"心智设计,即使当前是本地调用。字段所有权模型、upsert 语义、公式引擎都**内建在多维表格服务自身**,而非套在外部工具上。 + +## 4. 
关键产品决策 + +### 4.1 写入语义:按主键 upsert + 字段所有权模型 + +Agent 重复采集时,按表的主键字段 upsert: + +- **匹配到的记录**:更新"数据列"(Agent 管理的列),保留"用户列" +- **未匹配的记录**:新增 +- **用户列**永不被 Agent 覆盖 + +**字段所有权**(每列标记为"数据列"或"用户列"): + +- **自动推断**:公式列、引用列、手动标注列 → 用户列;Agent 采集写入的列 → 数据列 +- **Agent 声明**:Agent 采集时可显式声明列的所有权,覆盖自动推断 +- 公式列天然是用户列(派生的,永不被覆盖) + +### 4.2 公式与引用:身份核心,深度分阶段 + +公式列和引用列是"多维表格"身份的核心,**必须从 v1 存在**,但支持深度分阶段扩展: + +- **v1**:基础公式(算术、字符串、简单聚合如 SUM/AVG/COUNT)+ 基础引用(lookup 到另一张表的字段) +- **v2+**:高级公式(日期、条件、跨表 rollup)+ 函数库扩展 + +**公式重算策略**:异步重算 + "计算中"状态标记。Agent 写入数据列后,依赖该列的公式列进入"计算中"状态,由后台异步重算管道更新。避免同步重算阻塞写入,代价是短暂的不一致窗口(用户可见"计算中"标记)。 + +### 4.3 规模与存储:可演进 + +- **起步规模**:单表 < 10 万行,总表 < 1000 张(部门级) +- **架构目标**:支持未来向大规模(10万+行)演进 +- **存储选型**:规范化存储(字段定义、记录、单元格分离)+ 索引 + 分页,**不**用 JSONB 整表塞单行 +- **大规模演进路径**(v3):列式存储、分区、物化视图、异步重算管道 + +## 5. 能力范围与分阶段 + +用户提出的 6 项能力,按复杂度与依赖关系分三阶段: + +| 能力 | 复杂度 | v1 | v2 | v3 | +|------|--------|----|----|----| +| ① 模块搭建(服务骨架+领域模型+API) | 基础 | ✅ | — | — | +| ② 数据采集落地(Excel/DB/爬虫API 三类) | 中 | ✅ | — | — | +| ③ 多视图展示 | 中 | 网格视图 | 看板/甘特/画廊 | 表单 | +| ④ 分析计算 | 中高 | — | 分组/透视 | 高级聚合 | +| ⑤ 公式列+引用列 | 高 | 基础公式+lookup | 高级公式+rollup | 函数库扩展 | +| ⑥ 图片+附件 | 低中 | ✅ | — | — | + +### v1:核心闭环验证 + +验证"Agent 采集 → 持久化落地 → 用户查看/精修"的核心闭环: + +- 服务骨架:领域模型(表/字段/记录/视图)、API/CLI、独立 schema 存储 +- 字段所有权模型 + 按主键 upsert 语义 +- 三类采集落地(Excel 上传/URL、数据库导入、爬虫/API 采集) +- 网格视图(表格视图,支持排序/筛选/分页/单元格编辑) +- 基础公式列(算术、字符串、SUM/AVG/COUNT 等简单聚合) +- 基础引用列(lookup 到另一张表的字段) +- 图片/附件字段类型(复用现有文件上传能力) +- 异步公式重算 + "计算中"状态 + +### v2:多视图与分析 + +- 看板视图(按分组字段分列展示) +- 甘特视图(按日期字段排时间线) +- 画廊视图(以图片/附件为主视觉的卡片展示) +- 高级公式(日期函数、条件函数、跨表 rollup) +- 分析能力(分组聚合、透视表) + +### v3:规模化与协作 + +- 表单视图(以表单形式收集数据写入表) +- 公式函数库扩展 +- 大规模优化(列式存储、分区、物化视图、异步重算管道升级) +- 多人实时协作 + +## 6. 
方案探索与推荐 + +### 方案 1:自建多维表格引擎,分阶段交付 + +在 AgentKit 内构建原生 bitable 子系统,规范化存储,字段所有权模型原生内建,公式引擎自建分阶段。Agent 通过新增的 bitable API/CLI 写入。 + +- **优点**:完全可控;与现有栈(PG/Redis/Vue/FastAPI)匹配;字段所有权模型原生;upsert 语义无摩擦 +- **缺点**:构建工作量最大;公式引擎是硬骨头;需长期维护 +- **风险**:公式引擎范围蔓延;大规模重算性能 + +### 方案 2:集成开源多维表格(APITable/NocoDB)作为子服务 + +部署开源 bitable 作为伴生服务,AgentKit 通过其 API 让 Agent 写入,用户编辑在 bitable 原生 UI 完成,上层叠加 upsert-保留用户列逻辑。 + +- **优点**:视图/公式/附件白送,成熟,最快获完整功能 +- **缺点**:AGPL 协议有传染性风险(若商业化);upsert-保留用户列需硬套(外部 bitable 无字段所有权概念);集成走 API 较松 +- **风险**:协议冲突;外部模型与需求偏离 + +### 方案 3(挑战者):Agent 结构化数据底座优先,UI 作为叠加层 + +反转优先级:多维表格首先是 Agent 的持久化结构化工作记忆/输出底座,用户侧多视图 UI 是读写底座的叠加层。 + +- **优点**:最大化 Agent 协同;战略差异化;底座可驱动表格 UI 之外的能力 +- **缺点**:用户主动建表流程次要;底座抽象需更多架构思考 + +### 推荐:方案 1 自建 + 方案 3 底座心智 + +**理由**: + +1. 三类采集场景全是 Agent 驱动——本质是"Agent 作为数据作者",方案 3 心智天然契合;但用户也要精修,需方案 1 的完整引擎 +2. upsert-保留用户列的字段所有权模型是定制的——外部 bitable 没有此概念,硬套很痛;AGPL 对可能商业化的产品是真实风险 +3. 现有基础设施齐全(PG + pgvector + Redis + SQLAlchemy 2 + Vue3 + Ant Design Vue),自建边际成本可控 +4. 伴生服务架构约束天然要求 API/CLI 边界——方案 1 自建反而最契合,因为所有权模型内建在服务自身 +5. 分阶段控制风险——v1 先验证核心闭环 + +**与方案 3 的融合**:架构上以"Agent 的持久化结构化数据底座"心智设计领域模型,使多维表格不仅是"一个表格功能",而是 Agent 的数据编排能力的载体。这让底座未来可驱动表格 UI 之外的能力(仪表盘、报表、Agent 记忆)。 + +## 7. 范围边界 + +### 本次范围内(v1) + +- 多维表格伴生服务骨架(领域模型、API/CLI、独立 schema 存储) +- 字段所有权模型 + 按主键 upsert 语义 +- 三类采集落地(Excel/DB/爬虫API) +- 网格视图 +- 基础公式列 + 基础引用列(lookup) +- 图片/附件字段 +- 异步公式重算 + +### 延后(v2/v3) + +- 看板/甘特/画廊/表单视图 +- 高级公式(日期/条件/跨表 rollup)+ 函数库扩展 +- 分析能力(分组/透视) +- 大规模优化(列式/分区/物化视图) +- 多人实时协作 + +### 本产品身份之外 + +- 不做通用电子表格(非单元格自由编辑,是字段化记录模型) +- 不做 ETL/数据管道平台(采集是 Agent 驱动的按需执行,非定时调度管道) +- 不做 BI 仪表盘产品(分析能力服务于表格内聚合,非独立 BI) +- 不替代知识库 RAG(多维表格是结构化数据载体,非非结构化文档检索) + +## 8. 
假设与依赖 + +- **假设**:Agent 已具备执行采集任务的能力(爬虫/API 调用),多维表格只承接"写入"环节,不负责采集执行本身 +- **假设**:共享 PostgreSQL 的性能足以支撑 v1/v2 规模;v3 大规模时再评估独立数据库或列式存储 +- **依赖**:现有文件上传能力(`src/agentkit/server/routes/chat.py` 的上传端点、`data/uploads/` 存储)可复用于附件字段 +- **依赖**:Agent 工具系统(`src/agentkit/tools/base.py` 的 `execute() -> dict` 契约)可扩展新增 bitable 写入工具 +- **假设**:公式异步重算的"计算中"窗口(秒级)对用户可接受 + +## 9. 成功标准 + +**v1 验证成功的标志**: + +1. Agent 能把一份 Excel 上传的数据按字段写入多维表格,并在网格视图中查看 +2. Agent 能指定一个数据库表,生成对应的多维表格 +3. Agent 能执行一次 API 采集,把返回数据按字段写入多维表格 +4. 用户能在网格视图中编辑单元格、新增公式列(如 `=SUM(数据列)`)、看到异步重算结果 +5. Agent 对同一表重复采集时,按主键 upsert 更新数据列,用户的公式列和手动编辑保留不变 +6. 多维表格服务通过 API/CLI 被 AgentKit 调用,无进程内紧耦合 + +## 10. 下一步 + +本需求文档交由 `/ce-plan` 做实现规划,重点规划: + +- v1 的领域模型设计(表/字段/记录/视图/公式 的实体关系) +- 独立 schema 的存储设计(规范化表结构、索引、分页) +- API/CLI 接口设计(CRUD + 采集写入 + upsert + 公式重算触发) +- 字段所有权模型的实现机制(自动推断 + Agent 声明) +- 异步重算管道设计 +- Agent bitable 写入工具设计 +- 前端网格视图组件选型与集成 diff --git a/docs/brainstorms/2026-06-24-portal-platform-evolution-requirements.md b/docs/brainstorms/2026-06-24-portal-platform-evolution-requirements.md new file mode 100644 index 0000000..6184aa2 --- /dev/null +++ b/docs/brainstorms/2026-06-24-portal-platform-evolution-requirements.md @@ -0,0 +1,182 @@ +--- +date: 2026-06-24 +topic: portal-platform-evolution +--- + +# AgentKit 门户平台整体演进路线 + +## Summary + +按优先级串行推进 AgentKit 门户平台演进:先建独立 RAG 平台对标 MaxKB 功能对等,再扩展多端接入与 MCP Server,最后生态替换降本(MCP/Celery/LiteLLM)。不设硬性时间,按完成度推进。 + +## Problem Frame + +AgentKit 定位为企业级统一 AI Agent 门户平台,面向企业用户与开发者。对标 MaxKB(开源企业级智能体平台,GitHub 19k+ stars)后发现,当前能力堆栈在多个方向存在差距: + +- **RAG 工业级管道**:AgentKit 现有 `memory/` 模块是开发者级组件库(基础分块 + pgvector 语义检索 + time_decay 重排),MaxKB 是工业级产品功能(双索引检索 + 智能分段 + 问题生成 + 术语表 + 命中处理模式)。RAG 是门户平台服务企业知识库场景的底线能力。 +- **平台触达**:AgentKit 仅有飞书/Confluence/通用 HTTP 三种 RAG 适配器,MaxKB 原生支持企微/钉钉/飞书/Slack 多端接入。门户平台需要触达企业现有协作工具。 +- **MCP Server**:AgentKit 已有 MCP Client(`mcp/client.py`)和 MCP Server(`mcp/server.py`)基础实现,但尚未将 Skill/专家团队发布为 MCP 工具。门户平台应完善 MCP Server 的 Skill/专家团队发布能力。 +- **自研 vs 生态**:AgentKit 
大量自研(Agent 引擎/LLM 网关/工作流画布/MCP 客户端/记忆系统/消息总线),commodity 层维护成本高。 + +本次演进为**预防性演进 + 必备功能补齐**,非救火式驱动。目标是补齐门户平台应有的能力,使 AgentKit 在企业级 AI Agent 平台赛道具备完整竞争力。 + +## Key Decisions + +**串行演进策略(方案 A)。** 按优先级串行推进,每个方向充分交付后再进入下一个。理由:用户要求"对标 MaxKB 功能对等",MVP 驱动难以一次达标;不设硬性时间契合串行节奏;预防性演进无紧急压力,可保障交付质量。 + +**RAG 平台并行独立。** 新建独立 RAG 平台模块,现有 `memory/` 保留给 Agent 记忆(WorkingMemory/EpisodicMemory/SemanticMemory)。理由:职责分离,避免 RAG 与 Agent 记忆耦合;RAG 平台作为门户平台基础设施服务于企业用户知识库场景,Agent 记忆服务于 Agent 运行时。 + +**开放引入生态依赖。** commodity 层优先用生态,降低维护成本。约束:需注意开源协议合规,且对已完成或进行中的特性保持向后兼容。差异化层(Agent 引擎/专家团队/自进化/终端安全)保持自研。 + +**保留现有工作流。** FlowCanvas 不替换为 LogicFlow,现有工作流画布保持自研。理由:避免破坏现有节点类型(SkillNode/ApprovalNode/ConditionNode/ParallelNode)和工作流。 + +**对标 MaxKB 功能对等。** RAG 工业级管道的成功标准是功能对等:双索引检索/智能分段/问题生成/术语表/命中处理模式/rerank 全部具备。 + +## Requirements + +### RAG 工业级管道(优先级 1) + +R1. 新建独立 RAG 平台模块,与现有 `memory/` 模块职责分离,现有 `memory/` 保留给 Agent 记忆使用。现有 `memory/local_rag.py` 的 `LocalRAGService`(pgvector + 分块 + 嵌入 + 语义检索)需明确迁移策略:吸收/扩展至新平台、提取至新模块、或新建并废弃 LocalRAGService。 + +R2. 支持双索引检索:pgvector 语义检索 + PostgreSQL 全文检索(`search_vector`),提供 `embedding`(语义)/ `keywords`(全文)/ `blend`(混合)三种检索模式。检索模式由企业用户按知识库配置默认值,Agent 运行时可按查询特征覆盖。 + +R3. 在 RAG 平台模块中实现智能分段与高级分段能力(可参考现有 `memory/chunking.py` 的分块基础),提供分段预览能力,企业用户可在向量化前查看分段结果。 + +R4. 支持问题自动生成:为文档段落自动生成相关问题/问法,提升检索召回率。 + +R5. 支持术语表(Termbase):通过全文检索分词增强,提升中文场景检索准确率。 + +R6. 支持命中处理模式:模型优化模式(LLM 基于检索结果生成回答)与直接回答模式(直接返回匹配段落),按知识库配置默认模式,企业用户在知识库设置中选择,Agent 可按查询场景覆盖。 + +R7. 支持 rerank 重排:检索结果经 rerank 模型重排后返回,提升相关性排序。 + +R8. 扩展现有 `KnowledgeBaseView`/`DocumentUpload`/`SearchTest` 组件,提供可视化文档管理:文档上传/分段预览/检索测试,企业用户可通过前端界面管理知识库。知识库必须实施 per-KB 访问控制(owner/authorized-users),Agent 检索必须限定于调用用户授权的知识库。文档上传必须验证文件类型(白名单)、强制大小限制、并在索引前净化解析内容(markdown sanitize、PDF 解析安全)。 + +### 平台触达扩展(优先级 2) + +R9. 支持多端消息接入:企微/钉钉/飞书/Slack 消息适配器,企业用户可通过现有协作工具使用 AgentKit。各平台适配器必须验证平台提供的请求签名/token(飞书 encrypt_key、钉钉 token、企业微信 EncodingAESKey)后处理消息,拒绝未认证请求。所有第三方平台凭证必须存储于 secrets store(非明文配置),定义轮换策略与访问审计。 + +R10. 
完善现有 MCP Server(`mcp/server.py`):支持将 Skill/专家团队发布为 MCP 工具,供外部 AI 系统调用。MCP 工具调用必须要求认证与授权(复用现有 JWT+RBAC 或 API Key 机制),发布 Skill/专家团队为 MCP 工具需管理员级授权。 + +### 生态替换降本(优先级 3) + +**目标**:将 commodity 层(MCP 客户端/异步任务/LLM Provider 适配)迁移至生态方案,降低自研维护成本,使团队聚焦差异化能力。成功标准:替换后现有功能行为不变,维护代码量减少。 + +R11. MCP 客户端替换为 `langchain-mcp-adapters`:跟进行业协议演进,降低自研 3 传输层(Stdio/HTTP/SSE)的维护成本。 + +R12. 引入 Celery 异步任务:与现有 asyncio 原生共存,承接文档向量化/批量任务(利用现有 Redis 作为 broker,不引入新基础设施),提供任务持久化/重试/调度能力。提供异步任务可视化:进度展示、失败通知与重试、任务历史。 + +R13. LLM Provider 底层替换为 LiteLLM:上层网关逻辑(fallback/缓存/用量追踪)保留自研,底层 provider 适配走 LiteLLM 统一接口。 + +## Actors + +A1. **企业用户** — 通过前端界面管理知识库(上传文档/配置分段/测试检索)、配置多端接入、发布 MCP 工具。 + +A2. **开发者** — 通过 API/MCP Server 集成 AgentKit 能力到外部系统。 + +A3. **Agent** — 运行时调用 RAG 平台检索知识库内容,支撑问答与决策。 + +## Key Flows + +F1. RAG 文档处理流程 +- **Trigger:** 企业用户上传文档到知识库。 +- **Actors:** A1, A3 +- **Steps:** 文档解析 → 分段(智能/高级)→ 分段预览 → 向量化 → 全文索引建立 → 问题自动生成 → 可用。 +- **Outcome:** 文档进入知识库,可被 Agent 检索。 + +F2. RAG 检索流程 +- **Trigger:** Agent 需要检索知识库回答用户问题。 +- **Actors:** A3 +- **Steps:** 查询接收 → 检索模式选择(embedding/keywords/blend)→ 双索引检索 → rerank 重排 → 命中处理(模型优化/直接回答)→ 返回结果。 +- **Outcome:** Agent 获得相关知识库内容。 + +F3. 多端消息接入流程 +- **Trigger:** 企业用户通过企微/钉钉/飞书/Slack 发送消息。 +- **Actors:** A1, A3 +- **Steps:** 消息适配器接收 → 转换为 AgentKit 标准格式 → Agent 处理 → 响应转换为目标平台格式 → 返回。 +- **Outcome:** 企业用户通过协作工具获得 Agent 响应。 + +F4. 
MCP Server 发布流程 +- **Trigger:** 企业用户或开发者将 Skill/专家团队发布为 MCP 工具。 +- **Actors:** A1, A2 +- **Steps:** 选择 Skill/专家团队 → 配置 MCP endpoint → 发布 → 外部 AI 系统可通过 MCP 协议调用。 +- **Outcome:** AgentKit 能力通过 MCP 协议对外输出。 + +## Scope Boundaries + +### Deferred for later + +- 本地模型支持(Ollama)——后续迭代,服务企业私有化部署场景。 +- 现有 `memory/` 模块重构——保留给 Agent 记忆用,不在本次演进范围。 + +### Outside this product's identity + +- FlowCanvas→LogicFlow 替换——保留现有工作流,不替换。 +- Agent 引擎(ReActEngine/ReWOO/Reflexion/PlanExec)——保持自研,是核心差异化能力。 +- 专家团队编排(流水线 + 私董会)——保持自研,生态无对应方案。 +- 自进化系统(16 组件)——保持自研,独有能力。 +- 终端安全(6 层白名单)——保持自研,安全必须自主可控。 + +## Dependencies / Assumptions + +- **开源协议合规**:生态替换涉及的依赖协议需宽松可商用。Celery (BSD-3)、LiteLLM (MIT)、langchain-mcp-adapters 需确认协议。(注:LogicFlow 已因保留 FlowCanvas 决策排除,非协议原因——Apache-2.0 本身可商用) +- **现有特性向后兼容**:生态替换(MCP 客户端/Celery/LiteLLM)不能破坏现有功能,需提供迁移路径。 +- **pgvector 基础设施**:RAG 平台与现有 EpisodicMemory 共用 pgvector 基础设施,但数据模型独立。 +- **前端组件复用**:现有 KnowledgeBaseView/DocumentUpload/SearchTest 组件可能需要重构以支撑 RAG 平台可视化文档管理。 + +## Outstanding Questions + +### Resolve Before Planning + +- **[P0 安全] MCP Server 端点缺少认证/授权决策**:R10 暴露 Skills/Expert Teams 为 MCP 工具,F4 描述发布流程但未提及认证、授权、限流或访问控制。未认证的 MCP 端点允许任何可达客户端调用 Skills、读取 Expert Team 输出,若工具具备文件系统或 shell 访问权限则构成远程代码执行面。需在规划前明确:认证方案(API Key / JWT 复用 / OAuth / mTLS)、授权模型(按 skill / team / tenant)、限流策略。(ce-doc-review 延期,security-lens,置信度 100) +- **[P0 安全] 多端消息适配器缺少输入验证**:R9 多平台消息接入(飞书/钉钉/企业微信),F3 未提及签名校验、来源认证或速率限制。外部平台消息为不可信输入边界,缺少验证允许伪造消息、注入恶意内容、触发未授权 Skill 执行。需在规划前明确:各平台签名机制(飞书 encrypt_key、钉钉 token、企业微信 EncodingAESKey)、消息格式校验、重放攻击防护。(ce-doc-review 延期,security-lens,置信度 100) +- **[P0 安全] 文档上传缺少内容净化**:R1/R8 涉及用户上传文档(PDF/Word/Markdown/TXT),需求未提及内容净化、恶意文件检测或大小限制。上传文档可能包含恶意脚本(XSS via markdown)、超大文件导致 OOM、嵌入恶意宏。需在规划前明确:文件类型白名单、大小限制、内容扫描、markdown sanitize、PDF 解析安全。(ce-doc-review 延期,security-lens,置信度 100) + +### Deferred to Planning + +- 飞书消息接入适配器与现有飞书 RAG 适配器(`memory/adapters/feishu.py`)的复用程度——后续确认。 +- RAG 平台数据模型(Knowledge/Document/Paragraph/Problem/Embedding)的具体设计——ce-plan 决策。 +- 
Celery 与现有 asyncio 原生的共存策略——ce-plan 决策。 +- LiteLLM 与现有自研 Provider 的迁移路径——ce-plan 决策。 +- **[P1 战略] 对标 MaxKB ≠ 竞争力**:功能对等是底线而非差异化。需明确 AgentKit 相对 MaxKB 的差异化定位(专家团队/自进化/终端安全等独有能力如何与 RAG 平台协同)。(ce-doc-review 延期,product-lens,置信度 75) +- **[P1 前提] 演进前提缺少用户痛点证据**:文档以"对标 MaxKB 发现差距"为前提,但未提供用户痛点证据(用户反馈/流失原因/竞品丢失原因)。需确认:是否有用户因缺少 RAG/多端/MCP Server 而流失或抱怨?(ce-doc-review 延期,product-lens,置信度 75,ROOT) +- **[P1 前提] 未评估"不做"基线**:未评估不执行此演进计划的后果。需确认:若不演进,AgentKit 的实际损失是什么?(ce-doc-review 延期,adversarial,置信度 75,DEPENDENT) +- **[P1 战略] 同质化追赶 vs 差异化构建**:RAG/多端/MCP Server 是 commodity 能力,MaxKB 已有。需确认:投入大量资源追赶 commodity 是否优于强化差异化能力?(ce-doc-review 延期,product-lens,置信度 75) +- **[P1 战略] 重建 MaxKB 缺乏正当性论证**:RAG 平台并行独立意味着重建 MaxKB 已有的工业级管道。需确认:为何不直接集成 MaxKB 或 fork?(ce-doc-review 延期,adversarial,置信度 75,ROOT) +- **[P1 基础设施] 并行 RAG 平台导致基础设施翻倍**:独立 RAG 平台与现有 `memory/` 共用 pgvector 但数据模型独立,可能导致维护两套 RAG 基础设施。需确认:长期是否合并?(ce-doc-review 延期,product-lens,置信度 75,DEPENDENT) +- **[P1 设计] RAG 平台信息架构未定义**:RAG 平台的前端信息架构(知识库列表/文档管理/检索测试/配置页)未定义。归 ce-plan 设计。(ce-doc-review 延期,design-lens,置信度 75) +- **[P1 设计] 多端配置流程缺失**:R9 多端消息接入缺少配置流程(如何添加平台/配置凭证/测试连通性)。归 ce-plan 设计。(ce-doc-review 延期,design-lens,置信度 75) +- **[P1 安全] 知识库访问控制未指定**:R8 可视化文档管理未指定访问控制(谁可查看/编辑/删除知识库)。需明确 RBAC 模型。(ce-doc-review 延期,security-lens,置信度 75) +- **[P1 安全] 适配器凭证管理未定义**:R9 多端适配器需要管理各平台 API 凭证(app_id/app_secret/token),需求未提及凭证存储与轮换。归 ce-plan 设计。(ce-doc-review 延期,security-lens,置信度 75) +- **[P1 安全] MCP 发布授权未指定**:R10 MCP Server 发布流程未指定谁有权发布 MCP 工具。需明确发布权限模型。(ce-doc-review 延期,security-lens,置信度 75) +- **[P1 技术] PG 全文检索对中文不适用**:R2 依赖 PostgreSQL 全文检索,但 PG 原生全文检索对中文支持差(缺中文分词)。需确认:是否使用 pg_jieba/zhparser 扩展或外部搜索引擎?(ce-doc-review 延期,feasibility,置信度 75) +- **[P1 优先级] R12 优先级阻断 P1 交付**:R12(Celery)在 P3,但 R1-R8(RAG 管道)的文档向量化需要异步任务能力。需确认:P1 是否依赖 R12?(ce-doc-review 延期,scope-guardian,置信度 75) +- **[P2 证据] Celery 替换缺少必要性证据**:R12 引入 Celery 但未提供 asyncio 原生不足的证据(具体场景/性能瓶颈/故障案例)。需补充必要性论证。(ce-doc-review 延期,feasibility,置信度 75) +- **[P2 量化] LiteLLM 节省未量化**:R13 引入 LiteLLM 
但未量化节省(维护代码量/开发效率/兼容 provider 数)。需补充量化数据。(ce-doc-review 延期,feasibility,置信度 75) +- **[P2 设计] MCP 配置流程未指定**:R10 MCP Server 发布流程缺少配置细节(endpoint 路径/工具命名/参数定义)。归 ce-plan 设计。(ce-doc-review 延期,design-lens,置信度 75) +- **[P2 设计] 分块预览未定义**:R3 分段预览的交互模式未定义(预览界面/编辑能力/重新分段)。归 ce-plan 设计。(ce-doc-review 延期,design-lens,置信度 75) +- **[P2 技术] 消息总线替换未处理**:现有 `bus/`(MemoryBus/RedisBus)在生态替换中未提及。需确认:是否保留自研?(ce-doc-review 延期,feasibility,置信度 75) +- **[P2 技术] Rerank 模型未处理**:R7 rerank 未指定模型(本地/API/开源)。归 ce-plan 决策。(ce-doc-review 延期,feasibility,置信度 75) +- **[P1 战略] MaxKB 对等框架解决错误问题**:MaxKB 是 RAG 知识库产品,AgentKit 是 Agent 平台。对标不同产品类别的功能对等可能构建不服务于实际用户的能力。需重构为用户结果导向的成功标准。(ce-doc-review 第 2 轮延期,product-lens+adversarial,置信度 100) +- **[P1 战略] 串行策略饿死差异化投入**:串行执行意味着差异化能力(Agent 引擎/专家团队/自进化)在 P1/P2 完成前零投入。需考虑预留差异化并行轨道。(ce-doc-review 第 2 轮延期,product-lens,置信度 75) +- **[P1 战略] Build-vs-buy 未评估**:R1-R8 从零构建工业级 RAG 管道,未评估集成 MaxKB 或采用 RAG 框架(LlamaIndex/Haystack)。需补充 build-vs-buy 评估。(ce-doc-review 第 2 轮延期,product-lens+adversarial,置信度 100) +- **[P1 前提] 核心替换向后兼容性是假设非验证**:R11/R12/R13 替换三个核心组件但假设"现有功能行为不变"。现有 LLM 网关有 6 provider + fallback + 语义缓存 + RemoteLLMProvider 代理。需将向后兼容从假设转为验证前提。(ce-doc-review 第 2 轮延期,adversarial,置信度 75) +- **[P1 设计] 文档处理失败状态缺失**:F1 未定义解析失败/不支持格式/向量化错误的用户可见状态。归 ce-plan 设计。(ce-doc-review 第 2 轮延期,design-lens,置信度 75) +- **[P1 设计] 分段预览交互模式未定义**:R3 "查看分段结果"是只读还是可编辑(合并/拆分/重新分段)未定义。归 ce-plan 设计。(ce-doc-review 第 2 轮延期,design-lens,置信度 75) +- **[P1 设计] 多端配置与认证流程缺失**:F3 缺少多端 onboarding 流程(webhook 配置/OAuth/app 凭证/连通性测试)。归 ce-plan 设计。(ce-doc-review 第 2 轮延期,design-lens,置信度 75) +- **[P2 战略] 维护成本痛点延期至 P3**:Problem Frame 声明"commodity 层维护成本高"但解决方案在最低优先级 P3。需考虑将高杠杆替换(如 LiteLLM)提前并行。(ce-doc-review 第 2 轮延期,product-lens,置信度 75) +- **[P2 战略] R11-R13 是技术债非产品需求**:R11-R13 成功标准是"现有功能行为不变"(零用户可见影响),无 Actor 受益。需考虑移至独立工程债轨道。(ce-doc-review 第 2 轮延期,product-lens,置信度 75) +- **[P2 战略] 门户触达(P2)反转门户价值主张**:门户平台的核心价值是触达,但多端接入在 P2。需考虑在 P1 并行交付至少一个高价值渠道。(ce-doc-review 第 2 轮延期,product-lens,置信度 75) +- **[P2 技术] Celery 缺乏必要性论证**:R12 引入 
Celery 但未论证 asyncio 不足。文档向量化是 CPU-bound(可用 ProcessPoolExecutor),批量任务是 I/O-bound(asyncio 强项)。需补充具体场景。(ce-doc-review 第 2 轮延期,scope-guardian+adversarial,置信度 75) +- **[P2 技术] LiteLLM 替换覆盖缺口**:R13 未评估 LiteLLM 对 6 个现有 provider(尤其 Doubao/Wenxin/Yuanbao)的覆盖,以及语义缓存/用量追踪/RemoteLLMProvider 代理等网关特性。需补充 feature-gap 分析。(ce-doc-review 第 2 轮延期,scope-guardian+adversarial,置信度 75) +- **[P2 战略] 串行策略阻断 MCP Server**:R10 完善现有 MCP 基础设施,无依赖 RAG 或多端。串行策略将其阻断在两个无关工作流之后。需考虑解耦 R10。(ce-doc-review 第 2 轮延期,adversarial,置信度 75) +- **[P2 设计] MCP 发布配置细节未定义**:F4 "配置 MCP endpoint"未定义配置字段(工具名称/描述/输入 schema/鉴权方式/发布前测试)。归 ce-plan 设计。(ce-doc-review 第 2 轮延期,design-lens,置信度 75) +- **[P2 设计] 新 RAG 平台门户 IA 未定义**:R1 新建独立 RAG 平台模块,但未定义其在门户导航中的位置(顶级 section 还是扩展现有知识库管理区)。归 ce-plan 设计。(ce-doc-review 第 2 轮延期,design-lens,置信度 75) + +## Sources / Research + +- MaxKB 系统架构:https://maxkb.cn/docs/v1/system_arch/ +- MaxKB 技术解析:https://juejin.cn/post/7650428235188420651 +- MaxKB GitHub:https://github.com/1Panel-dev/MaxKB +- AgentKit 代码库:`memory/`(RAG 基础组件)、`server/routes/`(22 路由模块)、`src/agentkit/server/frontend/src/components/`(前端组件) +- AgentKit 项目规则:`AGENTS.md`、`CLAUDE.md` diff --git a/docs/plans/2026-06-24-001-feat-bitable-companion-service-plan.md b/docs/plans/2026-06-24-001-feat-bitable-companion-service-plan.md new file mode 100644 index 0000000..725e12e --- /dev/null +++ b/docs/plans/2026-06-24-001-feat-bitable-companion-service-plan.md @@ -0,0 +1,868 @@ +--- +title: "feat: 多维表格(Bitable)伴生服务 v1" +status: active +date: 2026-06-24 +deepened: 2026-06-24 +type: feat +origin: docs/brainstorms/2026-06-24-bitable-module-requirements.md +--- + +# 多维表格(Bitable)伴生服务 v1 实现规划 + +## Summary + +为 AgentKit 引入多维表格伴生服务,作为异构数据源(Excel/数据库/爬虫API)的统一持久化落地载体。Agent 是数据的主要作者(采集写入),用户在落地后的表上精修、配视图、做分析。v1 验证"采集→落地→网格视图→基础公式→附件"核心闭环。 + +本服务逻辑独立(自有 API/CLI/领域模型/存储),当前共部署、UI 级集成,未来可零成本抽离。 + +## Problem Frame + +AgentKit 缺少统一的持久化结构化数据落地载体。Excel 导出是单向的(`src/agentkit/documents/renderers/excel_renderer.py`),Excel 解析只转文本进 
RAG(`src/agentkit/memory/document_loader.py`),`SharedWorkspace` 是带 TTL 的临时 KV。没有模块能把异构数据源的结构化数据持久化为可编辑、可视图、可计算的多维表格。 + +详见 origin: `docs/brainstorms/2026-06-24-bitable-module-requirements.md`。 + +## Requirements + +源自需求文档 v1 范围: + +| ID | 需求 | 来源 | +|----|------|------| +| R1 | 服务骨架:领域模型(表/字段/记录/视图)、API/CLI、独立 schema 存储 | 需求文档 §5 v1 | +| R2 | 字段所有权模型 + 按主键 upsert 语义(数据列归 Agent,用户列保留) | 需求文档 §4.1 | +| R3 | 三类采集落地(Excel 上传/URL、数据库导入、爬虫/API 采集) | 需求文档 §2 | +| R4 | 网格视图(排序/筛选/分页/单元格编辑) | 需求文档 §5 v1 | +| R5 | 基础公式列(算术/字符串/SUM/AVG/COUNT)+ 基础引用列(lookup) | 需求文档 §4.2 | +| R6 | 图片/附件字段类型(复用现有文件上传能力) | 需求文档 §5 v1 | +| R7 | 异步公式重算 + "计算中"状态标记 | 需求文档 §4.2 | +| R8 | 伴生服务架构:API/CLI 调用边界,不做进程内紧耦合 | 需求文档 §3 | + +**成功标准**(源自需求文档 §9):Agent 能把 Excel/DB/API 数据写入多维表格;用户能编辑单元格、新增公式列、看到异步重算结果;重复采集时按主键 upsert 保留用户列;服务通过 API/CLI 被调用。 + +--- + +## Key Technical Decisions + +### KTD1: 存储选用 PostgreSQL(非 SQLite),跟随 evolution/memory 模式 + +现有伴生子系统(calendar/documents/auth)用 SQLite + 独立 `.db` 文件。bitable **偏离此模式**,改用 PostgreSQL + 独立 schema,跟随 `src/agentkit/evolution/pg_store.py` 和 `src/agentkit/memory/models.py` 的 PostgreSQL 模式。 + +**理由**:需求文档要求可演进到单表 10万+行 + 并发写入(Agent 采集 + 用户编辑同时)。SQLite 的并发写锁和单文件规模是硬瓶颈。PostgreSQL 的 JSONB 查询能力、行级并发、索引支持是 bitable 的刚需。 + +**代价**:bitable 要求部署环境配置 PostgreSQL(不像 calendar/documents 开箱即用 SQLite)。这是可接受的——需求文档已明确"共享 PG + 独立 schema"。 + +**模式参考**:`src/agentkit/evolution/pg_store.py`(PGBase 独立 + 延迟初始化 + 锁防并发)、`src/agentkit/memory/models.py`(SQLAlchemy 2 declarative + JSONB + pgvector)。 + +### KTD2: 存储模型——字段定义表 + 记录表(JSONB 存值) + +不用 EAV(一行一单元格,100k×20=200万行太慢),不用动态列(加列要 DDL)。采用: + +- **字段定义表** `bitable_fields`:每行一个字段定义(名称、类型、配置、所有权) +- **记录表** `bitable_records`:每行一条记录,`values` 列为 JSONB(`{field_id: value}`) + +这是 Airtable/飞书多维表格的标准模式。JSONB 支持 GIN 索引和 `->>` 查询,兼顾灵活性与查询性能。加列/删列只改字段定义表,不动记录表结构。 + +### KTD3: 公式引擎——自研 Python 轻量引擎 + +不引入 HyperFormula(商业付费)、pycel(GPL 传染风险)、formulas(EUPL 边界模糊)。自研,因为 v1 函数集小(10-50 个)。 + +**架构**:`ast`/`pyparsing` 解析公式为 AST → 构建 DAG(字段依赖关系)→ Kahn 算法拓扑排序 → 
DFS 检测循环引用 → 增量重算(仅重算受影响下游)。 + +**重算策略**:数据列写入 → 标记依赖该列的公式列为"计算中" → 异步队列按拓扑序重算 → 结果写回记录 JSONB → 状态置"完成"。 + +`ponytail:` 自研引擎的 O(V+E) 拓扑重算在万级公式单元格下足够;若未来公式量到十万级或需 Excel 100% 兼容,升级路径为迁移到 Univer 引擎(Apache-2.0,免费商用)。 + +### KTD4: 网格视图组件——vxe-table(MIT) + +不选 Handsontable(商业付费)、ag-grid Enterprise(付费功能)、a-table 裸用(10k+ 行无虚拟滚动)。选 vxe-table:Vue 3 原生 + TS、MIT、横向+纵向虚拟滚动、可编辑 CRUD、自定义渲染器(插槽实现附件/图片/公式列)。 + +公式列由后端计算后回填值,前端只渲染(不前端算公式)。 + +### KTD5: 服务边界——REST API 即使共部署也走 HTTP + +需求文档要求"API/CLI 调用边界,不做进程内紧耦合"。即使 bitable 与 AgentKit 共进程部署,Agent 调用 bitable 也走 localhost REST API(`/api/v1/bitable/*`),而非直接 import service 类。 + +**理由**:满足伴生服务契约,未来抽离为零成本。代价是本地 HTTP 往返开销(可忽略)。 + +**例外**:CLI 命令(`agentkit bitable ...`)可直接调用 service 层(CLI 是运维工具,不是运行时调用路径)。 + +### KTD6: 字段所有权——field 元数据 `owner` 字段 + 自动推断 + +`bitable_fields` 表增加 `owner` 列(`agent` | `user`)。自动推断规则:公式列/引用列/手动标注列 → `user`;Agent 采集写入的列 → `agent`。Agent 采集时可显式声明覆盖推断。 + +upsert 时只更新 `owner=agent` 的字段值,`owner=user` 的字段值原样保留。 + +### KTD7: 公式引擎安全约束——受限 AST walker + 白名单节点 + +`ast.parse` 后**禁止直接 `eval()`**。必须实现受限 AST walker,仅允许白名单节点类型:`Expression`、`BinOp`、`UnaryOp`、`BoolOp`、`Compare`、`Call`(仅已注册函数)、`Name`(仅字段引用)、`Constant`、`IfExp`。 + +**禁用节点**:`Attribute`(防 `__class__`/`__globals__` 等 dunder 属性链逃逸)、`Subscript`、`Lambda`、`Import`/`ImportFrom`、`Assign`/`AugAssign`、`For`/`While`、`FunctionDef`/`ClassDef`、`Await`、`Yield`。遇到禁用节点立即抛出 `FormulaSecurityError`。 + +**理由**:公式字符串来自用户输入和 Agent 输出,是信任边界。即使以 `ast.parse(..., mode="eval")` 解析,直接 `eval()` 编译结果仍可访问 `__builtins__`。受限 walker 是唯一安全方案。 + +**模式参考**:Python `ast` 模块的 `NodeVisitor` + 白名单校验,类似 bandit 的 AST 检查模式。 + +### KTD8: Upsert 用 `jsonb_set` 逐字段合并,禁止整行替换 + +upsert 更新 agent 列时,**禁止** `UPDATE ... 
SET values = :new_values`(整行替换会覆盖 user 列)。必须用 `jsonb_set` 逐字段合并: + +```sql +-- ponytail: 逐字段 jsonb_set,O(字段数) per record,万级批量 upsert 可接受 +UPDATE bitable_records +SET values = jsonb_set(values, :field_path, :field_value, true) +WHERE id = :record_id +``` + +对每条记录的每个 agent 列执行一次 `jsonb_set`,或在单条 SQL 中嵌套多个 `jsonb_set`。user 列(`owner=user`)的值绝不出现在 UPDATE 语句中。 + +**理由**:整行替换是 upsert 语义破坏的最常见实现错误。`jsonb_set` 逐字段合并是唯一能保证"只更新 agent 列、保留 user 列"的正确实现。 + +### KTD9: 记录分页用 cursor-based,非 offset-based + +`GET /tables/{id}/records` 分页用 cursor(`?cursor=...&limit=50`),非 `?offset=0&limit=50`。 + +**理由**:offset 分页在 100k 行时深翻页慢(`OFFSET 50000` 仍扫描前 5 万行)。cursor 分页用 `WHERE id > :cursor ORDER BY id LIMIT :limit`,恒定性能。代价是不支持随机跳页(v1 不需要——网格视图是连续滚动)。 + +`ponytail:` cursor 分页不支持跳页;未来若需"跳到第 N 页",升级路径为 keyset + 估算偏移或预计算页索引。 + +### KTD10: vxe-table 与 Ant Design Vue CSS 隔离 + +vxe-table 引入全局 CSS(`.vxe-*` 前缀),可能与 Ant Design Vue 的 `.ant-*` 样式冲突。隔离策略: + +1. vxe-table 样式通过 `@import` 局部引入到 `BitableGrid.vue` 的 ` diff --git a/src/agentkit/server/frontend/src/components/bitable/BitableGrid.vue b/src/agentkit/server/frontend/src/components/bitable/BitableGrid.vue new file mode 100644 index 0000000..5491c70 --- /dev/null +++ b/src/agentkit/server/frontend/src/components/bitable/BitableGrid.vue @@ -0,0 +1,227 @@ + + + + + diff --git a/src/agentkit/server/frontend/src/components/bitable/FieldConfigForm.vue b/src/agentkit/server/frontend/src/components/bitable/FieldConfigForm.vue new file mode 100644 index 0000000..4d97479 --- /dev/null +++ b/src/agentkit/server/frontend/src/components/bitable/FieldConfigForm.vue @@ -0,0 +1,186 @@ + + + + + diff --git a/src/agentkit/server/frontend/src/components/bitable/FieldManagePanel.vue b/src/agentkit/server/frontend/src/components/bitable/FieldManagePanel.vue new file mode 100644 index 0000000..2923fd1 --- /dev/null +++ b/src/agentkit/server/frontend/src/components/bitable/FieldManagePanel.vue @@ -0,0 +1,249 @@ + + + + + diff --git 
a/src/agentkit/server/frontend/src/components/bitable/FilterBuilder.vue b/src/agentkit/server/frontend/src/components/bitable/FilterBuilder.vue new file mode 100644 index 0000000..c95756a --- /dev/null +++ b/src/agentkit/server/frontend/src/components/bitable/FilterBuilder.vue @@ -0,0 +1,179 @@ + + + + + diff --git a/src/agentkit/server/frontend/src/components/bitable/ImageCell.vue b/src/agentkit/server/frontend/src/components/bitable/ImageCell.vue new file mode 100644 index 0000000..5d8e425 --- /dev/null +++ b/src/agentkit/server/frontend/src/components/bitable/ImageCell.vue @@ -0,0 +1,117 @@ + + + + + diff --git a/src/agentkit/server/frontend/src/components/bitable/TableCreateModal.vue b/src/agentkit/server/frontend/src/components/bitable/TableCreateModal.vue new file mode 100644 index 0000000..17c8d72 --- /dev/null +++ b/src/agentkit/server/frontend/src/components/bitable/TableCreateModal.vue @@ -0,0 +1,103 @@ + + + diff --git a/src/agentkit/server/frontend/src/components/bitable/TableViewList.vue b/src/agentkit/server/frontend/src/components/bitable/TableViewList.vue new file mode 100644 index 0000000..a8cd81d --- /dev/null +++ b/src/agentkit/server/frontend/src/components/bitable/TableViewList.vue @@ -0,0 +1,121 @@ + + + + + diff --git a/src/agentkit/server/frontend/src/components/bitable/ViewConfigPanel.vue b/src/agentkit/server/frontend/src/components/bitable/ViewConfigPanel.vue new file mode 100644 index 0000000..8205228 --- /dev/null +++ b/src/agentkit/server/frontend/src/components/bitable/ViewConfigPanel.vue @@ -0,0 +1,175 @@ + + + + + diff --git a/src/agentkit/server/frontend/src/components/bitable/ViewSwitcher.vue b/src/agentkit/server/frontend/src/components/bitable/ViewSwitcher.vue new file mode 100644 index 0000000..4b33441 --- /dev/null +++ b/src/agentkit/server/frontend/src/components/bitable/ViewSwitcher.vue @@ -0,0 +1,83 @@ + + + + + diff --git a/src/agentkit/server/frontend/src/stores/bitable.ts 
/**
 * Pinia store for bitable feature — tables, fields, records, views,
 * and formula recalc status polling.
 *
 * ponytail: Formula recalc polling uses a simple setInterval. The polling
 * stops when no records have pending/calculating formula fields.
 * Ceiling: one polling timer per store instance; if multiple BitableGrid
 * components mount simultaneously they share the same store-level timer.
 */

import { defineStore } from 'pinia'
import { ref, computed } from 'vue'
import { notification } from 'ant-design-vue'
import { bitableApi } from '@/api/bitable'
import type {
  IBitableTable,
  IBitableField,
  IBitableRecord,
  IBitableView,
  FieldType,
} from '@/api/bitable'

export const useBitableStore = defineStore('bitable', () => {
  // --- State ---
  const tables = ref<IBitableTable[]>([])
  const currentTable = ref<IBitableTable | null>(null)
  const fields = ref<IBitableField[]>([])
  const records = ref<IBitableRecord[]>([])
  const views = ref<IBitableView[]>([])
  const currentView = ref<IBitableView | null>(null)
  const isLoading = ref(false)
  const error = ref<string | null>(null)
  const nextCursor = ref<string | null>(null)
  const recalcPendingCount = ref(0)

  // Polling timer for formula recalc status
  let _pollTimer: ReturnType<typeof setInterval> | null = null
  // Guards against overlapping polls when a request outlives POLL_INTERVAL
  let _pollInFlight = false
  const POLL_INTERVAL = 2000 // 2s per plan
  const PAGE_SIZE = 100 // records fetched per listRecords call

  // --- Getters ---
  const formulaFields = computed(() =>
    fields.value.filter((f) => f.field_type === 'formula'),
  )

  const hasFormulaFields = computed(() => formulaFields.value.length > 0)

  // --- Internal helpers ---

  /** Serialized filters of the active view, or undefined when no view/filters are set */
  function currentViewFilters(): string | undefined {
    const filters = currentView.value?.config?.filters as unknown[] | undefined
    return filters ? JSON.stringify(filters) : undefined
  }

  /**
   * A record counts as "pending" while any formula field value is null/undefined.
   * NOTE(review): a formula that legitimately evaluates to null is
   * indistinguishable from a pending one with this heuristic — confirm
   * whether the API exposes an explicit recalc status.
   */
  function isRecalcPending(rec: IBitableRecord): boolean {
    return formulaFields.value.some((f) => rec.values[f.id] == null)
  }

  // --- Actions ---

  /** Load all bitable tables */
  async function loadTables(): Promise<void> {
    isLoading.value = true
    error.value = null
    try {
      const resp = await bitableApi.listTables()
      tables.value = resp.tables || []
    } catch (err) {
      error.value = err instanceof Error ? err.message : '加载表格列表失败'
      notification.error({ message: '加载失败', description: error.value })
    } finally {
      isLoading.value = false
    }
  }

  /** Select a table and load its fields + records */
  async function selectTable(tableId: string): Promise<void> {
    stopPolling()
    // Reset everything that belongs to the previously selected table
    fields.value = []
    records.value = []
    nextCursor.value = null
    recalcPendingCount.value = 0
    views.value = []
    currentView.value = null

    const table = tables.value.find((t) => t.id === tableId)
    currentTable.value = table || null

    if (!table) return

    try {
      const [fieldsResp, recordsResp, viewsResp] = await Promise.all([
        bitableApi.listFields(tableId),
        bitableApi.listRecords(tableId, { limit: PAGE_SIZE }),
        bitableApi.listViews(tableId),
      ])
      // Another selectTable() ran while we were waiting: drop this stale response
      if (currentTable.value?.id !== tableId) return

      fields.value = fieldsResp.fields || []
      records.value = recordsResp.records || []
      nextCursor.value = recordsResp.next_cursor
      views.value = viewsResp.views || []

      // Start polling if there are formula fields
      if (hasFormulaFields.value) {
        startPolling(tableId)
      }
    } catch (err) {
      error.value = err instanceof Error ? err.message : '加载表格数据失败'
      notification.error({ message: '加载失败', description: error.value })
    }
  }

  /** Load more records (cursor pagination) */
  async function loadMoreRecords(): Promise<void> {
    if (!currentTable.value || !nextCursor.value) return
    try {
      const resp = await bitableApi.listRecords(currentTable.value.id, {
        cursor: nextCursor.value,
        limit: PAGE_SIZE,
      })
      records.value.push(...(resp.records || []))
      nextCursor.value = resp.next_cursor
    } catch (err) {
      notification.error({
        message: '加载更多失败',
        description: err instanceof Error ? err.message : String(err),
      })
    }
  }

  /** Update a single cell value */
  async function updateCell(
    recordId: string,
    fieldId: string,
    value: unknown,
  ): Promise<void> {
    try {
      const resp = await bitableApi.updateRecord(recordId, { [fieldId]: value })
      // Update local state
      const idx = records.value.findIndex((r) => r.id === recordId)
      if (idx >= 0) {
        records.value[idx] = resp.record
      }
    } catch (err) {
      notification.error({
        message: '更新失败',
        description: err instanceof Error ? err.message : String(err),
      })
    }
  }

  /** Add a new field; resolves to null when no table is selected or the call fails */
  async function addField(
    name: string,
    fieldType: FieldType,
    config?: Record<string, unknown>,
  ): Promise<IBitableField | null> {
    if (!currentTable.value) return null
    try {
      const resp = await bitableApi.createField(currentTable.value.id, {
        name,
        field_type: fieldType,
        config,
      })
      fields.value.push(resp.field)
      return resp.field
    } catch (err) {
      notification.error({
        message: '创建字段失败',
        description: err instanceof Error ? err.message : String(err),
      })
      return null
    }
  }

  /** Create a new table; resolves to null on failure */
  async function createTable(
    name: string,
    description?: string,
  ): Promise<IBitableTable | null> {
    try {
      const resp = await bitableApi.createTable({ name, description })
      tables.value.push(resp.table)
      return resp.table
    } catch (err) {
      notification.error({
        message: '创建表格失败',
        description: err instanceof Error ? err.message : String(err),
      })
      return null
    }
  }

  /** Update an existing field; resolves to null on failure */
  async function updateField(
    fieldId: string,
    data: { name?: string; config?: Record<string, unknown> },
  ): Promise<IBitableField | null> {
    try {
      const resp = await bitableApi.updateField(fieldId, data)
      const idx = fields.value.findIndex((f) => f.id === fieldId)
      if (idx >= 0) {
        fields.value[idx] = resp.field
      }
      return resp.field
    } catch (err) {
      notification.error({
        message: '更新字段失败',
        description: err instanceof Error ? err.message : String(err),
      })
      return null
    }
  }

  /** Delete a field; returns dependencies on 409 */
  async function deleteField(
    fieldId: string,
    force = false,
  ): Promise<{ success: boolean; dependencies?: Record<string, unknown> }> {
    try {
      await bitableApi.deleteField(fieldId, force)
      fields.value = fields.value.filter((f) => f.id !== fieldId)
      return { success: true }
    } catch (err) {
      const apiErr = err as { status?: number; detail?: unknown }
      // 409 = has dependencies, return them for UI confirmation
      if (apiErr.status === 409 && apiErr.detail) {
        const detail = apiErr.detail as Record<string, unknown>
        return {
          success: false,
          dependencies: detail.dependencies as Record<string, unknown>,
        }
      }
      notification.error({
        message: '删除字段失败',
        description: err instanceof Error ? err.message : String(err),
      })
      return { success: false }
    }
  }

  /** Refresh records (e.g. after Agent writes data via BitableTool) */
  async function refreshRecords(): Promise<void> {
    if (!currentTable.value) return
    try {
      const resp = await bitableApi.listRecords(currentTable.value.id, {
        limit: PAGE_SIZE,
        filters: currentViewFilters(),
      })
      records.value = resp.records || []
      nextCursor.value = resp.next_cursor
    } catch (err) {
      // Silent fail on refresh — user didn't explicitly request it
      console.warn('Failed to refresh records:', err)
    }
  }

  // --- View management (U5c) ---

  /** Create a new view for the current table and make it the active view */
  async function createView(
    name: string,
    viewType: IBitableView['view_type'] = 'grid',
    config?: Record<string, unknown>,
  ): Promise<IBitableView | null> {
    if (!currentTable.value) return null
    try {
      const resp = await bitableApi.createView(currentTable.value.id, {
        name,
        view_type: viewType,
        config,
      })
      views.value.push(resp.view)
      currentView.value = resp.view
      return resp.view
    } catch (err) {
      notification.error({
        message: '创建视图失败',
        description: err instanceof Error ? err.message : String(err),
      })
      return null
    }
  }

  /** Update a view's config (filters/sorts/hidden fields) */
  async function updateView(
    viewId: string,
    data: { name?: string; config?: Record<string, unknown> },
  ): Promise<void> {
    try {
      const resp = await bitableApi.updateView(viewId, data)
      const idx = views.value.findIndex((v) => v.id === viewId)
      if (idx >= 0) {
        views.value[idx] = resp.view
      }
      if (currentView.value?.id === viewId) {
        currentView.value = resp.view
        // Re-query records with updated view config
        await refreshRecords()
      }
    } catch (err) {
      notification.error({
        message: '更新视图失败',
        description: err instanceof Error ? err.message : String(err),
      })
    }
  }

  /** Switch to a view — applies its config to the records query */
  async function switchView(viewId: string): Promise<void> {
    const view = views.value.find((v) => v.id === viewId)
    if (!view) return
    currentView.value = view
    await refreshRecords()
  }

  // --- Formula recalc polling (R7) ---

  /** Start polling for formula recalc status */
  function startPolling(tableId: string): void {
    stopPolling()
    _pollTimer = setInterval(async () => {
      await pollRecalcStatus(tableId)
    }, POLL_INTERVAL)
  }

  /** Stop the polling timer */
  function stopPolling(): void {
    if (_pollTimer !== null) {
      clearInterval(_pollTimer)
      _pollTimer = null
    }
  }

  /**
   * Poll recalc status: re-fetch the first page (honoring the active view's
   * filters) and apply it while any formula field is still calculating.
   *
   * The final poll — the one that finds nothing pending — is applied too when
   * the local state still holds pending records; otherwise the finished
   * formula values would never reach the grid.
   */
  async function pollRecalcStatus(tableId: string): Promise<void> {
    if (_pollInFlight) return // previous poll still running; skip this tick
    _pollInFlight = true
    try {
      const resp = await bitableApi.listRecords(tableId, {
        limit: PAGE_SIZE,
        filters: currentViewFilters(),
      })
      // The user switched tables while the request was in flight
      if (currentTable.value?.id !== tableId) return

      const newRecords = resp.records || []

      // Single traversal: collect pending records (formula field values still null)
      const pending = newRecords.filter(isRecalcPending)
      const stillCalculating = pending.length > 0
      const hadPending = records.value.some(isRecalcPending)

      if (records.value.length > PAGE_SIZE) {
        // More than one page is loaded: merge the refreshed first page in place
        // so the extra pages and the pagination cursor are not thrown away.
        // Records beyond the first page are not re-fetched by the poll.
        const fresh = new Map(newRecords.map((r) => [r.id, r] as const))
        records.value = records.value.map((r) => fresh.get(r.id) ?? r)
      } else {
        // Only update state if records actually changed (avoid unnecessary re-renders)
        const oldIds = records.value.map((r) => r.id).join(',')
        const newIds = newRecords.map((r) => r.id).join(',')
        if (oldIds !== newIds || stillCalculating || hadPending) {
          records.value = newRecords
          nextCursor.value = resp.next_cursor
        }
      }

      recalcPendingCount.value = pending.length
      if (!stillCalculating) {
        stopPolling()
      }
    } catch (err) {
      // Silent fail on poll — don't spam notifications
      console.warn('Recalc poll failed:', err)
    } finally {
      _pollInFlight = false
    }
  }

  return {
    // State
    tables,
    currentTable,
    fields,
    records,
    views,
    currentView,
    isLoading,
    error,
    nextCursor,
    recalcPendingCount,
    // Getters
    formulaFields,
    hasFormulaFields,
    // Actions
    loadTables,
    selectTable,
    loadMoreRecords,
    updateCell,
    addField,
    createTable,
    updateField,
    deleteField,
    refreshRecords,
    createView,
    updateView,
    switchView,
    stopPolling,
  }
})
async def require_bitable_auth(request: Request) -> dict[str, Any]:
    """Authenticate a bitable request via internal token or user JWT (KTD11).

    The ``X-Internal-Token`` header is checked first, in constant time, against
    ``app.state.bitable_internal_token``. On a match a synthetic admin service
    user (``internal=True``) is returned. Otherwise the request falls back to
    ``get_current_user``.

    Args:
        request: Incoming request; ``request.app.state`` and ``request.headers``
            are read.

    Returns:
        The synthetic internal user dict, or whatever ``get_current_user``
        returned.

    Raises:
        HTTPException: 401 when neither credential authenticates the caller.
    """
    # 1. Check internal service token (KTD11)
    internal_token = getattr(request.app.state, "bitable_internal_token", None)
    if internal_token:
        provided = request.headers.get("X-Internal-Token", "")
        if provided:
            # hmac.compare_digest() raises TypeError for str operands that
            # contain non-ASCII characters. Header values are client-controlled,
            # so compare bytes instead: a malformed header must simply fail to
            # match and fall through to JWT auth, not produce a 500.
            expected = (
                internal_token
                if isinstance(internal_token, bytes)
                else str(internal_token).encode("utf-8")
            )
            if hmac.compare_digest(provided.encode("utf-8"), expected):
                return {
                    "user_id": "__bitable_internal__",
                    "username": "bitable-internal",
                    "role": "admin",
                    "internal": True,
                }

    # 2. Fall back to JWT auth
    user = await get_current_user(request)
    if user is None:
        raise HTTPException(
            status_code=401,
            detail="Authentication required (JWT or X-Internal-Token)",
        )
    return user
@router.get("/tables/{table_id}")
async def get_table(
    table_id: str,
    request: Request,
    user: dict = Depends(require_bitable_auth),
) -> dict[str, Any]:
    """Return a single table the caller is allowed to access.

    Args:
        table_id: ID of the table to fetch.
        request: Incoming request, used to resolve the bitable service.
        user: Authenticated caller from ``require_bitable_auth``.

    Returns:
        ``{"success": True, "table": <table as JSON-compatible dict>}``.

    Raises:
        HTTPException: 404 when the table does not exist; any error raised by
            ``_get_service`` or ``_check_table_ownership`` propagates as is.
    """
    service = _get_service(request)
    await _check_table_ownership(service, table_id, user)
    table = await service.get_table(table_id)
    # The ownership check and this lookup are separate calls; if the table is
    # deleted in between, report 404 rather than failing on None.model_dump().
    if table is None:
        raise HTTPException(status_code=404, detail="Table not found")
    return {"success": True, "table": table.model_dump(mode="json")}
@router.delete("/tables/{table_id}")
async def delete_table(
    table_id: str,
    request: Request,
    user: dict = Depends(require_bitable_auth),
) -> dict[str, Any]:
    """Delete a table after verifying the caller may access it.

    Returns ``{"success": True}`` when the service reports the table as
    deleted, and raises a 404 ``HTTPException`` when it reports nothing was
    deleted.
    """
    svc = _get_service(request)
    await _check_table_ownership(svc, table_id, user)

    if await svc.delete_table(table_id):
        return {"success": True}
    raise HTTPException(status_code=404, detail="Table not found")
@router.delete("/fields/{field_id}")
async def delete_field(
    field_id: str,
    request: Request,
    force: bool = Query(False, description="Force delete with cascade cleanup"),
    user: dict = Depends(require_bitable_auth),
) -> dict[str, Any]:
    """Delete a field, enforcing ownership of the table it belongs to.

    This route is keyed by field ID only, so the table-level ownership check
    used by the ``/tables/{table_id}/...`` routes has to be applied here
    explicitly; without it any authenticated user could delete any field.

    Args:
        field_id: ID of the field to delete.
        request: Incoming request, used to resolve the bitable service.
        force: Forwarded to ``service.delete_field``.
        user: Authenticated caller from ``require_bitable_auth``.

    Returns:
        ``{"success": True}`` on deletion.

    Raises:
        HTTPException: 404 when the field does not exist, 409 when
            ``FieldDependencyError`` is raised by the service, plus whatever
            ``_check_table_ownership`` raises (404/403).
    """
    service = _get_service(request)

    field = await service.get_field(field_id)
    if field is None:
        raise HTTPException(status_code=404, detail="Field not found")

    # NOTE(review): assumes the field model exposes its parent table as
    # `table_id` — confirm against agentkit.bitable.models. When the attribute
    # is absent the check is skipped, which matches the previous behaviour.
    table_id = getattr(field, "table_id", None)
    if table_id is not None:
        await _check_table_ownership(service, table_id, user)

    try:
        deleted = await service.delete_field(field_id, force=force)
    except FieldDependencyError as e:
        raise HTTPException(
            status_code=409,
            detail={"message": str(e), "dependencies": e.dependencies},
        ) from e
    if not deleted:
        raise HTTPException(status_code=404, detail="Field not found")
    return {"success": True}
@router.get("/tables/{table_id}/records")
async def list_records(
    table_id: str,
    request: Request,
    cursor: str | None = Query(None),
    limit: int = Query(50, ge=1, le=200),
    filters: str | None = Query(None, description="JSON-encoded filter list"),
    sorts: str | None = Query(None, description="JSON-encoded sort list"),
    user: dict = Depends(require_bitable_auth),
) -> dict[str, Any]:
    """List records of a table with cursor pagination and optional filters/sorts.

    Args:
        table_id: Table whose records are listed.
        request: Incoming request, used to resolve the bitable service.
        cursor: Opaque pagination cursor from a previous response.
        limit: Page size (1-200).
        filters: JSON-encoded list of filters.
        sorts: JSON-encoded list of sorts.
        user: Authenticated caller from ``require_bitable_auth``.

    Returns:
        ``{"success": True, "records": [...], "next_cursor": ...}``.

    Raises:
        HTTPException: 400 when ``filters``/``sorts`` are not valid JSON or do
            not decode to a JSON array; ownership errors propagate from
            ``_check_table_ownership``.
    """
    import json

    service = _get_service(request)
    await _check_table_ownership(service, table_id, user)

    try:
        parsed_filters = json.loads(filters) if filters else None
        parsed_sorts = json.loads(sorts) if sorts else None
    except json.JSONDecodeError as e:
        raise HTTPException(
            status_code=400, detail=f"Invalid JSON in filters/sorts: {e}"
        ) from e

    # Both parameters are documented as lists. Reject objects, strings and
    # numbers here so a malformed query fails with a 400 instead of reaching
    # the query builder with an unexpected shape.
    for label, parsed in (("filters", parsed_filters), ("sorts", parsed_sorts)):
        if parsed is not None and not isinstance(parsed, list):
            raise HTTPException(
                status_code=400, detail=f"'{label}' must be a JSON array"
            )

    if parsed_filters or parsed_sorts:
        records, next_cursor = await service.list_records_filtered(
            table_id,
            filters=parsed_filters,
            sorts=parsed_sorts,
            cursor=cursor,
            limit=limit,
        )
    else:
        records, next_cursor = await service.list_records(table_id, cursor=cursor, limit=limit)

    return {
        "success": True,
        "records": [r.model_dump(mode="json") for r in records],
        "next_cursor": next_cursor,
    }
@router.patch("/views/{view_id}")
async def update_view(
    view_id: str,
    body: UpdateViewRequest,
    request: Request,
    user: dict = Depends(require_bitable_auth),
) -> dict[str, Any]:
    """Apply a partial update to a view.

    Only the request fields that are not ``None`` are forwarded to
    ``service.update_view``. Responds with the updated view, or raises a 404
    ``HTTPException`` when the service returns ``None``.

    NOTE(review): this handler makes no ``_check_table_ownership`` call —
    confirm whether the service enforces ownership for view-level updates.
    """
    svc = _get_service(request)
    changes = body.model_dump(exclude_none=True)
    updated = await svc.update_view(view_id, **changes)

    if updated is not None:
        return {"success": True, "view": updated.model_dump(mode="json")}
    raise HTTPException(status_code=404, detail="View not found")
upload / download (U6: attachment & image fields) +# --------------------------------------------------------------------------- + + +def _sanitize_filename(name: str) -> str: + """Remove path separators and keep only safe characters.""" + name = name.replace("\\", "_").replace("/", "_") + return "".join(c for c in name if c.isalnum() or c in "._-").strip(".") + + +def _ensure_upload_dir() -> Path: + BITABLE_UPLOAD_DIR.mkdir(parents=True, exist_ok=True) + return BITABLE_UPLOAD_DIR + + +@router.post("/tables/{table_id}/upload") +async def upload_file( + table_id: str, + request: Request, + file: UploadFile = File(...), + field_id: str = Query(..., description="Target field ID (determines type validation)"), + user: dict = Depends(require_bitable_auth), +) -> dict[str, Any]: + """Upload a file for an attachment or image field. + + Returns file metadata. The frontend then writes this metadata array + to the record's field value via the normal update-record endpoint. + """ + service = _get_service(request) + await _check_table_ownership(service, table_id, user) + + # Validate field exists and is attachment/image type + field = await service.get_field(field_id) + if field is None: + raise HTTPException(status_code=404, detail="Field not found") + if field.field_type not in (FieldType.attachment, FieldType.image): + raise HTTPException( + status_code=400, + detail=f"Field '{field.name}' is not an attachment or image field", + ) + + # Size check (Content-Length header, fast reject) + if file.size is not None and file.size > MAX_UPLOAD_SIZE: + raise HTTPException(status_code=413, detail="File exceeds 10 MB limit") + + # Image type check + mime = file.content_type or "application/octet-stream" + if field.field_type == FieldType.image and not mime.startswith(_IMAGE_MIME_PREFIXES): + raise HTTPException( + status_code=400, + detail=f"Image field requires an image file, got '{mime}'", + ) + + original_name = file.filename or "unnamed" + safe_name = 
@router.get("/files/{filename}")
async def download_file(
    filename: str,
    user: dict = Depends(require_bitable_auth),
) -> FileResponse:
    """Serve a stored bitable attachment/image by its stored filename.

    The requested name is passed through ``_sanitize_filename`` before being
    joined onto ``BITABLE_UPLOAD_DIR``. A 404 ``HTTPException`` is raised
    unless the resulting path exists and is a regular file.
    """
    clean_name = _sanitize_filename(filename)
    target = BITABLE_UPLOAD_DIR / clean_name

    if target.exists() and target.is_file():
        return FileResponse(target, filename=clean_name)
    raise HTTPException(status_code=404, detail="File not found")
req: Request): +async def resume_task(task_id: str, req: Request, plan_id: str | None = None): """Resume a crashed pipeline from the last completed phase checkpoint. Reconstructs the team from the saved plan's expert names, creates a new TeamOrchestrator with the checkpoint manager, and calls resume(). + + Args: + task_id: Task ID from the URL path. + plan_id: Optional plan ID. If not provided, falls back to task_id. + Needed because TeamPlan.id is auto-generated and may differ + from the task_id. """ from agentkit.experts.orchestrator import TeamOrchestrator from agentkit.experts.router import ExpertTeamRouter @@ -223,72 +230,89 @@ async def resume_task(task_id: str, req: Request): app_state = req.app.state - # 1. Create checkpoint manager - checkpoint = PipelineCheckpoint( - redis_client=getattr(app_state, "working_redis_client", None) - ) + # Resolve plan_id: explicit param > task_id fallback + resolved_plan_id = plan_id or task_id - # 2. Load plan to get expert names - plan_dict = await checkpoint.load_plan(task_id) - if plan_dict is None: + # P2 #10: 并发 resume 防护 — 同一 plan_id 的并发 resume 请求只允许一个执行 + lock_attr = f"_resume_lock_{resolved_plan_id}" + lock = getattr(app_state, lock_attr, None) + if lock is None: + lock = asyncio.Lock() + setattr(app_state, lock_attr, lock) + if lock.locked(): raise HTTPException( - status_code=404, - detail=f"No checkpoint found for task '{task_id}'", + status_code=409, + detail=f"Resume already in progress for plan '{resolved_plan_id}'", + ) + async with lock: + # 1. Create checkpoint manager + checkpoint = PipelineCheckpoint( + redis_client=getattr(app_state, "working_redis_client", None) ) - # 3. Extract unique expert names from plan - expert_names: list[str] = [] - lead_name = plan_dict.get("lead_expert", "") - if lead_name: - expert_names.append(lead_name) - for ph in plan_dict.get("phases", []): - name = ph.get("assigned_expert", "") - if name and name not in expert_names: - expert_names.append(name) + # 2. 
Load plan to get expert names + plan_dict = await checkpoint.load_plan(resolved_plan_id) + if plan_dict is None: + raise HTTPException( + status_code=404, + detail=f"No checkpoint found for plan '{resolved_plan_id}'" + f" (task_id='{task_id}')", + ) - if not expert_names: - raise HTTPException( - status_code=400, - detail="Cannot resume: no experts found in saved plan", + # 3. Extract unique expert names from plan + expert_names: list[str] = [] + lead_name = plan_dict.get("lead_expert", "") + if lead_name: + expert_names.append(lead_name) + for ph in plan_dict.get("phases", []): + name = ph.get("assigned_expert", "") + if name and name not in expert_names: + expert_names.append(name) + + if not expert_names: + raise HTTPException( + status_code=400, + detail="Cannot resume: no experts found in saved plan", + ) + + # 4. Resolve expert configs via ExpertTeamRouter + template_registry = getattr(app_state, "expert_template_registry", None) + if template_registry is None: + from agentkit.experts.registry import ExpertTemplateRegistry + + template_registry = ExpertTemplateRegistry() + + team_router = ExpertTeamRouter(template_registry=template_registry) + expert_configs = team_router.resolve_expert_configs(expert_names) + if not expert_configs: + raise HTTPException( + status_code=400, + detail="Cannot resume: failed to resolve expert configs", + ) + + lead_config = expert_configs[0] + member_configs = expert_configs[1:] if len(expert_configs) > 1 else [] + + # 5. Create team + orchestrator + team = ExpertTeam( + pool=app_state.agent_pool, + template_registry=template_registry, + redis_client=getattr(app_state, "working_redis_client", None), ) + await team.create_team(lead_config=lead_config, member_configs=member_configs) - # 4. 
Resolve expert configs via ExpertTeamRouter - template_registry = getattr(app_state, "expert_template_registry", None) - if template_registry is None: - from agentkit.experts.registry import ExpertTemplateRegistry - - template_registry = ExpertTemplateRegistry() - - team_router = ExpertTeamRouter(template_registry=template_registry) - expert_configs = team_router.resolve_expert_configs(expert_names) - if not expert_configs: - raise HTTPException( - status_code=400, - detail="Cannot resume: failed to resolve expert configs", - ) - - lead_config = expert_configs[0] - member_configs = expert_configs[1:] if len(expert_configs) > 1 else [] - - # 5. Create team + orchestrator - team = ExpertTeam( - pool=app_state.agent_pool, - template_registry=template_registry, - redis_client=getattr(app_state, "working_redis_client", None), - ) - await team.create_team(lead_config=lead_config, member_configs=member_configs) - - try: - orchestrator = TeamOrchestrator(team=team, checkpoint=checkpoint) - result = await orchestrator.resume(task_id) - finally: try: - await team.dissolve() - except Exception: - pass + orchestrator = TeamOrchestrator(team=team, checkpoint=checkpoint) + result = await orchestrator.resume(resolved_plan_id) + finally: + try: + await team.dissolve() + except Exception: + pass return { "task_id": task_id, + "plan_id": resolved_plan_id, "status": result.get("status", "unknown"), "result": result.get("result"), "phase_results": { diff --git a/src/agentkit/skills/base.py b/src/agentkit/skills/base.py index c5215fe..88b7d2b 100644 --- a/src/agentkit/skills/base.py +++ b/src/agentkit/skills/base.py @@ -81,7 +81,7 @@ class SkillConfig(AgentConfig): evolution: dict[str, Any] | None = None, # v3 新增字段:SKILL.md 支持 skill_md_path: str | None = None, - disclosure_level: int = 0, + disclosure_level: int = 1, # 默认全量加载,向后兼容;0=概要模式需显式指定 # v4 新增字段:依赖声明、能力标签 dependencies: list[dict[str, Any] | DependencyDecl] | None = None, capabilities: list[str | dict[str, Any] | CapabilityTag] | 
def __init__(self, base_url: str, internal_token: str | None = None) -> None:
    """Register the tool's name, description and input schema.

    Args:
        base_url: Bitable API base URL; a trailing ``/`` is stripped.
        internal_token: Service token sent as ``X-Internal-Token`` by
            ``_get_client``. When ``None`` no auth header is added.

    The HTTP client is created lazily; ``self._client`` starts as ``None``.
    """
    super().__init__(
        name="bitable",
        description=(
            "Create and manage bitable (multi-dimensional spreadsheet) tables, "
            "ingest data from Excel files, databases, or API responses, and "
            "query records. Actions: create_table, import_excel, "
            "import_database, collect_api, upsert_records, query_records."
        ),
        input_schema={
            "type": "object",
            "properties": {
                "action": {
                    "type": "string",
                    "enum": [
                        "create_table",
                        "import_excel",
                        "import_database",
                        "collect_api",
                        "upsert_records",
                        "query_records",
                    ],
                    "description": "Bitable operation to perform.",
                },
                "table_name": {
                    "type": "string",
                    "description": "Name for the new bitable table (create_table, import_excel, import_database).",
                },
                "description": {
                    "type": "string",
                    "description": "Table description (create_table).",
                },
                "file_path": {
                    "type": "string",
                    "description": "Path to .xlsx file (import_excel).",
                },
                "file_url": {
                    "type": "string",
                    "description": "URL to download .xlsx file (import_excel).",
                },
                "connection_string": {
                    "type": "string",
                    "description": "Database connection string (import_database).",
                },
                "table_names": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Source table names to import (import_database).",
                },
                "table_id": {
                    "type": "string",
                    "description": "Target bitable table ID (collect_api, upsert_records, query_records).",
                },
                "records": {
                    "type": "array",
                    # Element type declared explicitly: strict tool-schema
                    # validators reject array properties without `items`, and
                    # it tells the caller each record is an object.
                    "items": {"type": "object"},
                    "description": "Records to write (collect_api, upsert_records).",
                },
                "field_mapping": {
                    "type": "object",
                    "description": "Mapping {source_key: bitable_field_id} (collect_api).",
                },
                "primary_key_field_id": {
                    "type": "string",
                    "description": "Field ID of the primary key (upsert_records, collect_api).",
                },
                "resume_from": {
                    "type": "integer",
                    "description": "Skip this many records before resuming a failed batch (upsert_records, collect_api).",
                },
                "cursor": {
                    "type": "string",
                    "description": "Pagination cursor (query_records).",
                },
                "limit": {
                    "type": "integer",
                    "description": "Max records to return (query_records).",
                },
            },
            "required": ["action"],
        },
    )
    self._base_url = base_url.rstrip("/")
    self._internal_token = internal_token
    self._client: httpx.AsyncClient | None = None
async def execute(self, **kwargs) -> dict[str, Any]:
    """Dispatch ``kwargs["action"]`` to its handler and normalise failures.

    Every handler receives the full ``kwargs``. Exceptions raised by a handler
    are converted into ``{"success": False, "error": ...}`` results: HTTP
    status errors, connection errors, and finally any other exception.
    An unrecognised action yields an ``Unknown action`` error result.
    """
    action = kwargs.get("action")
    dispatch = {
        "create_table": self._create_table,
        "import_excel": self._import_excel,
        "import_database": self._import_database,
        "collect_api": self._collect_api,
        "upsert_records": self._upsert_records,
        "query_records": self._query_records,
    }

    try:
        handler = dispatch[action]
    except KeyError:
        return {"success": False, "error": f"Unknown action: {action!r}"}

    try:
        outcome = await handler(**kwargs)
    except httpx.HTTPStatusError as e:
        message = f"Bitable API error {e.response.status_code}: {e.response.text[:500]}"
        return {"success": False, "error": message}
    except httpx.ConnectError as e:
        return {"success": False, "error": f"Cannot connect to bitable API: {e}"}
    except Exception as e:
        return {"success": False, "error": f"{action} failed: {e}"}
    return outcome
async def _import_excel(self, **kwargs) -> dict[str, Any]:
    """Import every parsed sheet of an Excel workbook as its own bitable table.

    Reads ``file_path`` or ``file_url`` from ``kwargs``; ``file_path`` wins
    when both are given. Parsing runs in a worker thread, then each sheet is
    handed to ``_import_sheet`` one after another. Returns an error result
    when neither source is supplied or parsing yields no sheets.
    """
    file_path = kwargs.get("file_path")
    file_url = kwargs.get("file_url")
    if not (file_path or file_url):
        return {"success": False, "error": "Either file_path or file_url is required"}

    # Parsing is synchronous I/O, so it is pushed to the thread pool (P2 #21-23).
    parser, source = (
        (parse_excel, file_path) if file_path else (parse_excel_url, file_url)
    )
    sheets = await asyncio.to_thread(parser, source)

    if not sheets:
        return {"success": False, "error": "Excel file has no sheets with data"}

    # Sheets are imported sequentially, preserving workbook order.
    imported: list[dict[str, Any]] = [
        await self._import_sheet(sheet) for sheet in sheets
    ]
    return {"success": True, "sheets": imported}
Map record keys to field IDs and batch upsert + mapped_records = [ + {field_name_to_id[k]: v for k, v in rec.items() if k in field_name_to_id} + for rec in sheet.records + ] + + if not mapped_records: + return { + "table_id": table_id, + "table_name": sheet.name, + "field_count": len(field_name_to_id), + "record_count": 0, + } + + # Use first field as PK fallback (import_excel doesn't require a PK) + # If no PK is set, upsert won't work — use create_records instead + upsert_result = await self._batch_create_records(table_id, mapped_records) + return { + "table_id": table_id, + "table_name": sheet.name, + "field_count": len(field_name_to_id), + "record_count": upsert_result["successful_count"], + **upsert_result, + } + + async def _batch_create_records( + self, table_id: str, records: list[dict[str, Any]] + ) -> dict[str, Any]: + """Create records in batches via POST /tables/{id}/records.""" + client = await self._get_client() + total = len(records) + successful = 0 + errors: list[dict[str, Any]] = [] + + for start in range(0, total, BATCH_SIZE): + batch = records[start : start + BATCH_SIZE] + try: + resp = await client.post( + f"/tables/{table_id}/records", + json={"records": batch}, + ) + resp.raise_for_status() + successful += len(batch) + except httpx.HTTPStatusError as e: + errors.append( + { + "batch_start": start, + "batch_size": len(batch), + "status": e.response.status_code, + "error": e.response.text[:300], + } + ) + break # stop on first failure + + return { + "successful_count": successful, + "total": total, + "resume_from": successful, + **({"errors": errors} if errors else {}), + } + + # ------------------------------------------------------------------ + # import_database + # ------------------------------------------------------------------ + + async def _import_database(self, **kwargs) -> dict[str, Any]: + conn_str = kwargs.get("connection_string") + table_names = kwargs.get("table_names") + if not conn_str: + return {"success": False, "error": 
"Missing required field: connection_string"} + if not table_names: + return {"success": False, "error": "Missing required field: table_names"} + + results: list[dict[str, Any]] = [] + for src_table in table_names: + try: + # Offload sync DB reflection to thread pool (P2 #21-23). + reflected = await asyncio.to_thread(import_db_table, conn_str, src_table) + result = await self._import_reflected_table(reflected) + results.append(result) + except ConnectionError as e: + return {"success": False, "error": str(e), "imported": results} + except Exception as e: + results.append({"table_name": src_table, "success": False, "error": str(e)}) + return {"success": True, "tables": results} + + async def _import_reflected_table(self, reflected: dict[str, Any]) -> dict[str, Any]: + """Create a bitable table from reflected DB data and upsert rows.""" + client = await self._get_client() + table_name = reflected["table_name"] + + # 1. Create table + resp = await client.post("/tables", json={"name": table_name}) + resp.raise_for_status() + table_id = resp.json()["table"]["id"] + + # 2. Create fields + field_name_to_id: dict[str, str] = {} + pk_field_id: str | None = None + for fdef in reflected["fields"]: + resp = await client.post( + f"/tables/{table_id}/fields", + json={ + "name": fdef["name"], + "field_type": fdef["field_type"], + "owner": "agent", + }, + ) + resp.raise_for_status() + fid = resp.json()["field"]["id"] + field_name_to_id[fdef["name"]] = fid + if fdef.get("is_primary_key"): + pk_field_id = fid + + # 3. Set primary key + if pk_field_id: + await client.patch("/tables/" + table_id, json={"primary_key_field_id": pk_field_id}) + + # 4. 
Map and upsert records + mapped = [ + {field_name_to_id[k]: v for k, v in rec.items() if k in field_name_to_id} + for rec in reflected["records"] + ] + + if not mapped: + return { + "table_id": table_id, + "table_name": table_name, + "record_count": 0, + "success": True, + } + + if pk_field_id: + upsert = await self._batch_upsert(table_id, mapped, pk_field_id) + else: + upsert = await self._batch_create_records(table_id, mapped) + + return { + "table_id": table_id, + "table_name": table_name, + "record_count": upsert["successful_count"], + "success": True, + **upsert, + } + + # ------------------------------------------------------------------ + # collect_api + # ------------------------------------------------------------------ + + async def _collect_api(self, **kwargs) -> dict[str, Any]: + table_id = kwargs.get("table_id") + records = kwargs.get("records") + field_mapping = kwargs.get("field_mapping") + pk_field_id = kwargs.get("primary_key_field_id") + resume_from = kwargs.get("resume_from", 0) + + if not table_id: + return {"success": False, "error": "Missing required field: table_id"} + if not records: + return {"success": False, "error": "Missing required field: records"} + if not field_mapping: + return {"success": False, "error": "Missing required field: field_mapping"} + if not pk_field_id: + return {"success": False, "error": "Missing required field: primary_key_field_id"} + + transformed = transform_records(records, field_mapping) + if resume_from > 0: + transformed = transformed[resume_from:] + + result = await self._batch_upsert(table_id, transformed, pk_field_id) + return {"success": True, **result} + + # ------------------------------------------------------------------ + # upsert_records + # ------------------------------------------------------------------ + + async def _upsert_records(self, **kwargs) -> dict[str, Any]: + table_id = kwargs.get("table_id") + records = kwargs.get("records") + pk_field_id = kwargs.get("primary_key_field_id") + 
resume_from = kwargs.get("resume_from", 0) + + if not table_id: + return {"success": False, "error": "Missing required field: table_id"} + if not records: + return {"success": False, "error": "Missing required field: records"} + if not pk_field_id: + return {"success": False, "error": "Missing required field: primary_key_field_id"} + + batch = records[resume_from:] if resume_from > 0 else records + result = await self._batch_upsert(table_id, batch, pk_field_id) + return {"success": True, **result} + + async def _batch_upsert( + self, table_id: str, records: list[dict[str, Any]], pk_field_id: str + ) -> dict[str, Any]: + """Upsert records in batches of BATCH_SIZE via POST /tables/{id}/upsert.""" + client = await self._get_client() + total = len(records) + successful = 0 + errors: list[dict[str, Any]] = [] + + for start in range(0, total, BATCH_SIZE): + batch = records[start : start + BATCH_SIZE] + try: + resp = await client.post( + f"/tables/{table_id}/upsert", + json={ + "records": batch, + "primary_key_field_id": pk_field_id, + }, + ) + resp.raise_for_status() + data = resp.json() + successful += data.get("inserted", 0) + data.get("updated", 0) + except httpx.HTTPStatusError as e: + errors.append( + { + "batch_start": start, + "batch_size": len(batch), + "status": e.response.status_code, + "error": e.response.text[:300], + } + ) + break + + return { + "successful_count": successful, + "total": total, + "resume_from": successful, + **({"errors": errors} if errors else {}), + } + + # ------------------------------------------------------------------ + # query_records + # ------------------------------------------------------------------ + + async def _query_records(self, **kwargs) -> dict[str, Any]: + table_id = kwargs.get("table_id") + if not table_id: + return {"success": False, "error": "Missing required field: table_id"} + + client = await self._get_client() + params: dict[str, Any] = {} + if kwargs.get("cursor"): + params["cursor"] = kwargs["cursor"] + if 
kwargs.get("limit"): + params["limit"] = kwargs["limit"] + + resp = await client.get(f"/tables/{table_id}/records", params=params) + resp.raise_for_status() + data = resp.json() + return { + "success": True, + "records": data["records"], + "next_cursor": data.get("next_cursor"), + } diff --git a/tests/unit/bitable/__init__.py b/tests/unit/bitable/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/bitable/conftest.py b/tests/unit/bitable/conftest.py new file mode 100644 index 0000000..9eeb1fe --- /dev/null +++ b/tests/unit/bitable/conftest.py @@ -0,0 +1,143 @@ +"""Shared fixtures for bitable unit tests. + +Provides: +- ``bitable_db``: initialized BitableDB (skips if PG unavailable) +- ``bitable_service``: BitableService backed by bitable_db +- Factory functions: make_table, make_field, make_record, make_formula_field +""" + +from __future__ import annotations + +import os + +import pytest + +from agentkit.bitable.models import FieldOwner, FieldType + + +def _pg_available() -> bool: + """Check if PostgreSQL is reachable via DATABASE_URL env var.""" + url = os.environ.get("DATABASE_URL") or os.environ.get("AGENTKIT_DATABASE_URL") + return bool(url) + + +@pytest.fixture +async def bitable_db(): + """Initialize a fresh bitable DB for each test (skips if PG unavailable).""" + if not _pg_available(): + pytest.skip("PostgreSQL not available (set DATABASE_URL)") + + from agentkit.bitable.db import BitableDB + + db = BitableDB() + try: + await db.init() + # Clean slate: drop and recreate bitable schema + from sqlalchemy import text + + async with db.engine.begin() as conn: + await conn.execute(text("DROP SCHEMA IF EXISTS bitable CASCADE")) + await db.init() # re-create fresh + yield db + finally: + # Cleanup + from sqlalchemy import text + + if db.engine is not None: + async with db.engine.begin() as conn: + await conn.execute(text("DROP SCHEMA IF EXISTS bitable CASCADE")) + await db.close() + + +@pytest.fixture +async def 
bitable_service(bitable_db): + """BitableService backed by the test bitable_db.""" + from agentkit.bitable.service import BitableService + + yield BitableService(bitable_db) + + +# ── Factory fixtures ─────────────────────────────────────── + + +@pytest.fixture +def make_table(bitable_service): + """Factory: create a table and return it.""" + + counter = [0] + + async def _make( + name: str | None = None, + description: str = "", + primary_key_field_id: str | None = None, + ): + counter[0] += 1 + return await bitable_service.create_table( + name=name or f"test_table_{counter[0]}", + description=description, + primary_key_field_id=primary_key_field_id, + ) + + return _make + + +@pytest.fixture +def make_field(bitable_service): + """Factory: create a field and return it.""" + + counter = [0] + + async def _make( + table_id: str, + name: str | None = None, + field_type: FieldType = FieldType.text, + config: dict | None = None, + owner: FieldOwner = FieldOwner.agent, + ): + counter[0] += 1 + return await bitable_service.create_field( + table_id=table_id, + name=name or f"field_{counter[0]}", + field_type=field_type, + config=config or {}, + owner=owner, + ) + + return _make + + +@pytest.fixture +def make_record(bitable_service): + """Factory: create a record and return it.""" + counter = [0] + + async def _make(table_id: str, values: dict | None = None): + counter[0] += 1 + return await bitable_service.create_record( + table_id=table_id, + values=values or {}, + ) + + return _make + + +@pytest.fixture +def make_formula_field(bitable_service): + """Factory: create a formula field and return it.""" + counter = [0] + + async def _make( + table_id: str, + name: str | None = None, + formula_expr: str = "=1+1", + ): + counter[0] += 1 + return await bitable_service.create_field( + table_id=table_id, + name=name or f"calc_{counter[0]}", + field_type=FieldType.formula, + config={"formula_expr": formula_expr}, + owner=FieldOwner.user, + ) + + return _make diff --git 
a/tests/unit/bitable/test_attachment.py b/tests/unit/bitable/test_attachment.py new file mode 100644 index 0000000..77a7f04 --- /dev/null +++ b/tests/unit/bitable/test_attachment.py @@ -0,0 +1,322 @@ +"""Tests for U6: attachment & image field upload, download, and cleanup. + +Requires PostgreSQL — marked ``postgres``. Uses ``httpx.AsyncClient`` with +``ASGITransport`` (same pattern as test_routes.py). +""" + +from __future__ import annotations + +import io +from pathlib import Path +from typing import Any + +import httpx +import pytest +from fastapi import FastAPI +from httpx import ASGITransport + +from agentkit.bitable.service import BitableService +from agentkit.server.routes import bitable as bitable_routes +from agentkit.server.routes.bitable import require_bitable_auth + +pytestmark = pytest.mark.postgres + +TEST_USER_ID = "test-user-id" + + +def _make_test_user() -> dict[str, Any]: + return {"user_id": TEST_USER_ID, "username": "testuser", "role": "member"} + + +@pytest.fixture +def app(bitable_service: BitableService, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> FastAPI: + """Test app with upload dir redirected to tmp_path.""" + upload_dir = tmp_path / "bitable_uploads" + # Patch both the routes module variable AND the env var (service reads env var) + monkeypatch.setattr(bitable_routes, "BITABLE_UPLOAD_DIR", upload_dir) + monkeypatch.setenv("AGENTKIT_BITABLE_UPLOAD_DIR", str(upload_dir)) + + app = FastAPI() + app.state.bitable_service = bitable_service + app.include_router(bitable_routes.router, prefix="/api/v1") + app.dependency_overrides[require_bitable_auth] = lambda: _make_test_user() + + return app + + +@pytest.fixture +async def client(app: FastAPI) -> httpx.AsyncClient: + transport = ASGITransport(app=app) + async with httpx.AsyncClient(transport=transport, base_url="http://test") as c: + yield c + + +# --------------------------------------------------------------------------- +# Helpers +# 
--------------------------------------------------------------------------- + + +async def _create_table_with_field( + client: httpx.AsyncClient, + field_type: str, + field_name: str = "files", +) -> tuple[str, str]: + """Create a table + a field, return (table_id, field_id).""" + table_id = ( + await client.post("/api/v1/bitable/tables", json={"name": "T"}) + ).json()["table"]["id"] + field_id = ( + await client.post( + f"/api/v1/bitable/tables/{table_id}/fields", + json={"name": field_name, "field_type": field_type, "owner": "agent"}, + ) + ).json()["field"]["id"] + return table_id, field_id + + +def _make_image_bytes(name: str = "test.png", size: int = 100) -> tuple[bytes, str]: + """Minimal valid PNG header + padding.""" + png_header = b"\x89PNG\r\n\x1a\n" + body = b"\x00" * size + return png_header + body, name + + +def _make_pdf_bytes(name: str = "doc.pdf", size: int = 50) -> tuple[bytes, str]: + return b"%PDF-1.4\n" + b"\x00" * size, name + + +# --------------------------------------------------------------------------- +# Upload tests +# --------------------------------------------------------------------------- + + +async def test_upload_image_success(client: httpx.AsyncClient, tmp_path: Path) -> None: + table_id, field_id = await _create_table_with_field(client, "image") + img_bytes, img_name = _make_image_bytes() + resp = await client.post( + f"/api/v1/bitable/tables/{table_id}/upload", + params={"field_id": field_id}, + files={"file": (img_name, io.BytesIO(img_bytes), "image/png")}, + ) + assert resp.status_code == 200 + data = resp.json() + assert data["filename"] == img_name + assert data["mime_type"] == "image/png" + assert data["size"] == len(img_bytes) + assert data["stored_name"].endswith(".png") + assert data["url"].startswith("/api/v1/bitable/files/") + # File exists on disk + file_path = bitable_routes.BITABLE_UPLOAD_DIR / data["stored_name"] + assert file_path.exists() + assert file_path.read_bytes() == img_bytes + + +async def 
test_upload_attachment_pdf(client: httpx.AsyncClient) -> None: + table_id, field_id = await _create_table_with_field(client, "attachment") + pdf_bytes, pdf_name = _make_pdf_bytes() + resp = await client.post( + f"/api/v1/bitable/tables/{table_id}/upload", + params={"field_id": field_id}, + files={"file": (pdf_name, io.BytesIO(pdf_bytes), "application/pdf")}, + ) + assert resp.status_code == 200 + data = resp.json() + assert data["filename"] == pdf_name + assert data["mime_type"] == "application/pdf" + + +async def test_upload_image_rejects_non_image(client: httpx.AsyncClient) -> None: + table_id, field_id = await _create_table_with_field(client, "image") + pdf_bytes, _ = _make_pdf_bytes() + resp = await client.post( + f"/api/v1/bitable/tables/{table_id}/upload", + params={"field_id": field_id}, + files={"file": ("doc.pdf", io.BytesIO(pdf_bytes), "application/pdf")}, + ) + assert resp.status_code == 400 + assert "image" in resp.json()["detail"].lower() + + +async def test_upload_rejects_non_attachment_field(client: httpx.AsyncClient) -> None: + table_id, field_id = await _create_table_with_field(client, "text") + img_bytes, _ = _make_image_bytes() + resp = await client.post( + f"/api/v1/bitable/tables/{table_id}/upload", + params={"field_id": field_id}, + files={"file": ("test.png", io.BytesIO(img_bytes), "image/png")}, + ) + assert resp.status_code == 400 + + +async def test_upload_404_unknown_field(client: httpx.AsyncClient) -> None: + table_id = ( + await client.post("/api/v1/bitable/tables", json={"name": "T"}) + ).json()["table"]["id"] + img_bytes, _ = _make_image_bytes() + resp = await client.post( + f"/api/v1/bitable/tables/{table_id}/upload", + params={"field_id": "nonexistent"}, + files={"file": ("test.png", io.BytesIO(img_bytes), "image/png")}, + ) + assert resp.status_code == 404 + + +async def test_upload_requires_auth(bitable_service: BitableService) -> None: + """No auth override → 401.""" + app = FastAPI() + app.state.bitable_service = bitable_service 
+ app.include_router(bitable_routes.router, prefix="/api/v1") + transport = ASGITransport(app=app) + async with httpx.AsyncClient(transport=transport, base_url="http://test") as c: + resp = await c.post( + "/api/v1/bitable/tables/x/upload", + params={"field_id": "y"}, + files={"file": ("t.png", io.BytesIO(b"x"), "image/png")}, + ) + assert resp.status_code == 401 + + +# --------------------------------------------------------------------------- +# Download tests +# --------------------------------------------------------------------------- + + +async def test_download_file_success(client: httpx.AsyncClient) -> None: + table_id, field_id = await _create_table_with_field(client, "image") + img_bytes, img_name = _make_image_bytes() + upload_resp = await client.post( + f"/api/v1/bitable/tables/{table_id}/upload", + params={"field_id": field_id}, + files={"file": (img_name, io.BytesIO(img_bytes), "image/png")}, + ) + stored_name = upload_resp.json()["stored_name"] + resp = await client.get(f"/api/v1/bitable/files/{stored_name}") + assert resp.status_code == 200 + assert resp.content == img_bytes + + +async def test_download_404_missing_file(client: httpx.AsyncClient) -> None: + resp = await client.get("/api/v1/bitable/files/nonexistent.png") + assert resp.status_code == 404 + + +# --------------------------------------------------------------------------- +# Attachment cleanup on record deletion +# --------------------------------------------------------------------------- + + +async def test_delete_record_cleans_up_files( + client: httpx.AsyncClient, + bitable_service: BitableService, +) -> None: + table_id, field_id = await _create_table_with_field(client, "image") + img_bytes, _ = _make_image_bytes() + upload_resp = await client.post( + f"/api/v1/bitable/tables/{table_id}/upload", + params={"field_id": field_id}, + files={"file": ("pic.png", io.BytesIO(img_bytes), "image/png")}, + ) + file_meta = upload_resp.json() + stored_name = file_meta["stored_name"] + + # 
Create a record with the image metadata + create_resp = await client.post( + f"/api/v1/bitable/tables/{table_id}/records", + json={"records": [{field_id: [file_meta]}]}, + ) + record_id = create_resp.json()["records"][0]["id"] + + # Verify file exists + file_path = bitable_routes.BITABLE_UPLOAD_DIR / stored_name + assert file_path.exists() + + # Delete the record + del_resp = await client.delete(f"/api/v1/bitable/records/{record_id}") + assert del_resp.status_code == 200 + + # File should be gone + assert not file_path.exists() + + +async def test_delete_records_by_table_cleans_up_files( + client: httpx.AsyncClient, +) -> None: + table_id, field_id = await _create_table_with_field(client, "attachment") + pdf_bytes, _ = _make_pdf_bytes() + upload_resp = await client.post( + f"/api/v1/bitable/tables/{table_id}/upload", + params={"field_id": field_id}, + files={"file": ("doc.pdf", io.BytesIO(pdf_bytes), "application/pdf")}, + ) + file_meta = upload_resp.json() + stored_name = file_meta["stored_name"] + + await client.post( + f"/api/v1/bitable/tables/{table_id}/records", + json={"records": [{field_id: [file_meta]}]}, + ) + + file_path = bitable_routes.BITABLE_UPLOAD_DIR / stored_name + assert file_path.exists() + + # Delete all records + resp = await client.delete(f"/api/v1/bitable/tables/{table_id}/records") + assert resp.status_code == 200 + assert not file_path.exists() + + +async def test_delete_record_when_file_already_missing( + client: httpx.AsyncClient, +) -> None: + """Record deletion should succeed even if the physical file is gone.""" + table_id, field_id = await _create_table_with_field(client, "image") + img_bytes, _ = _make_image_bytes() + upload_resp = await client.post( + f"/api/v1/bitable/tables/{table_id}/upload", + params={"field_id": field_id}, + files={"file": ("pic.png", io.BytesIO(img_bytes), "image/png")}, + ) + file_meta = upload_resp.json() + stored_name = file_meta["stored_name"] + + create_resp = await client.post( + 
f"/api/v1/bitable/tables/{table_id}/records", + json={"records": [{field_id: [file_meta]}]}, + ) + record_id = create_resp.json()["records"][0]["id"] + + # Manually delete the file before deleting the record + file_path = bitable_routes.BITABLE_UPLOAD_DIR / stored_name + file_path.unlink() + assert not file_path.exists() + + # Record deletion should still succeed + del_resp = await client.delete(f"/api/v1/bitable/records/{record_id}") + assert del_resp.status_code == 200 + + +# --------------------------------------------------------------------------- +# Multiple files in one field +# --------------------------------------------------------------------------- + + +async def test_multiple_files_in_attachment_field(client: httpx.AsyncClient) -> None: + table_id, field_id = await _create_table_with_field(client, "attachment") + metas = [] + for name in ("a.pdf", "b.pdf"): + pdf_bytes, _ = _make_pdf_bytes(name) + resp = await client.post( + f"/api/v1/bitable/tables/{table_id}/upload", + params={"field_id": field_id}, + files={"file": (name, io.BytesIO(pdf_bytes), "application/pdf")}, + ) + metas.append(resp.json()) + + # Store all files as an array in one record + create_resp = await client.post( + f"/api/v1/bitable/tables/{table_id}/records", + json={"records": [{field_id: metas}]}, + ) + assert create_resp.status_code == 200 + record = create_resp.json()["records"][0] + assert len(record["values"][field_id]) == 2 diff --git a/tests/unit/bitable/test_bitable_tool.py b/tests/unit/bitable/test_bitable_tool.py new file mode 100644 index 0000000..b8b20b4 --- /dev/null +++ b/tests/unit/bitable/test_bitable_tool.py @@ -0,0 +1,485 @@ +"""Tests for BitableTool (U4). + +Tests the full HTTP flow: BitableTool → bitable REST API → BitableService. +Uses ``httpx.AsyncClient`` + ``ASGITransport`` so the tool's HTTP calls +and the bitable DB share one event loop. 
+ +Covers: +- KTD11: X-Internal-Token auth (valid token accepted, invalid rejected) +- Batch chunking: 1200 records → 3 HTTP requests (500+500+200) +- Resume from partial failure +- Three ingestion types: Excel, database, API collector +- create_table, upsert_records, query_records +""" + +from __future__ import annotations + +import io + +import httpx +import pytest +from fastapi import FastAPI +from httpx import ASGITransport + +from agentkit.bitable.service import BitableService +from agentkit.server.routes import bitable as bitable_routes +from agentkit.server.routes.bitable import require_bitable_auth +from agentkit.tools.bitable_tool import BATCH_SIZE, BitableTool + +pytestmark = pytest.mark.postgres + +TEST_TOKEN = "test-internal-token-abc123" +TEST_USER = {"user_id": "test-user", "username": "tester", "role": "member"} + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def app(bitable_service: BitableService) -> FastAPI: + """Test app with bitable_service + internal token on app.state.""" + app = FastAPI() + app.state.bitable_service = bitable_service + app.state.bitable_internal_token = TEST_TOKEN + app.include_router(bitable_routes.router, prefix="/api/v1") + # Override auth so JWT path also works (for non-internal-token tests) + app.dependency_overrides[require_bitable_auth] = lambda: TEST_USER + return app + + +@pytest.fixture +def app_no_override(bitable_service: BitableService) -> FastAPI: + """App without auth override — tests real X-Internal-Token path.""" + app = FastAPI() + app.state.bitable_service = bitable_service + app.state.bitable_internal_token = TEST_TOKEN + app.include_router(bitable_routes.router, prefix="/api/v1") + return app + + +@pytest.fixture +def app_no_token(bitable_service: BitableService) -> FastAPI: + """App without internal token configured.""" + app = FastAPI() + 
app.state.bitable_service = bitable_service + app.include_router(bitable_routes.router, prefix="/api/v1") + app.dependency_overrides[require_bitable_auth] = lambda: TEST_USER + return app + + +def _make_client(app: FastAPI, token: str | None = None) -> httpx.AsyncClient: + """Create an httpx AsyncClient backed by ASGITransport. + + If token is provided, the X-Internal-Token header is set as default + on the client — mirroring how BitableTool._get_client configures it. + """ + base = "http://test/api/v1/bitable" + transport = ASGITransport(app=app) + headers: dict[str, str] = {} + if token: + headers["X-Internal-Token"] = token + return httpx.AsyncClient(transport=transport, base_url=base, headers=headers) + + +@pytest.fixture +async def tool(app: FastAPI) -> BitableTool: + """BitableTool pointing at the test app via ASGITransport. + + ponytail: We patch _client to use ASGITransport instead of real + HTTP — this shares the event loop with the async DB fixtures. + """ + client = _make_client(app, token=TEST_TOKEN) + t = BitableTool(base_url="http://test/api/v1/bitable", internal_token=TEST_TOKEN) + t._client = client + yield t + await client.aclose() + + +@pytest.fixture +async def tool_no_token(app_no_token: FastAPI) -> BitableTool: + """BitableTool without internal token.""" + client = _make_client(app_no_token, token=None) + t = BitableTool(base_url="http://test/api/v1/bitable", internal_token=None) + t._client = client + yield t + await client.aclose() + + +@pytest.fixture +async def tool_real_auth(app_no_override: FastAPI) -> BitableTool: + """BitableTool that sends real X-Internal-Token header (no auth override).""" + client = _make_client(app_no_override, token=TEST_TOKEN) + t = BitableTool(base_url="http://test/api/v1/bitable", internal_token=TEST_TOKEN) + t._client = client + yield t + await client.aclose() + + +# --------------------------------------------------------------------------- +# Helpers +# 
--------------------------------------------------------------------------- + + +def _make_xlsx(sheets: dict[str, list[list]]) -> bytes: + """Create an in-memory .xlsx file.""" + from openpyxl import Workbook + + wb = Workbook() + wb.remove(wb.active) + for name, rows in sheets.items(): + ws = wb.create_sheet(title=name) + for row in rows: + ws.append(row) + buf = io.BytesIO() + wb.save(buf) + return buf.getvalue() + + +async def _setup_table_with_pk(tool: BitableTool, name: str = "T") -> tuple[str, str, str]: + """Create a table with a text PK field and a number data field. + + Returns (table_id, pk_field_id, data_field_id). + """ + result = await tool.execute(action="create_table", table_name=name) + assert result["success"], result + table_id = result["table"]["id"] + + client = await tool._get_client() + + # Create PK field + resp = await client.post( + f"/tables/{table_id}/fields", + json={"name": "id", "field_type": "text", "owner": "agent"}, + ) + resp.raise_for_status() + pk_field_id = resp.json()["field"]["id"] + + # Create data field + resp = await client.post( + f"/tables/{table_id}/fields", + json={"name": "val", "field_type": "number", "owner": "agent"}, + ) + resp.raise_for_status() + data_field_id = resp.json()["field"]["id"] + + # Set PK + resp = await client.patch(f"/tables/{table_id}", json={"primary_key_field_id": pk_field_id}) + resp.raise_for_status() + + return table_id, pk_field_id, data_field_id + + +# --------------------------------------------------------------------------- +# create_table +# --------------------------------------------------------------------------- + + +async def test_create_table(tool: BitableTool) -> None: + """create_table action creates a bitable table via HTTP.""" + result = await tool.execute(action="create_table", table_name="MyTable") + assert result["success"] is True + assert result["table"]["name"] == "MyTable" + + +async def test_create_table_missing_name(tool: BitableTool) -> None: + """Missing table_name → 
error.""" + result = await tool.execute(action="create_table") + assert result["success"] is False + assert "table_name" in result["error"] + + +# --------------------------------------------------------------------------- +# KTD11: Internal token auth +# --------------------------------------------------------------------------- + + +async def test_internal_token_accepted(tool_real_auth: BitableTool) -> None: + """Valid X-Internal-Token → request succeeds (no JWT needed).""" + result = await tool_real_auth.execute(action="create_table", table_name="Authed") + assert result["success"] is True + + +async def test_invalid_token_rejected(app_no_override: FastAPI) -> None: + """Wrong X-Internal-Token → 401.""" + transport = ASGITransport(app=app_no_override) + async with httpx.AsyncClient(transport=transport, base_url="http://test") as client: + resp = await client.post( + "/api/v1/bitable/tables", + json={"name": "X"}, + headers={"X-Internal-Token": "wrong-token"}, + ) + assert resp.status_code == 401 + + +async def test_no_auth_rejected(app_no_override: FastAPI) -> None: + """No auth at all → 401.""" + transport = ASGITransport(app=app_no_override) + async with httpx.AsyncClient(transport=transport, base_url="http://test") as client: + resp = await client.post("/api/v1/bitable/tables", json={"name": "X"}) + assert resp.status_code == 401 + + +# --------------------------------------------------------------------------- +# Batch chunking (BATCH_SIZE=500) +# --------------------------------------------------------------------------- + + +async def test_batch_upsert_1200_records(tool: BitableTool) -> None: + """1200 records → 3 batches (500+500+200), all succeed.""" + table_id, pk_fid, data_fid = await _setup_table_with_pk(tool) + + records = [{pk_fid: f"r{i}", data_fid: i * 10} for i in range(1200)] + result = await tool.execute( + action="upsert_records", + table_id=table_id, + records=records, + primary_key_field_id=pk_fid, + ) + assert result["success"] is True + 
assert result["successful_count"] == 1200 + assert result["total"] == 1200 + assert "errors" not in result + + +async def test_batch_size_is_500() -> None: + """Verify BATCH_SIZE constant is 500.""" + assert BATCH_SIZE == 500 + + +async def test_resume_from_partial_failure(tool: BitableTool) -> None: + """resume_from skips already-successful records.""" + table_id, pk_fid, data_fid = await _setup_table_with_pk(tool) + + # First, insert 500 records successfully + batch1 = [{pk_fid: f"r{i}", data_fid: i} for i in range(500)] + result1 = await tool.execute( + action="upsert_records", + table_id=table_id, + records=batch1, + primary_key_field_id=pk_fid, + ) + assert result1["successful_count"] == 500 + + # Now resume from 500 with the remaining 700 + all_records = [{pk_fid: f"r{i}", data_fid: i} for i in range(1200)] + remaining = all_records[500:] + result2 = await tool.execute( + action="upsert_records", + table_id=table_id, + records=remaining, + primary_key_field_id=pk_fid, + resume_from=0, # remaining is already sliced + ) + assert result2["successful_count"] == 700 + + +# --------------------------------------------------------------------------- +# query_records +# --------------------------------------------------------------------------- + + +async def test_query_records(tool: BitableTool) -> None: + """query_records returns records from the table.""" + table_id, pk_fid, data_fid = await _setup_table_with_pk(tool) + + # Insert some records + await tool.execute( + action="upsert_records", + table_id=table_id, + records=[{pk_fid: "a", data_fid: 1}, {pk_fid: "b", data_fid: 2}], + primary_key_field_id=pk_fid, + ) + + # Query + result = await tool.execute(action="query_records", table_id=table_id) + assert result["success"] is True + assert len(result["records"]) == 2 + + +async def test_query_records_with_limit(tool: BitableTool) -> None: + """query_records with limit returns fewer records.""" + table_id, pk_fid, data_fid = await _setup_table_with_pk(tool) + + 
await tool.execute( + action="upsert_records", + table_id=table_id, + records=[{pk_fid: f"r{i}", data_fid: i} for i in range(10)], + primary_key_field_id=pk_fid, + ) + + result = await tool.execute(action="query_records", table_id=table_id, limit=5) + assert result["success"] is True + assert len(result["records"]) == 5 + + +# --------------------------------------------------------------------------- +# import_excel +# --------------------------------------------------------------------------- + + +async def test_import_excel_file(tool: BitableTool, tmp_path) -> None: + """import_excel from file path → creates table + fields + records.""" + xlsx_bytes = _make_xlsx({"Products": [["name", "price"], ["Widget", 9.99], ["Gadget", 19.99]]}) + file_path = tmp_path / "test.xlsx" + file_path.write_bytes(xlsx_bytes) + + result = await tool.execute(action="import_excel", file_path=str(file_path)) + assert result["success"] is True + sheet_result = result["sheets"][0] + assert sheet_result["record_count"] == 2 + assert sheet_result["field_count"] == 2 + + # Verify data was actually written + table_id = sheet_result["table_id"] + query = await tool.execute(action="query_records", table_id=table_id) + assert len(query["records"]) == 2 + + +async def test_import_excel_empty_sheet(tool: BitableTool, tmp_path) -> None: + """Excel with only headers (no data rows) → table created, 0 records.""" + xlsx_bytes = _make_xlsx({"Empty": [["col1", "col2"]]}) + file_path = tmp_path / "empty.xlsx" + file_path.write_bytes(xlsx_bytes) + + result = await tool.execute(action="import_excel", file_path=str(file_path)) + assert result["success"] is True + assert result["sheets"][0]["record_count"] == 0 + assert result["sheets"][0]["field_count"] == 2 + + +async def test_import_excel_missing_path(tool: BitableTool) -> None: + """No file_path or file_url → error.""" + result = await tool.execute(action="import_excel") + assert result["success"] is False + assert "file_path" in result["error"] or 
"file_url" in result["error"] + + +# --------------------------------------------------------------------------- +# collect_api +# --------------------------------------------------------------------------- + + +async def test_collect_api(tool: BitableTool) -> None: + """collect_api transforms records via field_mapping and upserts.""" + table_id, pk_fid, data_fid = await _setup_table_with_pk(tool) + + result = await tool.execute( + action="collect_api", + table_id=table_id, + records=[ + {"user_id": "u1", "score": 100}, + {"user_id": "u2", "score": 200}, + ], + field_mapping={"user_id": pk_fid, "score": data_fid}, + primary_key_field_id=pk_fid, + ) + assert result["success"] is True + assert result["successful_count"] == 2 + + # Verify + query = await tool.execute(action="query_records", table_id=table_id) + assert len(query["records"]) == 2 + + +async def test_collect_api_missing_fields(tool: BitableTool) -> None: + """Missing required fields → error.""" + result = await tool.execute(action="collect_api", records=[]) + assert result["success"] is False + + +# --------------------------------------------------------------------------- +# Error handling +# --------------------------------------------------------------------------- + + +async def test_unknown_action(tool: BitableTool) -> None: + """Unknown action → error.""" + result = await tool.execute(action="bogus") + assert result["success"] is False + assert "Unknown action" in result["error"] + + +async def test_query_nonexistent_table(tool: BitableTool) -> None: + """Querying a non-existent table → error.""" + result = await tool.execute(action="query_records", table_id="nonexistent-id") + assert result["success"] is False + + +# --------------------------------------------------------------------------- +# Database ingestion (type mapping only — no real external DB needed) +# --------------------------------------------------------------------------- + + +def test_db_type_mapping_integer() -> None: + 
"""Integer type → 'number'.""" + from sqlalchemy import Integer + + from agentkit.bitable.ingestion.database import infer_field_type + + assert infer_field_type(Integer()) == "number" + assert infer_field_type(Integer) == "number" + + +def test_db_type_mapping_varchar() -> None: + """String type → 'text'.""" + from sqlalchemy import String + + from agentkit.bitable.ingestion.database import infer_field_type + + assert infer_field_type(String(255)) == "text" + + +def test_db_type_mapping_datetime() -> None: + """DateTime type → 'date'.""" + from sqlalchemy import DateTime + + from agentkit.bitable.ingestion.database import infer_field_type + + assert infer_field_type(DateTime()) == "date" + + +def test_db_type_mapping_unknown_fallback() -> None: + """Unknown type → 'text' (safe fallback).""" + from agentkit.bitable.ingestion.database import infer_field_type + + class CustomType: + pass + + assert infer_field_type(CustomType()) == "text" + + +# --------------------------------------------------------------------------- +# API collector transform +# --------------------------------------------------------------------------- + + +def test_transform_records_basic() -> None: + """transform_records maps source keys to field IDs.""" + from agentkit.bitable.ingestion.api_collector import transform_records + + result = transform_records( + records=[{"name": "Alice", "age": 30, "extra": "dropped"}], + field_mapping={"name": "fld_abc", "age": "fld_def"}, + ) + assert result == [{"fld_abc": "Alice", "fld_def": 30}] + + +def test_transform_records_empty() -> None: + """Empty records → empty result.""" + from agentkit.bitable.ingestion.api_collector import transform_records + + assert transform_records([], {"a": "b"}) == [] + assert transform_records([{"a": 1}], {}) == [] + + +def test_transform_records_missing_keys() -> None: + """Source keys not in mapping are silently dropped.""" + from agentkit.bitable.ingestion.api_collector import transform_records + + result = 
transform_records( + records=[{"a": 1, "b": 2}], + field_mapping={"a": "fld_a"}, # b is not mapped + ) + assert result == [{"fld_a": 1}] diff --git a/tests/unit/bitable/test_cli.py b/tests/unit/bitable/test_cli.py new file mode 100644 index 0000000..5c77b4a --- /dev/null +++ b/tests/unit/bitable/test_cli.py @@ -0,0 +1,205 @@ +"""Tests for U7: bitable CLI subcommands. + +Requires PostgreSQL — marked ``postgres``. Uses Typer's CliRunner. +""" + +from __future__ import annotations + +import os +from pathlib import Path + +import pytest +from typer.testing import CliRunner + +from agentkit.cli.bitable import bitable_app + +pytestmark = pytest.mark.postgres + +runner = CliRunner() + + +@pytest.fixture +def db_env(monkeypatch: pytest.MonkeyPatch) -> None: + """Ensure DATABASE_URL is set for CLI tests.""" + url = os.environ.get("DATABASE_URL") or os.environ.get("AGENTKIT_DATABASE_URL") + if not url: + pytest.skip("PostgreSQL not available (set DATABASE_URL)") + monkeypatch.setenv("DATABASE_URL", url) + + +@pytest.fixture +def clean_schema(monkeypatch: pytest.MonkeyPatch) -> None: + """Drop and recreate bitable schema before each test.""" + import asyncio + + url = os.environ.get("DATABASE_URL") or os.environ.get("AGENTKIT_DATABASE_URL") + if not url: + pytest.skip("PostgreSQL not available") + + from agentkit.bitable.db import BitableDB + from sqlalchemy import text + + async def _clean(): + db = BitableDB() + await db.init() + async with db.engine.begin() as conn: + await conn.execute(text("DROP SCHEMA IF EXISTS bitable CASCADE")) + await db.init() + await db.close() + + asyncio.run(_clean()) + + +# --------------------------------------------------------------------------- +# list-tables +# --------------------------------------------------------------------------- + + +def test_list_tables_empty(db_env, clean_schema) -> None: + result = runner.invoke(bitable_app, ["list-tables"]) + assert result.exit_code == 0 + assert "No tables found" in result.output + + +def 
test_list_tables_after_create(db_env, clean_schema) -> None: + # Create a table first + runner.invoke(bitable_app, ["create-table", "--name", "TestTable"]) + result = runner.invoke(bitable_app, ["list-tables"]) + assert result.exit_code == 0 + assert "TestTable" in result.output + + +# --------------------------------------------------------------------------- +# create-table +# --------------------------------------------------------------------------- + + +def test_create_table_success(db_env, clean_schema) -> None: + result = runner.invoke( + bitable_app, + ["create-table", "--name", "MyTable", "--description", "A test table"], + ) + assert result.exit_code == 0 + assert "Created table" in result.output + assert "MyTable" in result.output + assert "A test table" in result.output + + +def test_create_table_minimal(db_env, clean_schema) -> None: + result = runner.invoke(bitable_app, ["create-table", "--name", "Minimal"]) + assert result.exit_code == 0 + assert "Minimal" in result.output + + +# --------------------------------------------------------------------------- +# query +# --------------------------------------------------------------------------- + + +def test_query_table_not_found(db_env, clean_schema) -> None: + result = runner.invoke(bitable_app, ["query", "--table", "nonexistent-id"]) + assert result.exit_code == 1 + assert "not found" in result.output + + +def test_query_empty_table(db_env, clean_schema) -> None: + # Create a table first + create_result = runner.invoke(bitable_app, ["create-table", "--name", "Empty"]) + assert create_result.exit_code == 0 + # Extract table ID from output — it's on the "ID:" line + lines = create_result.output.split("\n") + table_id = None + for line in lines: + if "ID:" in line: + # Extract the cyan-colored ID + table_id = line.split("ID:")[1].strip().strip("[]").split(" ")[0] + # Remove rich formatting + table_id = table_id.replace("[cyan]", "").replace("[/cyan]", "") + break + assert table_id is not None, f"Could 
not extract table ID from: {create_result.output}" + + result = runner.invoke(bitable_app, ["query", "--table", table_id]) + assert result.exit_code == 0 + assert "No records found" in result.output + + +def test_query_with_records(db_env, clean_schema) -> None: + """Create table + field + records via service, then query via CLI.""" + import asyncio + + from agentkit.bitable.db import BitableDB + from agentkit.bitable.models import FieldType + from agentkit.bitable.service import BitableService + + async def _setup(): + db = BitableDB() + await db.init() + service = BitableService(db) + table = await service.create_table(name="Data") + field = await service.create_field( + table_id=table.id, name="name", field_type=FieldType.text + ) + await service.create_record(table_id=table.id, values={field.id: "Alice"}) + await service.create_record(table_id=table.id, values={field.id: "Bob"}) + await db.close() + return table.id + + table_id = asyncio.run(_setup()) + + result = runner.invoke(bitable_app, ["query", "--table", table_id, "--limit", "10"]) + assert result.exit_code == 0 + assert "Alice" in result.output + assert "Bob" in result.output + + +# --------------------------------------------------------------------------- +# import-excel +# --------------------------------------------------------------------------- + + +def test_import_excel_file_not_found(db_env, clean_schema) -> None: + result = runner.invoke( + bitable_app, + ["import-excel", "--file", "/nonexistent/file.xlsx"], + ) + assert result.exit_code == 1 + assert "not found" in result.output + + +def test_import_excel_success(db_env, clean_schema, tmp_path: Path) -> None: + """Create a real xlsx file and import it.""" + from openpyxl import Workbook + + wb = Workbook() + ws = wb.active + ws.title = "Sheet1" + ws.append(["name", "age"]) + ws.append(["Alice", 30]) + ws.append(["Bob", 25]) + xlsx_path = tmp_path / "test.xlsx" + wb.save(xlsx_path) + + result = runner.invoke( + bitable_app, + ["import-excel", 
"--file", str(xlsx_path), "--table", "Imported"], + ) + assert result.exit_code == 0 + assert "Created table" in result.output + assert "Imported" in result.output + assert "Imported 2 records" in result.output + assert "name" in result.output + assert "age" in result.output + + +# --------------------------------------------------------------------------- +# Error path: no DATABASE_URL +# --------------------------------------------------------------------------- + + +def test_no_database_url(monkeypatch: pytest.MonkeyPatch) -> None: + """CLI should exit with clear error when DATABASE_URL is not set.""" + monkeypatch.delenv("DATABASE_URL", raising=False) + monkeypatch.delenv("AGENTKIT_DATABASE_URL", raising=False) + result = runner.invoke(bitable_app, ["list-tables"]) + assert result.exit_code == 1 + assert "DATABASE_URL" in result.output diff --git a/tests/unit/bitable/test_db.py b/tests/unit/bitable/test_db.py new file mode 100644 index 0000000..41dc6ba --- /dev/null +++ b/tests/unit/bitable/test_db.py @@ -0,0 +1,246 @@ +"""Tests for bitable DB initialization, schema, and constraints (U1). + +Requires PostgreSQL — marked ``postgres``. Skips automatically when +``DATABASE_URL`` / ``AGENTKIT_DATABASE_URL`` is unset (see conftest.py). 
+""" + +from __future__ import annotations + +import pytest + +pytestmark = pytest.mark.postgres + + +# --------------------------------------------------------------------------- +# init_bitable_db / BitableDB.init +# --------------------------------------------------------------------------- + + +async def test_init_creates_schema_and_all_tables(bitable_db) -> None: + """init creates the bitable schema and all 6 tables.""" + from sqlalchemy import text + + async with bitable_db.engine.begin() as conn: + # Schema exists + result = await conn.execute( + text( + "SELECT schema_name FROM information_schema.schemata WHERE schema_name = 'bitable'" + ) + ) + assert result.fetchone() is not None + + # All 6 tables present + result = await conn.execute( + text( + "SELECT table_name FROM information_schema.tables " + "WHERE table_schema = 'bitable' ORDER BY table_name" + ) + ) + tables = {row[0] for row in result.fetchall()} + assert tables == { + "bitable_fields", + "bitable_meta", + "bitable_records", + "bitable_recalc_queue", + "bitable_tables", + "bitable_views", + } + + +async def test_init_is_idempotent(bitable_db) -> None: + """Calling init() twice does not raise and keeps schema intact.""" + # bitable_db fixture already called init(); call again + await bitable_db.init() + await bitable_db.init() # third time also fine + + from sqlalchemy import text + + async with bitable_db.engine.begin() as conn: + result = await conn.execute(text("SELECT COUNT(*) FROM bitable.bitable_meta")) + assert result.fetchone()[0] >= 1 + + +async def test_schema_version_recorded_in_meta(bitable_db) -> None: + """bitable_meta stores the current schema version.""" + from agentkit.bitable.db import _META_SCHEMA_VERSION_KEY, _SCHEMA_VERSION + + from sqlalchemy import text + + async with bitable_db.engine.begin() as conn: + result = await conn.execute( + text("SELECT value FROM bitable.bitable_meta WHERE key = :key"), + {"key": _META_SCHEMA_VERSION_KEY}, + ) + row = result.fetchone() + assert 
row is not None + assert int(row[0]) == _SCHEMA_VERSION + + +# --------------------------------------------------------------------------- +# Constraints +# --------------------------------------------------------------------------- + + +async def test_recalc_queue_unique_record_field(bitable_db) -> None: + """Recalc queue enforces (record_id, field_id) uniqueness — dedup.""" + from agentkit.bitable.models import FieldType + from agentkit.bitable.repository import BitableRepository + + repo = BitableRepository(bitable_db) + table = await repo.create_table(name="T") + field = await repo.create_field(table_id=table.id, name="f", field_type=FieldType.text) + record = await repo.create_record(table_id=table.id) + + # First enqueue succeeds + task1 = await repo.enqueue_recalc(table.id, record.id, field.id) + assert task1 is not None + + # Second enqueue is a no-op (ON CONFLICT DO NOTHING) — returns None + task2 = await repo.enqueue_recalc(table.id, record.id, field.id) + assert task2 is None + + +async def test_recalc_queue_status_index_exists(bitable_db) -> None: + """The (status, queued_at) index exists for worker consumption.""" + from sqlalchemy import text + + async with bitable_db.engine.begin() as conn: + result = await conn.execute( + text( + "SELECT indexname FROM pg_indexes " + "WHERE schemaname = 'bitable' AND tablename = 'bitable_recalc_queue'" + ) + ) + indexes = {row[0] for row in result.fetchall()} + assert "ix_recalc_status_queued" in indexes + assert "uq_recalc_record_field" in indexes + + +async def test_records_values_gin_index_exists(bitable_db) -> None: + """GIN index on records.values exists for JSONB key lookups.""" + from sqlalchemy import text + + async with bitable_db.engine.begin() as conn: + result = await conn.execute( + text( + "SELECT indexname FROM pg_indexes " + "WHERE schemaname = 'bitable' AND tablename = 'bitable_records'" + ) + ) + indexes = {row[0] for row in result.fetchall()} + assert "ix_bitable_records_values_gin" in indexes + + 
+# --------------------------------------------------------------------------- +# Repository CRUD smoke (verifies schema is usable end-to-end) +# --------------------------------------------------------------------------- + + +async def test_repository_crud_round_trip(bitable_db) -> None: + """Repository can create/get/list/delete across all entities.""" + from agentkit.bitable.models import FieldOwner, FieldType, ViewType + from agentkit.bitable.repository import BitableRepository + + repo = BitableRepository(bitable_db) + + # Table + table = await repo.create_table(name="Orders", description="desc") + assert table.name == "Orders" + fetched = await repo.get_table(table.id) + assert fetched is not None and fetched.id == table.id + + # Field + field = await repo.create_field( + table_id=table.id, + name="Amount", + field_type=FieldType.number, + owner=FieldOwner.agent, + ) + fields = await repo.list_fields(table.id) + assert len(fields) == 1 + assert fields[0].id == field.id + + # Record + record = await repo.create_record(table_id=table.id, values={field.id: 42}) + fetched_rec = await repo.get_record(record.id) + assert fetched_rec is not None + assert fetched_rec.values[field.id] == 42 + + # Cursor pagination + rec2 = await repo.create_record(table_id=table.id, values={field.id: 99}) + records, next_cursor = await repo.list_records(table.id, limit=1) + assert len(records) == 1 + assert next_cursor is not None + records2, next_cursor2 = await repo.list_records(table.id, cursor=next_cursor, limit=1) + assert len(records2) == 1 + # The second page should be the other record + assert {records[0].id, records2[0].id} == {record.id, rec2.id} + + # View + view = await repo.create_view(table_id=table.id, name="All", view_type=ViewType.grid) + views = await repo.list_views(table.id) + assert len(views) == 1 and views[0].id == view.id + + # Delete cascades + deleted = await repo.delete_table(table.id) + assert deleted is True + assert await repo.get_table(table.id) is None 
+ assert await repo.get_field(field.id) is None + assert await repo.get_record(record.id) is None + assert (await repo.list_views(table.id)) == [] + + +# --------------------------------------------------------------------------- +# Crash recovery +# --------------------------------------------------------------------------- + + +async def test_reset_stale_recalc_tasks(bitable_db) -> None: + """reset_stale_recalc_tasks flips 'calculating' back to 'pending'.""" + from agentkit.bitable.models import FieldType, RecalcStatus + from agentkit.bitable.repository import BitableRepository + + repo = BitableRepository(bitable_db) + table = await repo.create_table(name="T") + field = await repo.create_field(table_id=table.id, name="f", field_type=FieldType.text) + record = await repo.create_record(table_id=table.id) + + task = await repo.enqueue_recalc(table.id, record.id, field.id) + assert task is not None + + # Simulate a worker crash mid-calculation + await repo.update_recalc_status(task.id, RecalcStatus.calculating) + + reset_count = await repo.reset_stale_recalc_tasks() + assert reset_count == 1 + + pending = await repo.get_pending_recalc_tasks() + assert any(t.id == task.id for t in pending) + + +# --------------------------------------------------------------------------- +# Degradation (no PG) +# --------------------------------------------------------------------------- + + +async def test_bitable_db_without_url_raises() -> None: + """BitableDB with no URL raises RuntimeError on init (not silently None).""" + # Clear env vars for this test to ensure no URL resolution + import os + + saved = ( + os.environ.pop("DATABASE_URL", None), + os.environ.pop("AGENTKIT_DATABASE_URL", None), + ) + try: + from agentkit.bitable.db import BitableDB + + db = BitableDB(database_url=None) + # _database_url is None because no arg and no env + assert db.database_url is None + with pytest.raises(RuntimeError, match="No database URL"): + await db.init() + finally: + for key, val in 
zip(("DATABASE_URL", "AGENTKIT_DATABASE_URL"), saved): + if val is not None: + os.environ[key] = val diff --git a/tests/unit/bitable/test_formula_engine.py b/tests/unit/bitable/test_formula_engine.py new file mode 100644 index 0000000..1179b99 --- /dev/null +++ b/tests/unit/bitable/test_formula_engine.py @@ -0,0 +1,211 @@ +"""Tests for the formula engine — DAG, cycle detection, evaluation. + +Covers: topological sort, circular reference detection, aggregate vs row +context, formula-to-formula dependencies, and the built-in function library. +""" + +from __future__ import annotations + +import pytest + +from agentkit.bitable.formula.engine import ( + CircularReferenceError, + FormulaEngine, +) + + +# --------------------------------------------------------------------------- +# Basic evaluation +# --------------------------------------------------------------------------- + + +def test_engine_evaluate_simple_arithmetic() -> None: + """=1+2*3 → 7""" + engine = FormulaEngine() + engine.add_formula("calc", "=1+2*3") + result = engine.evaluate("calc", row_values={}) + assert result == 7 + + +def test_engine_evaluate_row_reference() -> None: + """={f1} + {f2} → row-level sum""" + engine = FormulaEngine() + engine.add_formula("sum", "={f1} + {f2}") + result = engine.evaluate("sum", row_values={"f1": 10, "f2": 20}) + assert result == 30 + + +def test_engine_evaluate_aggregate_sum() -> None: + """=SUM({f1}) → aggregate sum of column""" + engine = FormulaEngine() + engine.add_formula("total", "=SUM({f1})") + result = engine.evaluate("total", row_values={}, column_values={"f1": [1, 2, 3]}) + assert result == 6 + + +def test_engine_evaluate_aggregate_avg() -> None: + """=AVG({f1}) → average of column""" + engine = FormulaEngine() + engine.add_formula("avg", "=AVG({f1})") + result = engine.evaluate("avg", row_values={}, column_values={"f1": [10, 20, 30]}) + assert result == 20.0 + + +def test_engine_evaluate_aggregate_count() -> None: + """=COUNT({f1}) → count of non-empty 
values""" + engine = FormulaEngine() + engine.add_formula("cnt", "=COUNT({f1})") + result = engine.evaluate("cnt", row_values={}, column_values={"f1": [1, None, 3, "", 5]}) + assert result == 3 # None and "" are ignored + + +def test_engine_evaluate_mixed_aggregate_and_row() -> None: + """={f1} + SUM({f2}) → row f1 + column f2 sum""" + engine = FormulaEngine() + engine.add_formula("mixed", "={f1} + SUM({f2})") + result = engine.evaluate("mixed", row_values={"f1": 10}, column_values={"f2": [1, 2, 3]}) + assert result == 16 # 10 + 6 + + +def test_engine_evaluate_concat() -> None: + """=CONCAT({f1}, "-", {f2}) → string concat""" + engine = FormulaEngine() + engine.add_formula("label", '=CONCAT({f1}, "-", {f2})') + result = engine.evaluate("label", row_values={"f1": "a", "f2": "b"}) + assert result == "a-b" + + +def test_engine_evaluate_if_function() -> None: + """=IF({f1} > 5, "big", "small")""" + engine = FormulaEngine() + engine.add_formula("size", '=IF({f1} > 5, "big", "small")') + assert engine.evaluate("size", row_values={"f1": 10}) == "big" + assert engine.evaluate("size", row_values={"f1": 3}) == "small" + + +def test_engine_evaluate_min_max() -> None: + engine = FormulaEngine() + engine.add_formula("mn", "=MIN({f1})") + engine.add_formula("mx", "=MAX({f1})") + cols = {"f1": [3, 1, 4, 1, 5, 9, 2, 6]} + assert engine.evaluate("mn", {}, cols) == 1 + assert engine.evaluate("mx", {}, cols) == 9 + + +# --------------------------------------------------------------------------- +# DAG: dependencies and dependents +# --------------------------------------------------------------------------- + + +def test_engine_get_dependencies() -> None: + engine = FormulaEngine() + engine.add_formula("c", "={a} + {b}") + assert engine.get_dependencies("c") == {"a", "b"} + assert engine.get_dependents("a") == {"c"} + assert engine.get_dependents("b") == {"c"} + + +def test_engine_topological_order() -> None: + """c depends on b, b depends on a → order: a, b, c""" + engine = 
FormulaEngine() + engine.add_formula("c", "={b} + 1") + engine.add_formula("b", "={a} + 1") + engine.add_formula("a", "=1") + order = engine.topological_order() + assert order.index("a") < order.index("b") + assert order.index("b") < order.index("c") + + +def test_engine_evaluate_all_for_record() -> None: + """Formula-to-formula dependency: c = b + 1, b = a + 1, a = 5 → c = 7""" + engine = FormulaEngine() + engine.add_formula("a", "=5") + engine.add_formula("b", "={a} + 1") + engine.add_formula("c", "={b} + 1") + results = engine.evaluate_all_for_record(row_values={}) + assert results["a"] == 5 + assert results["b"] == 6 + assert results["c"] == 7 + + +# --------------------------------------------------------------------------- +# Circular reference detection +# --------------------------------------------------------------------------- + + +def test_circular_reference_detected() -> None: + """f1 = f2 + 1, f2 = f1 + 1 → CircularReferenceError""" + engine = FormulaEngine() + engine.add_formula("f1", "={f2} + 1") + with pytest.raises(CircularReferenceError): + engine.add_formula("f2", "={f1} + 1") + + +def test_circular_reference_rollback() -> None: + """When cycle is detected, the formula is not added (rollback).""" + engine = FormulaEngine() + engine.add_formula("f1", "={f2} + 1") + with pytest.raises(CircularReferenceError): + engine.add_formula("f2", "={f1} + 1") + # f2 should not be in the engine + assert "f2" not in engine._formulas + assert "f2" not in engine._dag + + +def test_self_reference_detected() -> None: + """f1 = f1 + 1 → CircularReferenceError""" + engine = FormulaEngine() + with pytest.raises(CircularReferenceError): + engine.add_formula("f1", "={f1} + 1") + + +def test_remove_formula_breaks_cycle() -> None: + """Remove a formula, then the cycle can be broken.""" + engine = FormulaEngine() + engine.add_formula("f1", "={f2} + 1") + # Can't add f2 = f1 + 1 (cycle) + with pytest.raises(CircularReferenceError): + engine.add_formula("f2", "={f1} + 1") + 
# Remove f1, now f2 can be added standalone + engine.remove_formula("f1") + engine.add_formula("f2", "=42") + assert engine.evaluate("f2", {}) == 42 + + +# --------------------------------------------------------------------------- +# Edge cases +# --------------------------------------------------------------------------- + + +def test_evaluate_missing_field_value_is_none() -> None: + """Missing field values are None — arithmetic on None raises TypeError.""" + engine = FormulaEngine() + engine.add_formula("calc", "={missing_field} + 1") + # The engine passes None for missing fields (row_values.get returns None) + with pytest.raises(TypeError): + engine.evaluate("calc", row_values={}) + + +def test_aggregate_ignores_none_and_empty() -> None: + """SUM ignores None and empty string values.""" + engine = FormulaEngine() + engine.add_formula("total", "=SUM({f1})") + result = engine.evaluate("total", row_values={}, column_values={"f1": [1, None, 2, "", 3]}) + assert result == 6 + + +def test_division_by_zero_returns_error_in_evaluate_all() -> None: + """Division by zero is caught in evaluate_all_for_record, returns error dict.""" + engine = FormulaEngine() + engine.add_formula("calc", "={f1} / 0") + results = engine.evaluate_all_for_record(row_values={"f1": 10}) + assert "__error" in results["calc"] + + +def test_engine_with_uuid_field_ids() -> None: + """Field IDs with hyphens (UUIDs) work correctly.""" + fid = "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + engine = FormulaEngine() + engine.add_formula("calc", f"={{{fid}}} * 2") + result = engine.evaluate("calc", row_values={fid: 21}) + assert result == 42 diff --git a/tests/unit/bitable/test_formula_parser.py b/tests/unit/bitable/test_formula_parser.py new file mode 100644 index 0000000..a87df4c --- /dev/null +++ b/tests/unit/bitable/test_formula_parser.py @@ -0,0 +1,199 @@ +"""Tests for the formula parser (KTD7 security + parsing). 
+ +Test-first per U3 execution note: parser, security constraints, and cycle +detection tests are written before the engine/recalc worker. +""" + +from __future__ import annotations + +import pytest + +from agentkit.bitable.formula.parser import ( + FormulaParseError, + FormulaSecurityError, + UnknownFunctionError, + evaluate_ast, + parse_formula, +) + +ALLOWED = {"SUM", "AVG", "COUNT", "MIN", "MAX", "ABS", "ROUND", "IF", "LEN", "CONCAT"} + + +# --------------------------------------------------------------------------- +# Parsing happy paths +# --------------------------------------------------------------------------- + + +def test_parse_simple_arithmetic() -> None: + tree, mapping = parse_formula("=1+2*3", ALLOWED) + assert mapping == {} + result = evaluate_ast(tree, {}, {}) + assert result == 7 + + +def test_parse_strips_equals_prefix() -> None: + tree1, _ = parse_formula("=1+1", ALLOWED) + tree2, _ = parse_formula("1+1", ALLOWED) + assert evaluate_ast(tree1, {}, {}) == evaluate_ast(tree2, {}, {}) == 2 + + +def test_parse_field_reference() -> None: + tree, mapping = parse_formula("={field_abc} + 1", ALLOWED) + assert "field_abc" in mapping.values() + # Safe name is prefixed with _f_ + safe_name = next(k for k, v in mapping.items() if v == "field_abc") + result = evaluate_ast(tree, {safe_name: 41}, {}) + assert result == 42 + + +def test_parse_uuid_field_reference() -> None: + """Field IDs are UUIDs with hyphens — must be substituted to safe names.""" + fid = "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + tree, mapping = parse_formula(f"={{{fid}}} * 2", ALLOWED) + # The mapping should have a safe name → original UUID + assert fid in mapping.values() + # Evaluate using the safe name (prefixed with _f_) + safe_name = next(k for k, v in mapping.items() if v == fid) + assert safe_name.startswith("_f_") + result = evaluate_ast(tree, {safe_name: 21}, {}) + assert result == 42 + + +def test_parse_string_concatenation() -> None: + tree, _ = parse_formula('="hello" + " " + 
"world"', ALLOWED) + assert evaluate_ast(tree, {}, {}) == "hello world" + + +def test_parse_conditional_ifexp() -> None: + tree, _ = parse_formula("=1 if True else 2", ALLOWED) + assert evaluate_ast(tree, {}, {}) == 1 + + +def test_parse_comparison() -> None: + tree, mapping = parse_formula("={f} > 5", ALLOWED) + safe_name = next(k for k, v in mapping.items() if v == "f") + assert evaluate_ast(tree, {safe_name: 10}, {}) is True + assert evaluate_ast(tree, {safe_name: 3}, {}) is False + + +def test_parse_boolean_ops() -> None: + tree, _ = parse_formula("=True and False", ALLOWED) + assert evaluate_ast(tree, {}, {}) is False + tree2, _ = parse_formula("=True or False", ALLOWED) + assert evaluate_ast(tree2, {}, {}) is True + + +# --------------------------------------------------------------------------- +# Function calls +# --------------------------------------------------------------------------- + + +def test_parse_function_call_sum() -> None: + tree, mapping = parse_formula("=SUM({f1})", ALLOWED) + safe_name = next(k for k, v in mapping.items() if v == "f1") + result = evaluate_ast(tree, {safe_name: [1, 2, 3]}, {"SUM": sum}) + assert result == 6 + + +def test_parse_function_call_concat() -> None: + tree, mapping = parse_formula('=CONCAT({f1}, "-", {f2})', ALLOWED) + safe_f1 = next(k for k, v in mapping.items() if v == "f1") + safe_f2 = next(k for k, v in mapping.items() if v == "f2") + result = evaluate_ast( + tree, {safe_f1: "a", safe_f2: "b"}, {"CONCAT": lambda *a: "".join(str(x) for x in a)} + ) + assert result == "a-b" + + +def test_parse_nested_function_calls() -> None: + tree, _ = parse_formula("=ABS(-5) + ROUND(3.7, 0)", ALLOWED) + funcs = {"ABS": abs, "ROUND": round} + result = evaluate_ast(tree, {}, funcs) + assert result == 9 # 5 + 4 + + +# --------------------------------------------------------------------------- +# KTD7 Security — disallowed nodes +# --------------------------------------------------------------------------- + + +def 
test_security_rejects_attribute_access() -> None: + """__import__('os') is rejected — it's a Call to an unregistered function. + (Attribute access like os.system would be caught by the Attribute node check, + but __import__ is caught earlier as an unknown function.)""" + with pytest.raises((FormulaSecurityError, UnknownFunctionError)): + parse_formula("=__import__('os')", ALLOWED) + + +def test_security_rejects_attribute_chain() -> None: + """Attribute access like ''.join([]) is rejected by the Attribute node check.""" + with pytest.raises(FormulaSecurityError): + parse_formula("=''.join([])", ALLOWED) + + +def test_security_rejects_lambda() -> None: + with pytest.raises(FormulaSecurityError): + parse_formula("=(lambda: 1)()", ALLOWED) + + +def test_security_rejects_subscript() -> None: + with pytest.raises(FormulaSecurityError): + parse_formula("=[1,2,3][0]", ALLOWED) + + +def test_security_rejects_assignment() -> None: + """Assignment is a statement, not an expression — rejected at parse stage.""" + with pytest.raises((FormulaSecurityError, FormulaParseError)): + parse_formula("=x = 1", ALLOWED) + + +def test_unknown_function_rejected() -> None: + with pytest.raises(UnknownFunctionError): + parse_formula("=UNKNOWN(1)", ALLOWED) + + +def test_eval_function_rejected_if_not_registered() -> None: + """eval is not in the registry → UnknownFunctionError.""" + with pytest.raises(UnknownFunctionError): + parse_formula("=eval('1+1')", ALLOWED) + + +# --------------------------------------------------------------------------- +# Error paths +# --------------------------------------------------------------------------- + + +def test_parse_error_unbalanced_parens() -> None: + with pytest.raises(FormulaParseError): + parse_formula("=(1+2", ALLOWED) + + +def test_parse_error_empty_formula() -> None: + with pytest.raises(FormulaParseError): + parse_formula("=", ALLOWED) + + +def test_parse_error_empty_string() -> None: + with pytest.raises(FormulaParseError): + 
parse_formula("", ALLOWED) + + +def test_evaluate_unknown_field_ref_raises() -> None: + tree, _ = parse_formula("={nonexistent} + 1", ALLOWED) + with pytest.raises(FormulaParseError, match="Unknown field reference"): + evaluate_ast(tree, {}, {}) + + +# --------------------------------------------------------------------------- +# Mixed aggregate + row context +# --------------------------------------------------------------------------- + + +def test_mixed_aggregate_and_row_context() -> None: + """={f1} + SUM({f2}) — row f1 + column f2 sum.""" + tree, mapping = parse_formula("={f1} + SUM({f2})", ALLOWED) + safe_f1 = next(k for k, v in mapping.items() if v == "f1") + safe_f2 = next(k for k, v in mapping.items() if v == "f2") + # f1 is a row value (scalar), f2 is a column value (list) + result = evaluate_ast(tree, {safe_f1: 10, safe_f2: [1, 2, 3]}, {"SUM": sum}) + assert result == 16 # 10 + 6 diff --git a/tests/unit/bitable/test_ingestion_excel.py b/tests/unit/bitable/test_ingestion_excel.py new file mode 100644 index 0000000..6710022 --- /dev/null +++ b/tests/unit/bitable/test_ingestion_excel.py @@ -0,0 +1,182 @@ +"""Tests for Excel ingestion (U4). + +Tests parse_excel_bytes with in-memory .xlsx files created via openpyxl. +No PostgreSQL required — these are pure parsing tests. +""" + +from __future__ import annotations + +import io +from datetime import datetime + +import pytest + +from agentkit.bitable.ingestion.excel import parse_excel_bytes + +pytestmark = pytest.mark.postgres # Reuse the same PG test group for consistency + + +def _make_xlsx( + sheets: dict[str, list[list]], +) -> bytes: + """Create an in-memory .xlsx file from sheet data. 
+ + Args: + sheets: {sheet_name: [[row1_col1, row1_col2], [row2_col1, ...]]} + """ + from openpyxl import Workbook + + wb = Workbook() + # Remove default sheet + wb.remove(wb.active) + for name, rows in sheets.items(): + ws = wb.create_sheet(title=name) + for row in rows: + ws.append(row) + buf = io.BytesIO() + wb.save(buf) + return buf.getvalue() + + +# --------------------------------------------------------------------------- +# Happy path: basic parsing +# --------------------------------------------------------------------------- + + +def test_parse_simple_sheet() -> None: + """One sheet with header + 2 data rows → correct columns, types, records.""" + xlsx = _make_xlsx( + { + "Sheet1": [ + ["name", "age", "city"], + ["Alice", 30, "NYC"], + ["Bob", 25, "LA"], + ] + } + ) + sheets = parse_excel_bytes(xlsx) + assert len(sheets) == 1 + sheet = sheets[0] + assert sheet.name == "Sheet1" + assert sheet.columns == ["name", "age", "city"] + assert sheet.field_types == ["text", "number", "text"] + assert len(sheet.records) == 2 + assert sheet.records[0] == {"name": "Alice", "age": 30, "city": "NYC"} + assert sheet.records[1] == {"name": "Bob", "age": 25, "city": "LA"} + + +def test_parse_multiple_sheets() -> None: + """Multiple sheets → multiple ParsedSheet objects.""" + xlsx = _make_xlsx( + { + "Users": [["id", "name"], [1, "Alice"]], + "Orders": [["order_id", "amount"], [101, 99.9]], + } + ) + sheets = parse_excel_bytes(xlsx) + assert len(sheets) == 2 + assert sheets[0].name == "Users" + assert sheets[1].name == "Orders" + assert sheets[1].records[0]["amount"] == 99.9 + + +# --------------------------------------------------------------------------- +# Type inference +# --------------------------------------------------------------------------- + + +def test_type_inference_all_number() -> None: + """Column with all integers → 'number'.""" + xlsx = _make_xlsx({"S": [["val"], [1], [2], [3]]}) + sheets = parse_excel_bytes(xlsx) + assert sheets[0].field_types == 
["number"] + + +def test_type_inference_mixed_text_number() -> None: + """Column with mixed text and number → 'text'.""" + xlsx = _make_xlsx({"S": [["val"], [1], ["two"], [3]]}) + sheets = parse_excel_bytes(xlsx) + assert sheets[0].field_types == ["text"] + + +def test_type_inference_date_column() -> None: + """Column with all datetime values → 'date'.""" + xlsx = _make_xlsx({"S": [["when"], [datetime(2024, 1, 1)], [datetime(2024, 6, 15)]]}) + sheets = parse_excel_bytes(xlsx) + assert sheets[0].field_types == ["date"] + assert "2024-01-01" in sheets[0].records[0]["when"] + + +def test_type_inference_empty_column() -> None: + """Column with no values → 'text' (safe default).""" + xlsx = _make_xlsx({"S": [["a", "b"], [1, None], [2, None]]}) + sheets = parse_excel_bytes(xlsx) + assert sheets[0].field_types == ["number", "text"] + + +# --------------------------------------------------------------------------- +# Edge cases +# --------------------------------------------------------------------------- + + +def test_empty_sheet_skipped() -> None: + """Completely empty sheet → not included in results.""" + xlsx = _make_xlsx({"Empty": [], "Data": [["x"], [1]]}) + sheets = parse_excel_bytes(xlsx) + assert len(sheets) == 1 + assert sheets[0].name == "Data" + + +def test_header_only_no_data_rows() -> None: + """Sheet with only a header row → 0 records, fields still created.""" + xlsx = _make_xlsx({"S": [["name", "age"]]}) + sheets = parse_excel_bytes(xlsx) + assert len(sheets) == 1 + assert sheets[0].columns == ["name", "age"] + assert len(sheets[0].records) == 0 + + +def test_duplicate_headers_deduplicated() -> None: + """Duplicate header names → suffixed with _1, _2, etc.""" + xlsx = _make_xlsx({"S": [["name", "name"], ["Alice", "Bob"]]}) + sheets = parse_excel_bytes(xlsx) + assert sheets[0].columns == ["name", "name_1"] + + +def test_none_header_replaced() -> None: + """None header value → auto-generated column name.""" + xlsx = _make_xlsx({"S": [[None, "real"], [1, 2]]}) 
+ sheets = parse_excel_bytes(xlsx) + assert sheets[0].columns[0] == "col_0" + assert sheets[0].columns[1] == "real" + + +def test_corrupt_file_raises_value_error() -> None: + """Non-xlsx bytes → ValueError with clear message.""" + with pytest.raises(ValueError, match="Failed to parse"): + parse_excel_bytes(b"not an excel file") + + +# --------------------------------------------------------------------------- +# Merged cells (known limitation) +# --------------------------------------------------------------------------- + + +def test_merged_cells_only_top_left_has_value() -> None: + """Merged cell: only top-left has value, others are None (known limitation).""" + from openpyxl import Workbook + + wb = Workbook() + ws = wb.active + ws.title = "Merged" + ws.append(["a", "b", "c"]) + ws.append([1, 2, 3]) + ws.merge_cells("A2:B2") # merge A2:B2 — only A2 has value + buf = io.BytesIO() + wb.save(buf) + + sheets = parse_excel_bytes(buf.getvalue()) + rec = sheets[0].records[0] + # A2 has value 1, B2 is None (merged cell limitation) + assert rec["a"] == 1 + assert rec["b"] is None diff --git a/tests/unit/bitable/test_models.py b/tests/unit/bitable/test_models.py new file mode 100644 index 0000000..cc330db --- /dev/null +++ b/tests/unit/bitable/test_models.py @@ -0,0 +1,303 @@ +"""Tests for bitable Pydantic v2 data models (U1). + +Covers: enum values, round-trip serialization, field config shapes per +field_type, Record.values empty-dict legality, default factories. 
+""" + +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from pydantic import ValidationError + +from agentkit.bitable.models import ( + Field, + FieldOwner, + FieldType, + Record, + RecalcStatus, + RecalcTask, + Table, + View, + ViewType, +) + + +# --------------------------------------------------------------------------- +# Enums +# --------------------------------------------------------------------------- + + +def test_field_type_values() -> None: + """FieldType has the 9 supported types with correct string values.""" + expected = { + "text", + "number", + "date", + "select", + "multiselect", + "attachment", + "image", + "formula", + "lookup", + } + assert {ft.value for ft in FieldType} == expected + + +def test_field_owner_values() -> None: + """FieldOwner distinguishes agent vs user (drives upsert merge).""" + assert FieldOwner.agent.value == "agent" + assert FieldOwner.user.value == "user" + + +def test_view_type_values() -> None: + """ViewType enumerates the 5 view kinds (v1 only grid is implemented).""" + assert {vt.value for vt in ViewType} == {"grid", "kanban", "gantt", "gallery", "form"} + + +def test_recalc_status_lifecycle() -> None: + """RecalcStatus covers the full recalc lifecycle.""" + assert {rs.value for rs in RecalcStatus} == {"pending", "calculating", "done", "error"} + + +# --------------------------------------------------------------------------- +# Table +# --------------------------------------------------------------------------- + + +def test_table_minimal_construction() -> None: + """Table requires id + name; other fields have defaults.""" + table = Table(id="t1", name="Orders") + assert table.id == "t1" + assert table.name == "Orders" + assert table.description == "" + assert table.primary_key_field_id is None + assert table.owner_user_id is None + assert isinstance(table.created_at, datetime) + assert isinstance(table.updated_at, datetime) + + +def test_table_round_trip() -> None: + 
"""Table serializes to dict and re-parses losslessly.""" + table = Table( + id="t1", + name="Orders", + description="Customer orders", + primary_key_field_id="f_pk", + owner_user_id="u1", + ) + data = table.model_dump(mode="json") + restored = Table.model_validate(data) + assert restored == table + + +def test_table_requires_id_and_name() -> None: + """Table requires id and name (non-optional).""" + with pytest.raises(ValidationError): + Table(name="no id") # type: ignore[call-arg] + with pytest.raises(ValidationError): + Table(id="t1") # type: ignore[call-arg] + + +# --------------------------------------------------------------------------- +# Field +# --------------------------------------------------------------------------- + + +def test_field_text_default_config() -> None: + """Text field has empty config by default and user owner.""" + field = Field(id="f1", table_id="t1", name="Title", field_type=FieldType.text) + assert field.config == {} + assert field.owner == FieldOwner.user + + +def test_field_select_config_shape() -> None: + """Select field config carries options list.""" + field = Field( + id="f1", + table_id="t1", + name="Status", + field_type=FieldType.select, + config={"options": [{"label": "Open", "value": "open"}]}, + owner=FieldOwner.agent, + ) + assert field.config["options"][0]["value"] == "open" + assert field.owner == FieldOwner.agent + + +def test_field_formula_config_shape() -> None: + """Formula field config carries formula_expr.""" + field = Field( + id="f1", + table_id="t1", + name="Total", + field_type=FieldType.formula, + config={"formula_expr": "=SUM({f_price})"}, + ) + assert field.config["formula_expr"] == "=SUM({f_price})" + + +def test_field_lookup_config_shape() -> None: + """Lookup field config carries lookup_target with table/field ids.""" + field = Field( + id="f1", + table_id="t1", + name="Customer Name", + field_type=FieldType.lookup, + config={ + "lookup_target": { + "table_id": "t_customers", + "field_id": "f_name", + 
"filter_field_id": "f_id", + "filter_value": "cust-123", + } + }, + ) + assert field.config["lookup_target"]["field_id"] == "f_name" + + +def test_field_round_trip() -> None: + """Field serializes and re-parses losslessly across types.""" + field = Field( + id="f1", + table_id="t1", + name="Score", + field_type=FieldType.number, + config={"precision": 2}, + owner=FieldOwner.agent, + ) + restored = Field.model_validate(field.model_dump(mode="json")) + assert restored == field + assert restored.field_type == FieldType.number + assert restored.owner == FieldOwner.agent + + +def test_field_type_accepts_string() -> None: + """FieldType coerces from string (JSON round-trip scenario).""" + field = Field(id="f1", table_id="t1", name="X", field_type="number") # type: ignore[arg-type] + assert field.field_type == FieldType.number + + +# --------------------------------------------------------------------------- +# Record +# --------------------------------------------------------------------------- + + +def test_record_empty_values_allowed() -> None: + """Record.values defaults to empty dict (new row before data entry).""" + record = Record(id="r1", table_id="t1") + assert record.values == {} + + +def test_record_values_round_trip() -> None: + """Record.values (JSONB-shaped dict) round-trips through JSON.""" + record = Record( + id="r1", + table_id="t1", + values={"f_name": "Alice", "f_age": 30, "f_tags": ["a", "b"]}, + ) + restored = Record.model_validate(record.model_dump(mode="json")) + assert restored.values == record.values + assert restored.values["f_tags"] == ["a", "b"] + + +def test_record_values_with_null() -> None: + """Record.values can carry None for unset fields.""" + record = Record(id="r1", table_id="t1", values={"f_name": None}) + assert record.values["f_name"] is None + + +# --------------------------------------------------------------------------- +# View +# --------------------------------------------------------------------------- + + +def 
test_view_defaults_to_grid() -> None: + """View defaults to grid type with empty config.""" + view = View(id="v1", table_id="t1", name="All") + assert view.view_type == ViewType.grid + assert view.config == {} + + +def test_view_round_trip() -> None: + """View with filter/sort config round-trips.""" + view = View( + id="v1", + table_id="t1", + name="Open only", + view_type=ViewType.grid, + config={ + "filters": [{"field_id": "f_status", "op": "eq", "value": "open"}], + "sorts": [{"field_id": "f_created", "direction": "desc"}], + "hidden_fields": ["f_internal"], + }, + ) + restored = View.model_validate(view.model_dump(mode="json")) + assert restored == view + assert restored.config["filters"][0]["op"] == "eq" + + +# --------------------------------------------------------------------------- +# RecalcTask +# --------------------------------------------------------------------------- + + +def test_recalc_task_defaults() -> None: + """RecalcTask defaults to pending status, no error, no completed_at.""" + task = RecalcTask(id="q1", table_id="t1", record_id="r1", field_id="f1") + assert task.status == RecalcStatus.pending + assert task.error_message is None + assert task.completed_at is None + assert isinstance(task.queued_at, datetime) + + +def test_recalc_task_error_state() -> None: + """RecalcTask in error state carries message and completed_at.""" + task = RecalcTask( + id="q1", + table_id="t1", + record_id="r1", + field_id="f1", + status=RecalcStatus.error, + error_message="division by zero", + completed_at=datetime.now(timezone.utc), + ) + assert task.status == RecalcStatus.error + assert task.error_message == "division by zero" + + +def test_recalc_task_round_trip() -> None: + """RecalcTask round-trips through JSON.""" + task = RecalcTask( + id="q1", + table_id="t1", + record_id="r1", + field_id="f1", + status=RecalcStatus.done, + ) + restored = RecalcTask.model_validate(task.model_dump(mode="json")) + assert restored == task + assert restored.status == 
RecalcStatus.done + + +# --------------------------------------------------------------------------- +# from_attributes (ORM row compatibility) +# --------------------------------------------------------------------------- + + +def test_table_from_attributes() -> None: + """Table.model_validate accepts an ORM-like object (from_attributes).""" + + class _Row: + id = "t1" + name = "Orders" + description = "desc" + primary_key_field_id = None + owner_user_id = None + created_at = datetime.now(timezone.utc) + updated_at = datetime.now(timezone.utc) + + table = Table.model_validate(_Row()) + assert table.id == "t1" + assert table.name == "Orders" diff --git a/tests/unit/bitable/test_recalc.py b/tests/unit/bitable/test_recalc.py new file mode 100644 index 0000000..154593e --- /dev/null +++ b/tests/unit/bitable/test_recalc.py @@ -0,0 +1,330 @@ +"""Tests for the async recalc pipeline (U3). + +Requires PostgreSQL — marked ``postgres``. Tests the full pipeline: +record write → recalc enqueue → worker processing → formula value written. + +Also covers: crash recovery, deduplication, and error handling. 
+""" + +from __future__ import annotations + +import asyncio + +import pytest + +from agentkit.bitable.models import FieldOwner, FieldType, RecalcStatus +from agentkit.bitable.recalc_worker import RecalcWorker +from agentkit.bitable.service import BitableService + +pytestmark = pytest.mark.postgres + + +# --------------------------------------------------------------------------- +# Helper: process all pending recalc tasks synchronously +# --------------------------------------------------------------------------- + + +async def _process_all_pending(service: BitableService) -> None: + """Process all pending recalc tasks (for testing without background worker).""" + tasks = await service.get_pending_recalc_tasks(limit=100) + for task in tasks: + await service.process_recalc_task(task) + + +# --------------------------------------------------------------------------- +# Happy path: formula recalc after record write +# --------------------------------------------------------------------------- + + +async def test_recalc_simple_formula_after_create(bitable_service: BitableService) -> None: + """Create a record with data → formula field gets recalculated.""" + table = await bitable_service.create_table(name="T") + src_field = await bitable_service.create_field( + table_id=table.id, name="src", field_type=FieldType.number, owner=FieldOwner.agent + ) + calc_field = await bitable_service.create_field( + table_id=table.id, + name="calc", + field_type=FieldType.formula, + config={"formula_expr": f"={{{src_field.id}}} * 2"}, + ) + + # Create a record — should trigger recalc + record = await bitable_service.create_record(table_id=table.id, values={src_field.id: 21}) + + # Process pending recalc tasks + await _process_all_pending(bitable_service) + + # Verify formula result was written + updated = await bitable_service.get_record(record.id) + assert updated is not None + assert updated.values[calc_field.id] == 42 + + +async def test_recalc_aggregate_formula(bitable_service: 
BitableService) -> None: + """SUM aggregate formula recalculates correctly across all records.""" + table = await bitable_service.create_table(name="T") + src_field = await bitable_service.create_field( + table_id=table.id, name="amt", field_type=FieldType.number, owner=FieldOwner.agent + ) + total_field = await bitable_service.create_field( + table_id=table.id, + name="total", + field_type=FieldType.formula, + config={"formula_expr": f"=SUM({{{src_field.id}}})"}, + ) + + # Create multiple records + for amt in [10, 20, 30]: + await bitable_service.create_record(table_id=table.id, values={src_field.id: amt}) + + # Process all pending recalc tasks + await _process_all_pending(bitable_service) + + # Each record's total field should be 60 (sum of all) + records, _ = await bitable_service.list_records(table.id) + assert len(records) == 3 + for rec in records: + assert rec.values[total_field.id] == 60 + + +async def test_recalc_after_upsert(bitable_service: BitableService) -> None: + """Upsert triggers recalc for affected formula fields.""" + table = await bitable_service.create_table(name="T") + pk_field = await bitable_service.create_field( + table_id=table.id, name="id", field_type=FieldType.text, owner=FieldOwner.agent + ) + data_field = await bitable_service.create_field( + table_id=table.id, name="data", field_type=FieldType.number, owner=FieldOwner.agent + ) + calc_field = await bitable_service.create_field( + table_id=table.id, + name="doubled", + field_type=FieldType.formula, + config={"formula_expr": f"={{{data_field.id}}} * 2"}, + ) + await bitable_service.update_table(table.id, primary_key_field_id=pk_field.id) + + # Upsert a record + await bitable_service.upsert_records( + table.id, + [{pk_field.id: "r1", data_field.id: 15}], + pk_field.id, + ) + + # Process recalc + await _process_all_pending(bitable_service) + + # Verify formula result + records, _ = await bitable_service.list_records(table.id) + assert len(records) == 1 + assert 
records[0].values[calc_field.id] == 30 + + +async def test_recalc_formula_chain(bitable_service: BitableService) -> None: + """Formula-to-formula dependency: c = b*2, b = a*2 → c = a*4.""" + table = await bitable_service.create_table(name="T") + src = await bitable_service.create_field( + table_id=table.id, name="a", field_type=FieldType.number, owner=FieldOwner.agent + ) + mid = await bitable_service.create_field( + table_id=table.id, + name="b", + field_type=FieldType.formula, + config={"formula_expr": f"={{{src.id}}} * 2"}, + ) + top = await bitable_service.create_field( + table_id=table.id, + name="c", + field_type=FieldType.formula, + config={"formula_expr": f"={{{mid.id}}} * 2"}, + ) + + await bitable_service.create_record(table_id=table.id, values={src.id: 5}) + + # Process recalc — may need multiple passes for formula chains + # ponytail: The current implementation processes tasks in queue order, not + # topological order. For formula chains, we may need to process twice: + # first pass computes b, second pass computes c (which depends on b). 
+ await _process_all_pending(bitable_service) + await _process_all_pending(bitable_service) + + records, _ = await bitable_service.list_records(table.id) + assert len(records) == 1 + rec = records[0] + assert rec.values[mid.id] == 10 # 5 * 2 + assert rec.values[top.id] == 20 # 10 * 2 + + +# --------------------------------------------------------------------------- +# Crash recovery +# --------------------------------------------------------------------------- + + +async def test_crash_recovery_resets_calculating_tasks( + bitable_service: BitableService, +) -> None: + """Stale 'calculating' tasks are reset to 'pending' on worker start.""" + table = await bitable_service.create_table(name="T") + src = await bitable_service.create_field( + table_id=table.id, name="s", field_type=FieldType.number, owner=FieldOwner.agent + ) + calc = await bitable_service.create_field( + table_id=table.id, + name="c", + field_type=FieldType.formula, + config={"formula_expr": f"={{{src.id}}} + 1"}, + ) + + record = await bitable_service.create_record(table_id=table.id, values={src.id: 10}) + + # create_record already enqueued a recalc task — get it from pending + tasks = await bitable_service.get_pending_recalc_tasks() + assert len(tasks) == 1 + task = tasks[0] + + from agentkit.bitable.repository import BitableRepository + + repo = BitableRepository(bitable_service._db) + await repo.update_recalc_status(task.id, RecalcStatus.calculating) + + # Verify it's stuck in calculating + tasks = await bitable_service.get_pending_recalc_tasks() + assert len(tasks) == 0 # not pending, it's calculating + + # Crash recovery + reset_count = await bitable_service.reset_stale_recalc_tasks() + assert reset_count == 1 + + # Now it should be pending again + tasks = await bitable_service.get_pending_recalc_tasks() + assert len(tasks) == 1 + + # Process it + await _process_all_pending(bitable_service) + + # Verify result + rec = await bitable_service.get_record(record.id) + assert rec is not None + assert 
rec.values[calc.id] == 11 + + +# --------------------------------------------------------------------------- +# Deduplication +# --------------------------------------------------------------------------- + + +async def test_recalc_deduplication(bitable_service: BitableService) -> None: + """Same (record_id, field_id) enqueued twice → only one task in queue.""" + table = await bitable_service.create_table(name="T") + src = await bitable_service.create_field( + table_id=table.id, name="s", field_type=FieldType.number, owner=FieldOwner.agent + ) + calc = await bitable_service.create_field( + table_id=table.id, + name="c", + field_type=FieldType.formula, + config={"formula_expr": f"={{{src.id}}} + 1"}, + ) + + record = await bitable_service.create_record(table_id=table.id, values={src.id: 10}) + + # The create_record already enqueued one task. Enqueue again manually. + task2 = await bitable_service.trigger_recalc(table.id, record.id, calc.id) + # Should return None (duplicate, ON CONFLICT DO NOTHING) + assert task2 is None + + # Only one pending task + tasks = await bitable_service.get_pending_recalc_tasks() + assert len(tasks) == 1 + + +# --------------------------------------------------------------------------- +# Error handling +# --------------------------------------------------------------------------- + + +async def test_recalc_error_marks_task_as_error(bitable_service: BitableService) -> None: + """Formula with division by zero marks task as error.""" + table = await bitable_service.create_table(name="T") + src = await bitable_service.create_field( + table_id=table.id, name="s", field_type=FieldType.number, owner=FieldOwner.agent + ) + calc = await bitable_service.create_field( + table_id=table.id, + name="c", + field_type=FieldType.formula, + config={"formula_expr": f"={{{src.id}}} / 0"}, + ) + + record = await bitable_service.create_record(table_id=table.id, values={src.id: 10}) + + # Process recalc — should fail with division by zero + await 
_process_all_pending(bitable_service) + + # Verify task is marked as error + from sqlalchemy import text + + db = bitable_service._db + async with db.session_factory() as session: + result = await session.execute( + text( + "SELECT status, error_message FROM bitable.bitable_recalc_queue " + "WHERE record_id = :rid AND field_id = :fid" + ), + {"rid": record.id, "fid": calc.id}, + ) + row = result.fetchone() + assert row is not None + assert row[0] == RecalcStatus.error.value + assert "division" in row[1].lower() or "zero" in row[1].lower() + + +# --------------------------------------------------------------------------- +# Worker lifecycle +# --------------------------------------------------------------------------- + + +async def test_worker_starts_and_stops(bitable_service: BitableService) -> None: + """RecalcWorker starts and stops gracefully.""" + worker = RecalcWorker(bitable_service._db, bitable_service, poll_interval=0.1) + await worker.start() + assert worker._task is not None + assert worker._reaper_task is not None + + # Let it run briefly + await asyncio.sleep(0.2) + + await worker.stop() + assert worker._task is None + assert worker._reaper_task is None + + +async def test_worker_processes_tasks(bitable_service: BitableService) -> None: + """Background worker picks up and processes recalc tasks.""" + table = await bitable_service.create_table(name="T") + src = await bitable_service.create_field( + table_id=table.id, name="s", field_type=FieldType.number, owner=FieldOwner.agent + ) + calc = await bitable_service.create_field( + table_id=table.id, + name="c", + field_type=FieldType.formula, + config={"formula_expr": f"={{{src.id}}} + 100"}, + ) + + record = await bitable_service.create_record(table_id=table.id, values={src.id: 5}) + + # Start worker — it should pick up the pending task + worker = RecalcWorker(bitable_service._db, bitable_service, poll_interval=0.1) + await worker.start() + + # Wait for worker to process + await asyncio.sleep(1.0) + + 
await worker.stop() + + # Verify formula was computed + rec = await bitable_service.get_record(record.id) + assert rec is not None + assert rec.values[calc.id] == 105 diff --git a/tests/unit/bitable/test_routes.py b/tests/unit/bitable/test_routes.py new file mode 100644 index 0000000..8b4e09d --- /dev/null +++ b/tests/unit/bitable/test_routes.py @@ -0,0 +1,579 @@ +"""Tests for bitable REST API routes (U2). + +Requires PostgreSQL — marked ``postgres``. Uses ``httpx.AsyncClient`` with +``ASGITransport`` so the async DB engine and the HTTP client share one event +loop (TestClient runs in a separate thread/loop, which breaks asyncpg's +loop-bound connections). +""" + +from __future__ import annotations + +import json +from typing import Any + +import httpx +import pytest +from fastapi import FastAPI +from httpx import ASGITransport + +from agentkit.bitable.service import BitableService +from agentkit.server.routes import bitable as bitable_routes +from agentkit.server.routes.bitable import require_bitable_auth + +pytestmark = pytest.mark.postgres + +TEST_USER_ID = "test-user-id" + + +def _make_test_user() -> dict[str, Any]: + return {"user_id": TEST_USER_ID, "username": "testuser", "role": "member"} + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def app(bitable_service: BitableService) -> FastAPI: + """Test app with bitable_service on app.state and auth bypassed.""" + app = FastAPI() + app.state.bitable_service = bitable_service + app.include_router(bitable_routes.router, prefix="/api/v1") + app.dependency_overrides[require_bitable_auth] = lambda: _make_test_user() + return app + + +@pytest.fixture +def unauth_app(bitable_service: BitableService) -> FastAPI: + """App without auth override — simulates unauthenticated requests.""" + app = FastAPI() + app.state.bitable_service = bitable_service + 
app.include_router(bitable_routes.router, prefix="/api/v1") + return app + + +@pytest.fixture +def no_service_app() -> FastAPI: + """App without bitable_service on state — simulates uninitialized subsystem.""" + app = FastAPI() + app.include_router(bitable_routes.router, prefix="/api/v1") + app.dependency_overrides[require_bitable_auth] = lambda: _make_test_user() + return app + + +@pytest.fixture +async def client(app: FastAPI) -> httpx.AsyncClient: + """Async HTTP client — shares event loop with async DB fixtures.""" + transport = ASGITransport(app=app) + async with httpx.AsyncClient(transport=transport, base_url="http://test") as c: + yield c + + +@pytest.fixture +async def unauth_client(unauth_app: FastAPI) -> httpx.AsyncClient: + transport = ASGITransport(app=unauth_app) + async with httpx.AsyncClient(transport=transport, base_url="http://test") as c: + yield c + + +@pytest.fixture +async def no_service_client(no_service_app: FastAPI) -> httpx.AsyncClient: + transport = ASGITransport(app=no_service_app) + async with httpx.AsyncClient(transport=transport, base_url="http://test") as c: + yield c + + +# --------------------------------------------------------------------------- +# Auth + service availability +# --------------------------------------------------------------------------- + + +async def test_create_table_requires_auth(unauth_client: httpx.AsyncClient) -> None: + """No auth → 401.""" + resp = await unauth_client.post("/api/v1/bitable/tables", json={"name": "T"}) + assert resp.status_code == 401 + + +async def test_endpoint_returns_503_when_service_unavailable( + no_service_client: httpx.AsyncClient, +) -> None: + """No service on app.state → 503.""" + resp = await no_service_client.post("/api/v1/bitable/tables", json={"name": "T"}) + assert resp.status_code == 503 + + +# --------------------------------------------------------------------------- +# Tables CRUD +# --------------------------------------------------------------------------- + + +async 
def test_create_table_success(client: httpx.AsyncClient) -> None: + resp = await client.post( + "/api/v1/bitable/tables", json={"name": "Orders", "description": "desc"} + ) + assert resp.status_code == 200 + data = resp.json() + assert data["success"] is True + assert data["table"]["name"] == "Orders" + assert data["table"]["description"] == "desc" + assert "id" in data["table"] + + +async def test_list_tables_returns_created(client: httpx.AsyncClient) -> None: + for name in ("A", "B", "C"): + await client.post("/api/v1/bitable/tables", json={"name": name}) + resp = await client.get("/api/v1/bitable/tables") + assert resp.status_code == 200 + data = resp.json() + assert data["success"] is True + assert len(data["tables"]) == 3 + names = {t["name"] for t in data["tables"]} + assert names == {"A", "B", "C"} + + +async def test_get_table_404_when_missing(client: httpx.AsyncClient) -> None: + resp = await client.get("/api/v1/bitable/tables/nonexistent-id") + assert resp.status_code == 404 + + +async def test_update_table_success(client: httpx.AsyncClient) -> None: + create_resp = await client.post("/api/v1/bitable/tables", json={"name": "Old"}) + table_id = create_resp.json()["table"]["id"] + resp = await client.patch(f"/api/v1/bitable/tables/{table_id}", json={"name": "New"}) + assert resp.status_code == 200 + assert resp.json()["table"]["name"] == "New" + + +async def test_delete_table_success(client: httpx.AsyncClient) -> None: + create_resp = await client.post("/api/v1/bitable/tables", json={"name": "T"}) + table_id = create_resp.json()["table"]["id"] + resp = await client.delete(f"/api/v1/bitable/tables/{table_id}") + assert resp.status_code == 200 + assert resp.json()["success"] is True + # Verify gone + assert (await client.get(f"/api/v1/bitable/tables/{table_id}")).status_code == 404 + + +# --------------------------------------------------------------------------- +# Fields CRUD + dependency check (409) +# 
--------------------------------------------------------------------------- + + +async def test_create_field_success(client: httpx.AsyncClient) -> None: + table_id = (await client.post("/api/v1/bitable/tables", json={"name": "T"})).json()["table"][ + "id" + ] + resp = await client.post( + f"/api/v1/bitable/tables/{table_id}/fields", + json={"name": "title", "field_type": "text", "owner": "agent"}, + ) + assert resp.status_code == 200 + data = resp.json() + assert data["field"]["name"] == "title" + assert data["field"]["field_type"] == "text" + assert data["field"]["owner"] == "agent" + + +async def test_list_fields(client: httpx.AsyncClient) -> None: + table_id = (await client.post("/api/v1/bitable/tables", json={"name": "T"})).json()["table"][ + "id" + ] + for name in ("f1", "f2"): + await client.post( + f"/api/v1/bitable/tables/{table_id}/fields", + json={"name": name, "field_type": "text"}, + ) + resp = await client.get(f"/api/v1/bitable/tables/{table_id}/fields") + assert resp.status_code == 200 + assert len(resp.json()["fields"]) == 2 + + +async def test_delete_field_no_deps(client: httpx.AsyncClient) -> None: + table_id = (await client.post("/api/v1/bitable/tables", json={"name": "T"})).json()["table"][ + "id" + ] + field_id = ( + await client.post( + f"/api/v1/bitable/tables/{table_id}/fields", + json={"name": "f", "field_type": "text"}, + ) + ).json()["field"]["id"] + resp = await client.delete(f"/api/v1/bitable/fields/{field_id}") + assert resp.status_code == 200 + + +async def test_delete_field_returns_409_when_referenced_by_formula( + client: httpx.AsyncClient, +) -> None: + table_id = (await client.post("/api/v1/bitable/tables", json={"name": "T"})).json()["table"][ + "id" + ] + source_id = ( + await client.post( + f"/api/v1/bitable/tables/{table_id}/fields", + json={"name": "src", "field_type": "number"}, + ) + ).json()["field"]["id"] + await client.post( + f"/api/v1/bitable/tables/{table_id}/fields", + json={ + "name": "calc", + "field_type": 
"formula", + "config": {"formula_expr": f"={source_id} * 2"}, + }, + ) + resp = await client.delete(f"/api/v1/bitable/fields/{source_id}") + assert resp.status_code == 409 + detail = resp.json()["detail"] + assert "dependencies" in detail + assert "formula_fields" in detail["dependencies"] + + +async def test_delete_field_force_cascades(client: httpx.AsyncClient) -> None: + table_id = (await client.post("/api/v1/bitable/tables", json={"name": "T"})).json()["table"][ + "id" + ] + source_id = ( + await client.post( + f"/api/v1/bitable/tables/{table_id}/fields", + json={"name": "src", "field_type": "number", "owner": "agent"}, + ) + ).json()["field"]["id"] + # Create a record with the source field + await client.post( + f"/api/v1/bitable/tables/{table_id}/records", + json={"records": [{source_id: 42}]}, + ) + resp = await client.delete(f"/api/v1/bitable/fields/{source_id}?force=true") + assert resp.status_code == 200 + + +# --------------------------------------------------------------------------- +# Records CRUD + cursor pagination +# --------------------------------------------------------------------------- + + +async def test_create_records_batch(client: httpx.AsyncClient) -> None: + table_id = (await client.post("/api/v1/bitable/tables", json={"name": "T"})).json()["table"][ + "id" + ] + resp = await client.post( + f"/api/v1/bitable/tables/{table_id}/records", + json={"records": [{"a": 1}, {"a": 2}, {"a": 3}]}, + ) + assert resp.status_code == 200 + data = resp.json() + assert data["count"] == 3 + assert len(data["records"]) == 3 + + +async def test_list_records_cursor_pagination(client: httpx.AsyncClient) -> None: + table_id = (await client.post("/api/v1/bitable/tables", json={"name": "T"})).json()["table"][ + "id" + ] + await client.post( + f"/api/v1/bitable/tables/{table_id}/records", + json={"records": [{"i": i} for i in range(5)]}, + ) + # Page 1 + resp = await client.get(f"/api/v1/bitable/tables/{table_id}/records?limit=2") + assert resp.status_code == 200 
+ data = resp.json() + assert len(data["records"]) == 2 + assert data["next_cursor"] is not None + # Page 2 + resp2 = await client.get( + f"/api/v1/bitable/tables/{table_id}/records?limit=2&cursor={data['next_cursor']}" + ) + data2 = resp2.json() + assert len(data2["records"]) == 2 + # No overlap + ids1 = {r["id"] for r in data["records"]} + ids2 = {r["id"] for r in data2["records"]} + assert ids1.isdisjoint(ids2) + + +async def test_list_records_with_filters(client: httpx.AsyncClient) -> None: + table_id = (await client.post("/api/v1/bitable/tables", json={"name": "T"})).json()["table"][ + "id" + ] + num_field_id = ( + await client.post( + f"/api/v1/bitable/tables/{table_id}/fields", + json={"name": "amt", "field_type": "number", "owner": "agent"}, + ) + ).json()["field"]["id"] + await client.post( + f"/api/v1/bitable/tables/{table_id}/records", + json={"records": [{num_field_id: 10}, {num_field_id: 50}, {num_field_id: 100}]}, + ) + filters = json.dumps([{"field_id": num_field_id, "op": "gt", "value": 40}]) + resp = await client.get( + f"/api/v1/bitable/tables/{table_id}/records", params={"filters": filters} + ) + assert resp.status_code == 200 + data = resp.json() + assert len(data["records"]) == 2 # 50 and 100 + + +async def test_update_record(client: httpx.AsyncClient) -> None: + table_id = (await client.post("/api/v1/bitable/tables", json={"name": "T"})).json()["table"][ + "id" + ] + record_id = ( + await client.post( + f"/api/v1/bitable/tables/{table_id}/records", json={"records": [{"a": 1}]} + ) + ).json()["records"][0]["id"] + resp = await client.patch(f"/api/v1/bitable/records/{record_id}", json={"values": {"a": 99}}) + assert resp.status_code == 200 + assert resp.json()["record"]["values"]["a"] == 99 + + +async def test_delete_single_record(client: httpx.AsyncClient) -> None: + table_id = (await client.post("/api/v1/bitable/tables", json={"name": "T"})).json()["table"][ + "id" + ] + record_id = ( + await client.post( + 
f"/api/v1/bitable/tables/{table_id}/records", json={"records": [{"a": 1}]} + ) + ).json()["records"][0]["id"] + resp = await client.delete(f"/api/v1/bitable/records/{record_id}") + assert resp.status_code == 200 + # Verify gone + resp2 = await client.get(f"/api/v1/bitable/tables/{table_id}/records") + assert len(resp2.json()["records"]) == 0 + + +# --------------------------------------------------------------------------- +# Upsert endpoint (KTD8) +# --------------------------------------------------------------------------- + + +async def test_upsert_inserts_then_updates(client: httpx.AsyncClient) -> None: + table_id = (await client.post("/api/v1/bitable/tables", json={"name": "T"})).json()["table"][ + "id" + ] + pk_id = ( + await client.post( + f"/api/v1/bitable/tables/{table_id}/fields", + json={"name": "id", "field_type": "text", "owner": "agent"}, + ) + ).json()["field"]["id"] + data_id = ( + await client.post( + f"/api/v1/bitable/tables/{table_id}/fields", + json={"name": "data", "field_type": "text", "owner": "agent"}, + ) + ).json()["field"]["id"] + await client.patch(f"/api/v1/bitable/tables/{table_id}", json={"primary_key_field_id": pk_id}) + + # First: insert + resp = await client.post( + f"/api/v1/bitable/tables/{table_id}/upsert", + json={ + "records": [{pk_id: "r1", data_id: "v1"}], + "primary_key_field_id": pk_id, + }, + ) + assert resp.status_code == 200 + assert resp.json()["inserted"] == 1 + assert resp.json()["updated"] == 0 + + # Second: update + resp2 = await client.post( + f"/api/v1/bitable/tables/{table_id}/upsert", + json={ + "records": [{pk_id: "r1", data_id: "v2"}], + "primary_key_field_id": pk_id, + }, + ) + assert resp2.status_code == 200 + assert resp2.json()["inserted"] == 0 + assert resp2.json()["updated"] == 1 + + # Verify value + records = (await client.get(f"/api/v1/bitable/tables/{table_id}/records")).json()["records"] + assert len(records) == 1 + assert records[0]["values"][data_id] == "v2" + + +async def 
test_upsert_preserves_user_columns(client: httpx.AsyncClient) -> None: + """KTD8 via API: upsert updates agent columns, user columns untouched.""" + table_id = (await client.post("/api/v1/bitable/tables", json={"name": "T"})).json()["table"][ + "id" + ] + pk_id = ( + await client.post( + f"/api/v1/bitable/tables/{table_id}/fields", + json={"name": "id", "field_type": "text", "owner": "agent"}, + ) + ).json()["field"]["id"] + agent_id = ( + await client.post( + f"/api/v1/bitable/tables/{table_id}/fields", + json={"name": "agent_data", "field_type": "text", "owner": "agent"}, + ) + ).json()["field"]["id"] + user_id = ( + await client.post( + f"/api/v1/bitable/tables/{table_id}/fields", + json={"name": "user_data", "field_type": "text", "owner": "user"}, + ) + ).json()["field"]["id"] + await client.patch(f"/api/v1/bitable/tables/{table_id}", json={"primary_key_field_id": pk_id}) + + # Insert with both agent and user values + await client.post( + f"/api/v1/bitable/tables/{table_id}/upsert", + json={ + "records": [{pk_id: "r1", agent_id: "a1", user_id: "u1"}], + "primary_key_field_id": pk_id, + }, + ) + # Manually set user column (simulating user edit via PATCH) + records = (await client.get(f"/api/v1/bitable/tables/{table_id}/records")).json()["records"] + rec_id = records[0]["id"] + await client.patch( + f"/api/v1/bitable/records/{rec_id}", + json={"values": {**records[0]["values"], user_id: "USER_EDITED"}}, + ) + + # Second upsert: tries to change user column — should be ignored + await client.post( + f"/api/v1/bitable/tables/{table_id}/upsert", + json={ + "records": [{pk_id: "r1", agent_id: "a2", user_id: "SHOULD_NOT_APPLY"}], + "primary_key_field_id": pk_id, + }, + ) + + records = (await client.get(f"/api/v1/bitable/tables/{table_id}/records")).json()["records"] + assert len(records) == 1 + assert records[0]["values"][agent_id] == "a2" # updated + assert records[0]["values"][user_id] == "USER_EDITED" # preserved + + +# 
--------------------------------------------------------------------------- +# Views CRUD +# --------------------------------------------------------------------------- + + +async def test_create_view_success(client: httpx.AsyncClient) -> None: + table_id = (await client.post("/api/v1/bitable/tables", json={"name": "T"})).json()["table"][ + "id" + ] + resp = await client.post( + f"/api/v1/bitable/tables/{table_id}/views", + json={"name": "Grid View", "view_type": "grid", "config": {}}, + ) + assert resp.status_code == 200 + assert resp.json()["view"]["name"] == "Grid View" + assert resp.json()["view"]["view_type"] == "grid" + + +async def test_list_views(client: httpx.AsyncClient) -> None: + table_id = (await client.post("/api/v1/bitable/tables", json={"name": "T"})).json()["table"][ + "id" + ] + for name in ("v1", "v2"): + await client.post(f"/api/v1/bitable/tables/{table_id}/views", json={"name": name}) + resp = await client.get(f"/api/v1/bitable/tables/{table_id}/views") + assert resp.status_code == 200 + assert len(resp.json()["views"]) == 2 + + +async def test_update_view(client: httpx.AsyncClient) -> None: + table_id = (await client.post("/api/v1/bitable/tables", json={"name": "T"})).json()["table"][ + "id" + ] + view_id = ( + await client.post(f"/api/v1/bitable/tables/{table_id}/views", json={"name": "Old"}) + ).json()["view"]["id"] + resp = await client.patch(f"/api/v1/bitable/views/{view_id}", json={"name": "New"}) + assert resp.status_code == 200 + assert resp.json()["view"]["name"] == "New" + + +# --------------------------------------------------------------------------- +# Formula validation (U5b) +# --------------------------------------------------------------------------- + + +async def test_validate_formula_valid(client: httpx.AsyncClient) -> None: + """Valid formula returns valid=true.""" + resp = await client.post( + "/api/v1/bitable/fields/validate-formula", + json={"formula": "1 + 2"}, + ) + assert resp.status_code == 200 + data = resp.json() + 
assert data["valid"] is True + assert "error" not in data + + +async def test_validate_formula_with_field_ref(client: httpx.AsyncClient) -> None: + """Formula with field reference is valid syntax.""" + resp = await client.post( + "/api/v1/bitable/fields/validate-formula", + json={"formula": "{field_abc} + 1"}, + ) + assert resp.status_code == 200 + assert resp.json()["valid"] is True + + +async def test_validate_formula_with_function(client: httpx.AsyncClient) -> None: + """Formula with built-in function is valid.""" + resp = await client.post( + "/api/v1/bitable/fields/validate-formula", + json={"formula": "SUM({f1}) + AVG({f2})"}, + ) + assert resp.status_code == 200 + assert resp.json()["valid"] is True + + +async def test_validate_formula_syntax_error(client: httpx.AsyncClient) -> None: + """Syntax error returns valid=false with error message.""" + resp = await client.post( + "/api/v1/bitable/fields/validate-formula", + json={"formula": "1 +"}, + ) + assert resp.status_code == 200 + data = resp.json() + assert data["valid"] is False + assert "error" in data + + +async def test_validate_formula_security_error(client: httpx.AsyncClient) -> None: + """Dangerous constructs (import) are rejected.""" + resp = await client.post( + "/api/v1/bitable/fields/validate-formula", + json={"formula": "__import__('os')"}, + ) + assert resp.status_code == 200 + data = resp.json() + assert data["valid"] is False + assert "error" in data + + +async def test_validate_formula_unknown_function(client: httpx.AsyncClient) -> None: + """Unknown function is rejected.""" + resp = await client.post( + "/api/v1/bitable/fields/validate-formula", + json={"formula": "UNKNOWN_FUNC(1)"}, + ) + assert resp.status_code == 200 + data = resp.json() + assert data["valid"] is False + assert "error" in data + + +async def test_validate_formula_requires_auth(unauth_client: httpx.AsyncClient) -> None: + """No auth → 401.""" + resp = await unauth_client.post( + "/api/v1/bitable/fields/validate-formula", + 
json={"formula": "1 + 2"}, + ) + assert resp.status_code == 401 diff --git a/tests/unit/bitable/test_service.py b/tests/unit/bitable/test_service.py new file mode 100644 index 0000000..67605a5 --- /dev/null +++ b/tests/unit/bitable/test_service.py @@ -0,0 +1,296 @@ +"""Tests for bitable service layer (U2): upsert, field deletion, view filtering. + +Requires PostgreSQL — marked ``postgres``. +""" + +from __future__ import annotations + +import pytest + +from agentkit.bitable.models import FieldOwner, FieldType +from agentkit.bitable.service import FieldDependencyError + +pytestmark = pytest.mark.postgres + + +# --------------------------------------------------------------------------- +# Upsert (KTD8: jsonb_set preserves user columns) +# --------------------------------------------------------------------------- + + +async def test_upsert_inserts_new_records(bitable_service) -> None: + """First upsert inserts all records.""" + table = await bitable_service.create_table(name="T") + pk_field = await bitable_service.create_field( + table_id=table.id, name="id", field_type=FieldType.text, owner=FieldOwner.agent + ) + data_field = await bitable_service.create_field( + table_id=table.id, name="data", field_type=FieldType.text, owner=FieldOwner.agent + ) + await bitable_service.update_table(table.id, primary_key_field_id=pk_field.id) + + result = await bitable_service.upsert_records( + table.id, + [ + {pk_field.id: "row1", data_field.id: "hello"}, + {pk_field.id: "row2", data_field.id: "world"}, + ], + pk_field.id, + ) + assert result == {"inserted": 2, "updated": 0, "skipped": 0} + + records, _ = await bitable_service.list_records(table.id) + assert len(records) == 2 + + +async def test_upsert_updates_existing_preserves_user_columns(bitable_service) -> None: + """KTD8: upsert updates agent columns via jsonb_set, user columns untouched.""" + table = await bitable_service.create_table(name="T") + pk_field = await bitable_service.create_field( + table_id=table.id, 
name="id", field_type=FieldType.text, owner=FieldOwner.agent + ) + agent_field = await bitable_service.create_field( + table_id=table.id, name="agent_data", field_type=FieldType.text, owner=FieldOwner.agent + ) + user_field = await bitable_service.create_field( + table_id=table.id, name="user_data", field_type=FieldType.text, owner=FieldOwner.user + ) + await bitable_service.update_table(table.id, primary_key_field_id=pk_field.id) + + # First: insert with both agent and user values + await bitable_service.upsert_records( + table.id, + [{pk_field.id: "row1", agent_field.id: "agent_v1", user_field.id: "user_v1"}], + pk_field.id, + ) + + # Manually set user column (simulating user edit) + records, _ = await bitable_service.list_records(table.id) + assert len(records) == 1 + rec = records[0] + await bitable_service.update_record_values(rec.id, {**rec.values, user_field.id: "USER_EDITED"}) + + # Second upsert: only agent column changes + result = await bitable_service.upsert_records( + table.id, + [{pk_field.id: "row1", agent_field.id: "agent_v2", user_field.id: "SHOULD_NOT_APPLY"}], + pk_field.id, + ) + assert result == {"inserted": 0, "updated": 1, "skipped": 0} + + # Verify: agent column updated, user column preserved + records, _ = await bitable_service.list_records(table.id) + assert len(records) == 1 + rec = records[0] + assert rec.values[agent_field.id] == "agent_v2" # updated + assert rec.values[user_field.id] == "USER_EDITED" # preserved (NOT "SHOULD_NOT_APPLY") + + +async def test_upsert_skips_records_without_pk(bitable_service) -> None: + """Records without PK value are skipped.""" + table = await bitable_service.create_table(name="T") + pk_field = await bitable_service.create_field( + table_id=table.id, name="id", field_type=FieldType.text, owner=FieldOwner.agent + ) + await bitable_service.update_table(table.id, primary_key_field_id=pk_field.id) + + result = await bitable_service.upsert_records( + table.id, + [{pk_field.id: "row1"}, {}], # second has no PK 
+ pk_field.id, + ) + assert result == {"inserted": 1, "updated": 0, "skipped": 1} + + +async def test_upsert_empty_batch(bitable_service) -> None: + """Empty batch returns all zeros.""" + table = await bitable_service.create_table(name="T") + pk_field = await bitable_service.create_field( + table_id=table.id, name="id", field_type=FieldType.text, owner=FieldOwner.agent + ) + result = await bitable_service.upsert_records(table.id, [], pk_field.id) + assert result == {"inserted": 0, "updated": 0, "skipped": 0} + + +async def test_upsert_without_pk_field_raises(bitable_service) -> None: + """Upsert without primary_key_field_id raises ValueError.""" + table = await bitable_service.create_table(name="T") + with pytest.raises(ValueError, match="primary_key_field_id"): + await bitable_service.upsert_records(table.id, [{}], "") + + +# --------------------------------------------------------------------------- +# Field deletion with dependency checking +# --------------------------------------------------------------------------- + + +async def test_delete_field_no_dependencies(bitable_service) -> None: + """Deleting a field with no dependencies succeeds.""" + table = await bitable_service.create_table(name="T") + field = await bitable_service.create_field( + table_id=table.id, name="f", field_type=FieldType.text + ) + deleted = await bitable_service.delete_field(field.id) + assert deleted is True + assert await bitable_service.get_field(field.id) is None + + +async def test_delete_field_referenced_by_formula_returns_deps(bitable_service) -> None: + """Deleting a field referenced by a formula raises FieldDependencyError.""" + table = await bitable_service.create_table(name="T") + source_field = await bitable_service.create_field( + table_id=table.id, name="source", field_type=FieldType.number + ) + formula_field = await bitable_service.create_field( + table_id=table.id, + name="calc", + field_type=FieldType.formula, + config={"formula_expr": f"={source_field.id} * 2"}, + ) 
+ + with pytest.raises(FieldDependencyError) as exc_info: + await bitable_service.delete_field(source_field.id) + + deps = exc_info.value.dependencies + assert "formula_fields" in deps + assert any(f["id"] == formula_field.id for f in deps["formula_fields"]) + + +async def test_delete_primary_key_field_returns_deps(bitable_service) -> None: + """Deleting the primary key field raises FieldDependencyError.""" + table = await bitable_service.create_table(name="T") + pk_field = await bitable_service.create_field( + table_id=table.id, name="id", field_type=FieldType.text + ) + await bitable_service.update_table(table.id, primary_key_field_id=pk_field.id) + + with pytest.raises(FieldDependencyError) as exc_info: + await bitable_service.delete_field(pk_field.id) + + assert exc_info.value.dependencies.get("is_primary_key") is True + + +async def test_delete_field_force_casces_cleanup(bitable_service) -> None: + """Force delete cascades: removes field from records, marks formula as error.""" + table = await bitable_service.create_table(name="T") + source_field = await bitable_service.create_field( + table_id=table.id, name="source", field_type=FieldType.number, owner=FieldOwner.agent + ) + formula_field = await bitable_service.create_field( + table_id=table.id, + name="calc", + field_type=FieldType.formula, + config={"formula_expr": f"={source_field.id} * 2"}, + ) + # Create a record with the source field value + record = await bitable_service.create_record(table_id=table.id, values={source_field.id: 42}) + + # Force delete + deleted = await bitable_service.delete_field(source_field.id, force=True) + assert deleted is True + + # Record should no longer have the source field key + rec = await bitable_service.get_record(record.id) + assert rec is not None + assert source_field.id not in rec.values + + # Formula field should have error in config + formula = await bitable_service.get_field(formula_field.id) + assert formula is not None + assert "error" in formula.config + + +# 
--------------------------------------------------------------------------- +# View-filtered record listing +# --------------------------------------------------------------------------- + + +async def test_list_records_filtered_by_number_gt(bitable_service) -> None: + """View filter with gt op on number field correctly filters (CAST NUMERIC).""" + table = await bitable_service.create_table(name="T") + num_field = await bitable_service.create_field( + table_id=table.id, name="amount", field_type=FieldType.number, owner=FieldOwner.agent + ) + + # Create records with various amounts + for amt in [10, 50, 100, 200]: + await bitable_service.create_record(table_id=table.id, values={num_field.id: amt}) + + # Filter: amount > 50 + records, _ = await bitable_service.list_records_filtered( + table.id, + filters=[{"field_id": num_field.id, "op": "gt", "value": 50}], + ) + amounts = [r.values[num_field.id] for r in records] + assert all(a > 50 for a in amounts) + assert len(records) == 2 # 100 and 200 + + +async def test_list_records_filtered_by_text_eq(bitable_service) -> None: + """View filter with eq op on text field.""" + table = await bitable_service.create_table(name="T") + text_field = await bitable_service.create_field( + table_id=table.id, name="status", field_type=FieldType.text, owner=FieldOwner.agent + ) + + for status in ["open", "closed", "open", "pending"]: + await bitable_service.create_record(table_id=table.id, values={text_field.id: status}) + + records, _ = await bitable_service.list_records_filtered( + table.id, + filters=[{"field_id": text_field.id, "op": "eq", "value": "open"}], + ) + assert len(records) == 2 + assert all(r.values[text_field.id] == "open" for r in records) + + +async def test_list_records_filtered_with_sort(bitable_service) -> None: + """View sort by number field descending.""" + table = await bitable_service.create_table(name="T") + num_field = await bitable_service.create_field( + table_id=table.id, name="score", 
field_type=FieldType.number, owner=FieldOwner.agent + ) + + for score in [30, 10, 50, 20]: + await bitable_service.create_record(table_id=table.id, values={num_field.id: score}) + + records, _ = await bitable_service.list_records_filtered( + table.id, + sorts=[{"field_id": num_field.id, "direction": "desc"}], + ) + # Records should be sorted by score descending (as text, but single/double digit sorts OK) + assert len(records) == 4 + + +async def test_list_records_filtered_cursor_pagination(bitable_service) -> None: + """Cursor pagination with filters.""" + table = await bitable_service.create_table(name="T") + text_field = await bitable_service.create_field( + table_id=table.id, name="name", field_type=FieldType.text, owner=FieldOwner.agent + ) + + for i in range(5): + await bitable_service.create_record(table_id=table.id, values={text_field.id: f"item_{i}"}) + + # First page + records, next_cursor = await bitable_service.list_records_filtered(table.id, limit=2) + assert len(records) == 2 + assert next_cursor is not None + + # Second page + records2, next_cursor2 = await bitable_service.list_records_filtered( + table.id, cursor=next_cursor, limit=2 + ) + assert len(records2) == 2 + assert next_cursor2 is not None + + # Third page + records3, next_cursor3 = await bitable_service.list_records_filtered( + table.id, cursor=next_cursor2, limit=2 + ) + assert len(records3) == 1 + assert next_cursor3 is None + + # All records unique + all_ids = {r.id for r in [records, records2, records3] for r in r} + assert len(all_ids) == 5 diff --git a/tests/unit/test_middleware.py b/tests/unit/test_middleware.py index 9dc6145..6f368c4 100644 --- a/tests/unit/test_middleware.py +++ b/tests/unit/test_middleware.py @@ -437,12 +437,12 @@ class TestTokenUsageMiddleware: @pytest.mark.asyncio async def test_after_extracts_usage_from_result(self) -> None: - """after 从 result 提取 token_usage。""" + """after 从 result 提取 total_tokens(ReActResult 属性名)。""" mw = TokenUsageMiddleware() ctx = 
_make_ctx() result = MagicMock() - result.token_usage = {"total": 100} + result.total_tokens = {"total": 100} await mw.after(ctx, result) diff --git a/tests/unit/test_pipeline_checkpoint.py b/tests/unit/test_pipeline_checkpoint.py index 68cf75b..e95d146 100644 --- a/tests/unit/test_pipeline_checkpoint.py +++ b/tests/unit/test_pipeline_checkpoint.py @@ -273,7 +273,7 @@ class TestCheckpointTTL: from datetime import datetime, timedelta, timezone expired_time = (datetime.now(timezone.utc) - timedelta(seconds=10)).isoformat() - cp._memory["plan_1"][0].saved_at = expired_time + cp._memory["plan_1"]["p1"].saved_at = expired_time # 过期后 load 应返回 None loaded = await cp.load("plan_1") @@ -329,7 +329,7 @@ class TestCheckpointTTL: from datetime import datetime, timedelta, timezone expired_time = (datetime.now(timezone.utc) - timedelta(seconds=10)).isoformat() - cp._memory["plan_1"][0].saved_at = expired_time + cp._memory["plan_1"]["p1"].saved_at = expired_time # list 应过滤掉过期的,只返回 1 个 checkpoints = await cp.list_checkpoints("plan_1") @@ -363,7 +363,7 @@ class TestCheckpointTTL: from datetime import datetime, timedelta, timezone expired_time = (datetime.now(timezone.utc) - timedelta(seconds=10)).isoformat() - cp._memory["plan_1"][0].saved_at = expired_time + cp._memory["plan_1"]["p1"].saved_at = expired_time # 内存降级 + TTL 过期 → 应返回 None loaded = await cp.load("plan_1") diff --git a/tests/unit/test_skill_md.py b/tests/unit/test_skill_md.py index a573859..1cb8425 100644 --- a/tests/unit/test_skill_md.py +++ b/tests/unit/test_skill_md.py @@ -193,7 +193,7 @@ class TestSkillMdToSkillConfig: assert config.name == "content-generator" assert config.description != "" - assert config.disclosure_level == 0 + assert config.disclosure_level == 1 # Level 0: prompt 仅含 identity(概要信息) assert config.prompt is not None assert "identity" in config.prompt @@ -257,14 +257,14 @@ class TestSkillConfigNewFields: ) assert config.skill_md_path is None - def test_default_disclosure_level_is_zero(self): + def 
test_default_disclosure_level_is_one(self): config = SkillConfig( name="test", agent_type="test", task_mode="llm_generate", prompt={"identity": "test"}, ) - assert config.disclosure_level == 0 + assert config.disclosure_level == 1 def test_skill_md_path_set(self): config = SkillConfig( @@ -321,7 +321,7 @@ class TestSkillConfigNewFields: } config = SkillConfig.from_dict(data) assert config.skill_md_path is None - assert config.disclosure_level == 0 + assert config.disclosure_level == 1 # ── SkillLoader.load_from_skill_md 测试 ───────────────────