264 lines
8.9 KiB
Python
264 lines
8.9 KiB
Python
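"""FeishuKBAdapter - Feishu (Lark) knowledge-base adapter.

Connects to the Feishu knowledge-base API and implements the KnowledgeBase protocol.
Authenticates with app_id + app_secret and calls the Feishu open-platform API
to retrieve knowledge-base content.
"""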
|
||
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import logging
|
||
import time
|
||
from typing import TypeAlias
|
||
|
||
import httpx
|
||
|
||
from agentkit.memory.adapters.base import KBAdapter
|
||
from agentkit.memory.knowledge_base import Document, QueryResult, SourceInfo
|
||
from agentkit.utils.security import is_safe_url
|
||
|
||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
||
|
||
# Feishu search request payload: search_key / page_size / wiki_space_ids — values are str | int | list[str].
|
||
FeishuSearchPayload: TypeAlias = dict[str, object]
|
||
|
||
|
||
class FeishuKBAdapter(KBAdapter):
    """Knowledge-base adapter backed by the Feishu (Lark) open platform.

    Authenticates with ``app_id`` + ``app_secret`` to obtain a
    ``tenant_access_token`` and then calls the Feishu endpoints to:

    - search wiki nodes (``POST /search/v2/wiki``)
    - list wiki spaces (``GET /wiki/v2/spaces``)
    - fetch a single wiki node (``GET /wiki/v2/spaces/get_node``)

    Typical configuration::

        adapter = FeishuKBAdapter(
            app_id="cli_xxx",
            app_secret="xxx",
            base_url="https://open.feishu.cn/open-apis",
        )
    """

    # Lifetime assumed for a token when the auth response carries no "expire".
    _DEFAULT_TOKEN_TTL_SECONDS = 7200
    # The cached token is considered stale this many seconds before it expires.
    _TOKEN_REFRESH_MARGIN_SECONDS = 300

    def __init__(
        self,
        app_id: str,
        app_secret: str,
        base_url: str = "https://open.feishu.cn/open-apis",
        space_ids: list[str] | None = None,
        timeout: int = 30,
    ) -> None:
        """Create the adapter; no network call is made here.

        Args:
            app_id: Feishu application id. Its first 8 characters become part
                of the adapter's ``source_id``.
            app_secret: Feishu application secret.
            base_url: Root URL of the open-platform API; a trailing ``/`` is
                stripped.
            space_ids: Optional wiki space ids used to restrict searches.
            timeout: Timeout forwarded to the base adapter / HTTP client.

        Raises:
            ValueError: If ``base_url`` is rejected by ``is_safe_url``.
        """
        super().__init__(
            source_id=f"feishu-{app_id[:8]}",
            source_name="飞书知识库",
            source_type="feishu",
            timeout=timeout,
        )
        self._app_id = app_id
        self._app_secret = app_secret
        self._base_url = base_url.rstrip("/")
        if not is_safe_url(self._base_url):
            raise ValueError(
                f"Unsafe base_url: {self._base_url}. Private/internal URLs are not allowed."
            )
        # Copy so later mutation of the caller's list cannot change our filter.
        self._space_ids = list(space_ids) if space_ids else []
        self._access_token: str | None = None
        self._token_expiry: float = 0.0

    @staticmethod
    def _as_dict(value: object) -> dict:
        """Return *value* if it is a dict, otherwise an empty dict.

        The API may answer with ``null`` (or a non-object) where a JSON
        object is expected; normalising here keeps ``.get`` chains from
        raising ``AttributeError``, which the callers' handlers do not catch.
        """
        return value if isinstance(value, dict) else {}

    def _default_sources(self) -> list[SourceInfo]:
        """Return the single-entry fallback describing this adapter itself."""
        return [
            SourceInfo(
                source_id=self._source_id,
                source_name=self._source_name,
                source_type=self._source_type,
            )
        ]

    def _to_query_result(self, item: dict) -> QueryResult:
        """Convert one search hit from the API into a ``QueryResult``."""
        wiki_token = item.get("wiki_token", "")
        return QueryResult(
            content=item.get("content", ""),
            source_id=self._source_id,
            source_name=self._source_name,
            # ``or 0.0`` also covers an explicit JSON null score.
            score=float(item.get("score") or 0.0),
            metadata={
                "wiki_token": wiki_token,
                "space_id": item.get("space_id", ""),
            },
            doc_id=wiki_token,
            title=item.get("title", ""),
        )

    def _make_client(self) -> httpx.AsyncClient:
        """Build the HTTP client for the Feishu API.

        The ``Authorization`` header is included only when a token is
        already cached.
        """
        headers: dict[str, str] = {"Content-Type": "application/json"}
        if self._access_token:
            headers["Authorization"] = f"Bearer {self._access_token}"
        return httpx.AsyncClient(
            base_url=self._base_url,
            headers=headers,
            timeout=self._timeout,
        )

    async def _get_access_token(self) -> str | None:
        """Return a valid ``tenant_access_token``, fetching one if needed.

        The cached token is reused until shortly before it expires.

        Returns:
            The token, or ``None`` when authentication fails (the failure is
            logged, never raised).
        """
        if self._access_token and time.time() < self._token_expiry:
            return self._access_token

        client = self._get_client()
        try:
            resp = await client.post(
                "/auth/v3/tenant_access_token/internal",
                json={
                    "app_id": self._app_id,
                    "app_secret": self._app_secret,
                },
            )
            resp.raise_for_status()
            data = self._as_dict(resp.json())
            if data.get("code") != 0:
                logger.error(f"Feishu auth failed: code={data.get('code')}, msg={data.get('msg')}")
                return None

            token = data.get("tenant_access_token")
            expire_seconds = data.get("expire", self._DEFAULT_TOKEN_TTL_SECONDS)
            # Compute first so a malformed "expire" leaves the cache untouched.
            expires_at = time.time() + expire_seconds - self._TOKEN_REFRESH_MARGIN_SECONDS

            self._access_token = token
            self._token_expiry = expires_at
            if token:
                # Refresh the header on the live client instead of discarding
                # the client: dropping it without aclose() would leak its
                # connection pool on every token refresh.
                client.headers["Authorization"] = f"Bearer {token}"
            return self._access_token
        except (httpx.HTTPError, ValueError, KeyError, TypeError) as e:
            logger.error(f"Feishu auth error: {e}")
            return None

    async def authenticate(self) -> bool:
        """Authenticate against Feishu and record the outcome.

        Returns:
            ``True`` when a token was obtained, ``False`` otherwise.
        """
        token = await self._get_access_token()
        self._authenticated = token is not None
        return self._authenticated

    async def search(self, query: str, top_k: int = 5) -> list[QueryResult]:
        """Search the Feishu wiki for *query*.

        Args:
            query: Search keyword sent as ``search_key``.
            top_k: Maximum number of results; also sent as ``page_size``.

        Returns:
            At most ``top_k`` results. An empty list is returned when
            ``top_k`` is not positive, when authentication fails, or when the
            request or response handling fails (failures are logged).
        """
        if top_k <= 0:
            # Nothing can be returned; a negative value would also make the
            # final slice drop items from the wrong end.
            return []

        token = await self._get_access_token()
        if not token:
            logger.error("FeishuKBAdapter.search: not authenticated")
            return []

        client = self._get_client()
        try:
            payload: FeishuSearchPayload = {
                "search_key": query,
                "page_size": top_k,
            }
            if self._space_ids:
                payload["wiki_space_ids"] = self._space_ids

            resp = await client.post("/search/v2/wiki", json=payload)
            resp.raise_for_status()
            data = self._as_dict(resp.json())

            if data.get("code") != 0:
                logger.error(
                    f"Feishu search failed: code={data.get('code')}, msg={data.get('msg')}"
                )
                return []

            items = self._as_dict(data.get("data")).get("items") or []
            results = [
                self._to_query_result(item) for item in items if isinstance(item, dict)
            ]
            return results[:top_k]

        except httpx.HTTPStatusError as e:
            logger.error(
                f"Feishu search HTTP error: {e.response.status_code} — {e.response.text[:200]}"
            )
            return []
        except (httpx.HTTPError, ValueError, KeyError, TypeError) as e:
            logger.error(f"Feishu search error: {e}")
            return []

    async def get_document(self, doc_id: str) -> Document | None:
        """Fetch a wiki node by its token.

        Args:
            doc_id: Wiki node token, sent as the ``token`` query parameter.

        Returns:
            The document, or ``None`` when authentication fails, the API
            reports a non-zero ``code``, or the request fails.
        """
        token = await self._get_access_token()
        if not token:
            return None

        client = self._get_client()
        try:
            resp = await client.get(
                "/wiki/v2/spaces/get_node",
                params={"token": doc_id},
            )
            resp.raise_for_status()
            data = self._as_dict(resp.json())

            if data.get("code") != 0:
                logger.error(
                    f"Feishu get_document failed: code={data.get('code')}, msg={data.get('msg')}"
                )
                return None

            node = self._as_dict(self._as_dict(data.get("data")).get("node"))
            return Document(
                doc_id=doc_id,
                content=node.get("content", ""),
                title=node.get("title", ""),
                source_id=self._source_id,
                metadata={
                    "space_id": node.get("space_id", ""),
                    "obj_type": node.get("obj_type", ""),
                },
            )
        except (httpx.HTTPError, ValueError, KeyError, TypeError) as e:
            logger.error(f"Feishu get_document error: {e}")
            return None

    async def list_sources(self) -> list[SourceInfo]:
        """List the Feishu wiki spaces visible to the application.

        Only the first page (up to 50 spaces) is requested.

        Returns:
            One ``SourceInfo`` per space. When authentication fails, the
            request fails, or no space is returned, a single entry describing
            this adapter itself is returned instead.
        """
        token = await self._get_access_token()
        if not token:
            return self._default_sources()

        client = self._get_client()
        try:
            resp = await client.get("/wiki/v2/spaces", params={"page_size": 50})
            resp.raise_for_status()
            data = self._as_dict(resp.json())

            if data.get("code") != 0:
                logger.error(
                    f"Feishu list_sources failed: code={data.get('code')}, msg={data.get('msg')}"
                )
                return self._default_sources()

            spaces = self._as_dict(data.get("data")).get("items") or []
            sources = [
                SourceInfo(
                    source_id=f"feishu-space-{space.get('space_id', '')}",
                    source_name=space.get("name", ""),
                    source_type="feishu",
                )
                for space in spaces
                if isinstance(space, dict)
            ]
            return sources or self._default_sources()
        except (httpx.HTTPError, ValueError, KeyError, TypeError) as e:
            logger.error(f"Feishu list_sources error: {e}")
            return self._default_sources()

    async def health_check(self) -> bool:
        """Report whether the Feishu API is reachable and credentials work.

        Returns:
            ``True`` when a token can be obtained, ``False`` on any failure.
            Task cancellation is propagated rather than reported as unhealthy.
        """
        try:
            token = await self._get_access_token()
            return token is not None
        except asyncio.CancelledError:
            raise
        except Exception:  # noqa: BLE001 — by design, any failure means "unhealthy"
            return False
|