# fischer-agentkit/src/agentkit/memory/adapters/feishu.py

"""FeishuKBAdapter - 飞书知识库适配器

对接飞书知识库 API，实现 KnowledgeBase 协议。
通过 app_id + app_secret 认证，调用飞书开放平台 API 检索知识库内容。
"""

from __future__ import annotations

import asyncio
import logging
import time
from typing import Any, TypeAlias

import httpx

from agentkit.memory.adapters.base import KBAdapter
from agentkit.memory.knowledge_base import Document, QueryResult, SourceInfo
from agentkit.utils.security import is_safe_url

logger = logging.getLogger(__name__)

# JSON body of the Feishu search request. The keys this module sets are
# search_key / page_size / wiki_space_ids, whose values are str | int | list[str].
FeishuSearchPayload: TypeAlias = dict[str, object]


class FeishuKBAdapter(KBAdapter):
    """Knowledge-base adapter backed by the Feishu open platform.

    Talks to the Feishu open-platform HTTP API and supports:
    - searching wiki nodes
    - listing wiki spaces
    - fetching a single wiki node

    An ``app_id`` / ``app_secret`` pair is exchanged for a
    ``tenant_access_token``. The token is cached on the instance and is
    re-requested five minutes before its reported expiry.

    Typical configuration::

        adapter = FeishuKBAdapter(
            app_id="cli_xxx",
            app_secret="xxx",
            base_url="https://open.feishu.cn/open-apis",
        )
    """

    def __init__(
        self,
        app_id: str,
        app_secret: str,
        base_url: str = "https://open.feishu.cn/open-apis",
        space_ids: list[str] | None = None,
        timeout: int = 30,
    ):
        """Create an adapter for one Feishu application.

        Args:
            app_id: Feishu application ID. Its first 8 characters are used to
                build this adapter's ``source_id``.
            app_secret: Feishu application secret, sent when requesting a token.
            base_url: Root URL of the open-platform API; a trailing slash is
                stripped before use.
            space_ids: Optional wiki space IDs; when non-empty they are sent
                with every search request.
            timeout: Request timeout, passed on to the base adapter.

        Raises:
            ValueError: If ``base_url`` is rejected by ``is_safe_url``.
        """
        super().__init__(
            source_id=f"feishu-{app_id[:8]}",
            source_name="飞书知识库",
            source_type="feishu",
            timeout=timeout,
        )
        self._app_id = app_id
        self._app_secret = app_secret
        self._base_url = base_url.rstrip("/")
        if not is_safe_url(self._base_url):
            raise ValueError(
                f"Unsafe base_url: {self._base_url}. Private/internal URLs are not allowed."
            )
        self._space_ids = space_ids or []
        self._access_token: str | None = None
        self._token_expiry: float = 0.0

    @staticmethod
    def _json_object(value: object) -> dict[str, Any]:
        """Return ``value`` if it is a JSON object (dict), otherwise an empty dict.

        ``dict.get(key, {})`` only falls back when the key is absent, so a JSON
        ``null`` (or any non-object) would otherwise reach ``.get`` and raise an
        ``AttributeError`` that none of the handlers in this class catch.
        """
        return value if isinstance(value, dict) else {}

    def _fallback_source_info(self) -> SourceInfo:
        """Build the ``SourceInfo`` describing this adapter itself."""
        return SourceInfo(
            source_id=self._source_id,
            source_name=self._source_name,
            source_type=self._source_type,
        )

    def _make_client(self) -> httpx.AsyncClient:
        """Build an HTTP client for the Feishu API.

        The client is bound to ``base_url`` and carries a ``Bearer``
        ``Authorization`` header when an access token is currently cached.
        """
        headers: dict[str, str] = {"Content-Type": "application/json"}
        if self._access_token:
            headers["Authorization"] = f"Bearer {self._access_token}"
        return httpx.AsyncClient(
            base_url=self._base_url,
            headers=headers,
            timeout=self._timeout,
        )

    async def _get_access_token(self) -> str | None:
        """Return a ``tenant_access_token``, requesting a new one if needed.

        Returns:
            The cached token while it is still considered valid, otherwise a
            freshly requested one; ``None`` when the request fails or Feishu
            answers with a non-zero ``code``.
        """
        if self._access_token and time.time() < self._token_expiry:
            return self._access_token

        client = self._get_client()
        try:
            resp = await client.post(
                "/auth/v3/tenant_access_token/internal",
                json={
                    "app_id": self._app_id,
                    "app_secret": self._app_secret,
                },
            )
            resp.raise_for_status()
            # A non-object body is treated as an empty object so it is reported
            # as an auth failure instead of raising AttributeError.
            data = self._json_object(resp.json())
            if data.get("code") == 0:
                self._access_token = data.get("tenant_access_token")
                expire_seconds = data.get("expire", 7200)
                self._token_expiry = time.time() + expire_seconds - 300  # Refresh 5 minutes early
                # Invalidate cached client so it's rebuilt with the new token.
                # NOTE(review): the previous client is dropped without being
                # closed; confirm whether the base class should aclose() it.
                self._client = None
                return self._access_token
            else:
                logger.error(f"Feishu auth failed: code={data.get('code')}, msg={data.get('msg')}")
                return None
        except (httpx.HTTPError, ValueError, KeyError, TypeError) as e:
            logger.error(f"Feishu auth error: {e}")
            return None

    async def authenticate(self) -> bool:
        """Authenticate against Feishu and record the outcome.

        Returns:
            ``True`` when an access token could be obtained, else ``False``.
        """
        token = await self._get_access_token()
        self._authenticated = token is not None
        return self._authenticated

    async def search(self, query: str, top_k: int = 5) -> list[QueryResult]:
        """Search the Feishu wiki for ``query``.

        Args:
            query: Search text, sent as ``search_key``.
            top_k: Requested page size; also the maximum number of results
                returned.

        Returns:
            Up to ``top_k`` results. An empty list is returned when no token is
            available, the request fails, or Feishu reports a non-zero ``code``.
        """
        token = await self._get_access_token()
        if not token:
            logger.error("FeishuKBAdapter.search: not authenticated")
            return []

        client = self._get_client()
        try:
            payload: FeishuSearchPayload = {
                "search_key": query,
                "page_size": top_k,
            }
            if self._space_ids:
                payload["wiki_space_ids"] = self._space_ids

            resp = await client.post(
                "/search/v2/wiki",
                json=payload,
            )
            resp.raise_for_status()
            data = self._json_object(resp.json())

            if data.get("code") != 0:
                logger.error(
                    f"Feishu search failed: code={data.get('code')}, msg={data.get('msg')}"
                )
                return []

            # "data" and "items" may be null in the response; treat both as empty.
            items = self._json_object(data.get("data")).get("items") or []
            results: list[QueryResult] = []
            for item in items:
                if not isinstance(item, dict):
                    # Skip malformed entries rather than failing the whole search.
                    continue
                wiki_token = item.get("wiki_token", "")
                results.append(
                    QueryResult(
                        content=item.get("content", ""),
                        source_id=self._source_id,
                        source_name=self._source_name,
                        # A null score counts as 0.0 so one hit cannot discard the rest.
                        score=float(item.get("score") or 0.0),
                        metadata={
                            "wiki_token": wiki_token,
                            "space_id": item.get("space_id", ""),
                        },
                        doc_id=wiki_token,
                        title=item.get("title", ""),
                    )
                )
            return results[:top_k]

        except httpx.HTTPStatusError as e:
            logger.error(
                f"Feishu search HTTP error: {e.response.status_code} — {e.response.text[:200]}"
            )
            return []
        except (httpx.HTTPError, ValueError, KeyError, TypeError) as e:
            logger.error(f"Feishu search error: {e}")
            return []

    async def get_document(self, doc_id: str) -> Document | None:
        """Fetch one wiki node by its token.

        Args:
            doc_id: Node token, sent as the ``token`` query parameter.

        Returns:
            The node as a ``Document``, or ``None`` when no token is available,
            the request fails, or Feishu reports a non-zero ``code``.
        """
        token = await self._get_access_token()
        if not token:
            return None

        client = self._get_client()
        try:
            resp = await client.get(
                "/wiki/v2/spaces/get_node",
                params={"token": doc_id},
            )
            resp.raise_for_status()
            data = self._json_object(resp.json())

            if data.get("code") != 0:
                # Log the API error so permission/auth problems are not silent.
                logger.warning(
                    f"Feishu get_document failed: code={data.get('code')}, msg={data.get('msg')}"
                )
                return None

            # "data" or "node" may be null; fall back to an empty node.
            node = self._json_object(self._json_object(data.get("data")).get("node"))
            return Document(
                doc_id=doc_id,
                content=node.get("content", ""),
                title=node.get("title", ""),
                source_id=self._source_id,
                metadata={
                    "space_id": node.get("space_id", ""),
                    "obj_type": node.get("obj_type", ""),
                },
            )
        except (httpx.HTTPError, ValueError, KeyError, TypeError) as e:
            logger.error(f"Feishu get_document error: {e}")
            return None

    async def list_sources(self) -> list[SourceInfo]:
        """List the Feishu wiki spaces as knowledge sources.

        Returns:
            One ``SourceInfo`` per space returned by the API. When no token is
            available, the request fails, Feishu reports an error code, or no
            space is returned, a single entry describing this adapter itself is
            returned instead.
        """
        token = await self._get_access_token()
        if not token:
            return [self._fallback_source_info()]

        client = self._get_client()
        try:
            resp = await client.get("/wiki/v2/spaces", params={"page_size": 50})
            resp.raise_for_status()
            data = self._json_object(resp.json())

            # A missing "code" is treated as success; only an explicit
            # non-zero code is reported as an API failure.
            if data.get("code", 0) != 0:
                logger.error(
                    f"Feishu list_sources failed: code={data.get('code')}, msg={data.get('msg')}"
                )
                return [self._fallback_source_info()]

            spaces = self._json_object(data.get("data")).get("items") or []
            sources = [
                SourceInfo(
                    source_id=f"feishu-space-{space.get('space_id', '')}",
                    source_name=space.get("name", ""),
                    source_type="feishu",
                )
                for space in spaces
                if isinstance(space, dict)
            ]
            return sources or [self._fallback_source_info()]
        except (httpx.HTTPError, ValueError, KeyError, TypeError) as e:
            logger.error(f"Feishu list_sources error: {e}")
            return [self._fallback_source_info()]

    async def health_check(self) -> bool:
        """Report whether an access token can currently be obtained.

        Returns:
            ``True`` when a token is available, ``False`` on any failure.
            Task cancellation is re-raised rather than reported as unhealthy.
        """
        try:
            token = await self._get_access_token()
            return token is not None
        except asyncio.CancelledError:
            raise
        except Exception:  # noqa: BLE001 — by design, any exception in health_check means "unhealthy"
            return False