2026-06-29 22:08:56 +08:00
5 changed files with 333 additions and 20 deletions
--- a/src/agentkit/core/react.py
+++ b/src/agentkit/core/react.py
@ -163,6 +163,7 @@ class ReActEngine:
        core_tool_names: list[str] | None = None,
        enable_tool_search: bool = True,
        middleware_chain: "MiddlewareChain | None" = None,
+        prompt_cache_enable: bool = True,
    ):
        if max_steps < 1:
            raise ValueError(f"max_steps must be >= 1, got {max_steps}")
@ -176,6 +177,9 @@ class ReActEngine:
        self._parallel_tools = parallel_tools
        self._verification_enabled = verification_enabled
        self._verification_commands = verification_commands
+        # U2/G2: prompt cache 双块结构开关(True 时 Anthropic 用 cache_control blocks,
+        # 其他 provider 走字符串拼接依赖自动前缀缓存)
+        self._prompt_cache_enable = prompt_cache_enable
        # Tiered tool description injection config
        self._core_tool_names: tuple[str, ...] | None = (
            tuple(core_tool_names) if core_tool_names is not None else None
@ -429,7 +433,9 @@ class ReActEngine:
                    skill_name=task_type or None,
                )

-            # Memory retrieval: 执行前检索相关上下文注入 system_prompt
+            # Memory retrieval: 执行前检索相关上下文,作为 volatile 层注入 system message
+            # U2/G2: 不再拼到 stable(system_prompt)末尾,改由 _build_system_message 组装双块结构
+            memory_context = ""
            if memory_retriever:
                try:
                    query = str(messages[-1].get("content", "")) if messages else ""
@ -439,12 +445,7 @@ class ReActEngine:
                        query=query,
                        top_k=top_k,
                        token_budget=token_budget,
-                    )
-                    if memory_context:
-                        if system_prompt:
-                            system_prompt += f"\n\n## 参考信息\n{memory_context}"
-                        else:
-                            system_prompt = f"## 参考信息\n{memory_context}"
+                    ) or ""
                except Exception as e:
                    logger.warning(
                        f"Memory retrieval failed, continuing without context: {e}", exc_info=True
@ -452,8 +453,13 @@ class ReActEngine:

            # 构建初始消息
            conversation: list[dict[str, Any]] = []
-            if system_prompt:
-                conversation.append({"role": "system", "content": system_prompt})
+            system_content = self._build_system_message(
+                stable=system_prompt or "",
+                volatile=memory_context,
+                model=model,
+            )
+            if system_content is not None:
+                conversation.append({"role": "system", "content": system_content})
            conversation.extend(messages)

            # Context compression: 压缩超长对话历史
@ -1039,7 +1045,10 @@ class ReActEngine:
                skill_name=task_type or None,
            )

-        # Memory retrieval: 执行前检索相关上下文注入 system_prompt
+        # Memory retrieval: 执行前检索相关上下文,作为 volatile 层注入 system message
+        # U2/G2: 不再拼到 stable(system_prompt)末尾破坏 cache 前缀,改由 _build_system_message
+        # 组装双块结构(stable + volatile),Anthropic provider 在 stable 上加 cache_control。
+        memory_context = ""
        if memory_retriever:
            try:
                query = str(messages[-1].get("content", "")) if messages else ""
@ -1049,18 +1058,18 @@ class ReActEngine:
                    query=query,
                    top_k=top_k,
                    token_budget=token_budget,
-                )
-                if memory_context:
-                    if system_prompt:
-                        system_prompt += f"\n\n## 参考信息\n{memory_context}"
-                    else:
-                        system_prompt = f"## 参考信息\n{memory_context}"
+                ) or ""
            except Exception as e:
                logger.warning(f"Memory retrieval failed, continuing without context: {e}")

        conversation: list[dict[str, Any]] = []
-        if system_prompt:
-            conversation.append({"role": "system", "content": system_prompt})
+        system_content = self._build_system_message(
+            stable=system_prompt or "",
+            volatile=memory_context,
+            model=model,
+        )
+        if system_content is not None:
+            conversation.append({"role": "system", "content": system_content})
        conversation.extend(messages)

        # Context compression: 压缩超长对话历史
@ -1721,6 +1730,62 @@ class ReActEngine:
            schemas.append(schema)
        return schemas

+    def _build_system_message(
+        self,
+        stable: str,
+        volatile: str,
+        *,
+        model: str,
+    ) -> str | list[dict[str, Any]] | None:
+        """构建双块结构 system message(stable + volatile)。
+
+        - prompt_cache_enable=False 或无 stable+volatile → 返回 str(或 None)
+        - Anthropic provider → 返回 content blocks 列表,stable 块带 cache_control
+        - 其他 provider → 返回字符串拼接(stable + volatile),依赖 stable 前缀命中自动前缀缓存
+
+        ponytail: 断点数硬编码为 1(stable 层),不暴露配置(YAGNI — 双块结构 >1 无语义)。
+        """
+        if not stable and not volatile:
+            return None
+        if not self._prompt_cache_enable:
+            # 退化为字符串拼接(向后兼容,行为同改动前)
+            if stable and volatile:
+                return f"{stable}\n\n## 参考信息\n{volatile}"
+            if volatile:
+                return f"## 参考信息\n{volatile}"
+            return stable
+
+        provider_name = self._get_provider_name(model)
+        if provider_name == "anthropic":
+            blocks: list[dict[str, Any]] = []
+            if stable:
+                blocks.append({
+                    "type": "text",
+                    "text": stable,
+                    "cache_control": {"type": "ephemeral"},
+                })
+            if volatile:
+                blocks.append({
+                    "type": "text",
+                    "text": f"## 参考信息\n{volatile}",
+                })
+            return blocks if blocks else None
+
+        # 非 Anthropic:字符串拼接,stable 前缀命中 OpenAI/DashScope 自动前缀缓存
+        if stable and volatile:
+            return f"{stable}\n\n## 参考信息\n{volatile}"
+        if volatile:
+            return f"## 参考信息\n{volatile}"
+        return stable
+
+    def _get_provider_name(self, model: str) -> str | None:
+        """通过 gateway 查询 model 对应的 provider 名。失败回退 None(字符串拼接)。"""
+        try:
+            return self._llm_gateway.get_provider_name_for_model(model)
+        except Exception:
+            # ponytail: 测试中 gateway 可能是 MagicMock,无该方法;回退保守路径
+            return None
+
    def _build_tool_use_prompt(self, tools: list[Tool]) -> str:
        """Build prompt-based tool calling instructions with tiered injection.

--- a/src/agentkit/llm/gateway.py
+++ b/src/agentkit/llm/gateway.py
@ -412,6 +412,24 @@ class LLMGateway:
            return self._config.model_aliases[model]
        return model

+    def get_provider_name_for_model(self, model: str) -> str | None:
+        """返回 model 对应的 provider 名(用于 provider-specific 优化如 cache_control)。
+
+        ponytail: 仅做 alias 解析 + provider 前缀提取,不查内部状态。
+        升级路径:ServerConfig 显式声明 provider per model。
+        返回 None 表示无法确定(多 provider + 无 "/" 前缀),调用方应回退到字符串拼接。
+        """
+        resolved = self._resolve_model_alias(model)
+        if "/" in resolved:
+            provider_name = resolved.split("/", 1)[0]
+            if provider_name in self._providers:
+                return provider_name
+            return None
+        # 无 "/" 前缀:仅当只有一个 provider 时能确定
+        if len(self._providers) == 1:
+            return next(iter(self._providers))
+        return None
+
    def _resolve_model(self, model: str) -> tuple[LLMProvider, str]:
        """解析模型为 (provider, actual_model_name)"""
        # model 格式: "provider/model_name" 或 "model_name"
--- a/src/agentkit/llm/providers/anthropic.py
+++ b/src/agentkit/llm/providers/anthropic.py
@ -99,13 +99,18 @@ class AnthropicProvider(LLMProvider):
            "content-type": "application/json",
        }

-    def _convert_messages(self, messages: list[dict[str, str]]) -> tuple[str | None, list[dict[str, Any]]]:
+    def _convert_messages(self, messages: list[dict[str, str]]) -> tuple[str | list[dict[str, Any]] | None, list[dict[str, Any]]]:
        """将 OpenAI 风格消息转换为 Anthropic 格式

        Returns:
            (system_prompt, anthropic_messages)
+
+        system_prompt 可为:
+          - str: 传统字符串(向后兼容)
+          - list[dict]: Anthropic content blocks(支持 cache_control,U2/G2)
+          - None: 无 system 消息
        """
-        system_prompt: str | None = None
+        system_prompt: str | list[dict[str, Any]] | None = None
        anthropic_messages: list[dict[str, Any]] = []

        for msg in messages:
@ -113,6 +118,8 @@ class AnthropicProvider(LLMProvider):
            content = msg.get("content", "")

            if role == "system":
+                # U2/G2: content 可为 str(传统)或 list[dict](content blocks)
+                # content blocks 直接透传(包含 cache_control 标记)
                system_prompt = content
                continue

--- a/src/agentkit/server/config.py
+++ b/src/agentkit/server/config.py
@ -116,6 +116,7 @@ class ServerConfig:
        evolution: dict[str, Any] | None = None,
        expert_paths: list[str] | None = None,
        board: dict[str, Any] | None = None,
+        prompt_cache: dict[str, Any] | None = None,
        on_change: Callable[["ServerConfig"], None] | None = None,
    ):
        self.host = host
@ -144,6 +145,7 @@ class ServerConfig:
        self.evolution = evolution or {}
        self.expert_paths = expert_paths or []
        self.board = board or {}
+        self.prompt_cache = prompt_cache or {}
        self.on_change = on_change

        # Config watching state
--- a/tests/unit/test_prompt_cache_layers.py
+++ b/tests/unit/test_prompt_cache_layers.py
@ -0,0 +1,221 @@
+"""U2 / G2 Prompt Cache 双块结构测试。
+
+覆盖 R4-R7, R13:
+- R4 stable/volatile 双块结构
+- R5 记忆注入从 system_prompt 末尾移到 volatile 层
+- R6 跨 provider 统一 cache 策略(Anthropic blocks / OpenAI 字符串)
+- R7 多轮 stable 不变(由构造保证)
+- R13 配置化(prompt_cache_enable=False 退化)
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+
+from agentkit.core.react import ReActEngine
+
+
+class _StubGateway:
+    """模拟 LLMGateway,记录最后一次 chat_stream 调用的 messages。"""
+
+    def __init__(self, provider_name: str | None = "anthropic"):
+        self._provider_name = provider_name
+        self.captured_messages: list[dict[str, Any]] | None = None
+
+    def get_provider_name_for_model(self, model: str) -> str | None:
+        return self._provider_name
+
+    async def chat_stream(self, **kwargs):
+        self.captured_messages = list(kwargs.get("messages", []))
+        return
+        yield  # makes this an async generator
+
+
+def _make_engine(provider_name: str | None = "anthropic", *, cache_enable: bool = True) -> tuple[ReActEngine, _StubGateway]:
+    gw = _StubGateway(provider_name=provider_name)
+    engine = ReActEngine.__new__(ReActEngine)
+    engine._llm_gateway = gw
+    engine._prompt_cache_enable = cache_enable
+    return engine, gw
+
+
+# ---- R4/R6 Anthropic: stable + volatile → content blocks ----
+
+
+def test_anthropic_provider_returns_content_blocks_with_cache_control():
+    engine, _ = _make_engine("anthropic")
+    result = engine._build_system_message(
+        stable="You are a helpful assistant.",
+        volatile="Memory: foo",
+        model="claude-sonnet-4",
+    )
+    assert isinstance(result, list)
+    assert result[0] == {
+        "type": "text",
+        "text": "You are a helpful assistant.",
+        "cache_control": {"type": "ephemeral"},
+    }
+    assert result[1] == {"type": "text", "text": "## 参考信息\nMemory: foo"}
+
+
+# ---- R5 Anthropic: empty volatile → only stable block ----
+
+
+def test_anthropic_empty_volatile_returns_only_stable_block():
+    engine, _ = _make_engine("anthropic")
+    result = engine._build_system_message(
+        stable="base prompt",
+        volatile="",
+        model="claude",
+    )
+    assert isinstance(result, list)
+    assert len(result) == 1
+    assert result[0]["text"] == "base prompt"
+    assert "cache_control" in result[0]
+
+
+# ---- R6 Non-Anthropic: returns string concat ----
+
+
+def test_non_anthropic_returns_string_concat():
+    engine, _ = _make_engine("openai")
+    result = engine._build_system_message(
+        stable="base",
+        volatile="ctx",
+        model="gpt-4",
+    )
+    assert isinstance(result, str)
+    assert result == "base\n\n## 参考信息\nctx"
+
+
+def test_unknown_provider_returns_string_concat():
+    """provider_name 无法确定时(gateway 返回 None),回退字符串拼接不报错。"""
+    engine, _ = _make_engine(None)
+    result = engine._build_system_message(
+        stable="base",
+        volatile="ctx",
+        model="default",
+    )
+    assert isinstance(result, str)
+    assert "## 参考信息" in result
+
+
+# ---- R13 Config: prompt_cache_enable=False → 退化字符串 ----
+
+
+def test_prompt_cache_disabled_falls_back_to_string():
+    engine, _ = _make_engine("anthropic", cache_enable=False)
+    result = engine._build_system_message(
+        stable="base",
+        volatile="ctx",
+        model="claude",
+    )
+    # 即便是 Anthropic,enable=False 时也返回字符串(行为同改动前)
+    assert isinstance(result, str)
+    assert "cache_control" not in result
+
+
+# ---- R4 Edge: no stable + no volatile → None ----
+
+
+def test_empty_stable_and_volatile_returns_none():
+    engine, _ = _make_engine("anthropic")
+    result = engine._build_system_message(stable="", volatile="", model="claude")
+    assert result is None
+
+
+# ---- R5 Edge: empty stable + volatile → only volatile block (Anthropic) ----
+
+
+def test_anthropic_empty_stable_returns_only_volatile_block():
+    engine, _ = _make_engine("anthropic")
+    result = engine._build_system_message(
+        stable="",
+        volatile="only memory",
+        model="claude",
+    )
+    assert isinstance(result, list)
+    assert len(result) == 1
+    assert result[0]["text"] == "## 参考信息\nonly memory"
+    assert "cache_control" not in result[0]  # volatile 块无 cache_control
+
+
+# ---- Integration: execute_stream uses _build_system_message end-to-end ----
+
+
+async def test_execute_stream_with_anthropic_uses_content_blocks():
+    """execute_stream 把双块 system content 透传给 gateway。"""
+    from agentkit.tools.base import Tool
+
+    class _NoopTool(Tool):
+        def __init__(self):
+            super().__init__(name="noop", description="noop")
+
+        async def execute(self, **kwargs):
+            return {}
+
+    class _MockGateway:
+        def __init__(self):
+            self.captured_messages = None
+
+        def get_provider_name_for_model(self, model: str) -> str | None:
+            return "anthropic"
+
+        async def chat_stream(self, **kwargs):
+            self.captured_messages = list(kwargs.get("messages", []))
+            # yield one chunk then end
+            from agentkit.llm.protocol import StreamChunk
+            yield StreamChunk(content="done", model="claude")
+
+    class _MemRetriever:
+        async def get_context_string(self, **kw):
+            return "retrieved context"
+
+    gw = _MockGateway()
+    engine = ReActEngine(llm_gateway=gw, prompt_cache_enable=True)
+    events = []
+    async for ev in engine.execute_stream(
+        messages=[{"role": "user", "content": "hi"}],
+        tools=[],
+        model="claude",
+        system_prompt="base",
+        memory_retriever=_MemRetriever(),
+    ):
+        events.append(ev)
+
+    assert gw.captured_messages is not None
+    sys_msg = gw.captured_messages[0]
+    assert sys_msg["role"] == "system"
+    assert isinstance(sys_msg["content"], list)
+    assert sys_msg["content"][0]["text"] == "base"
+    assert "cache_control" in sys_msg["content"][0]
+    assert sys_msg["content"][1]["text"] == "## 参考信息\nretrieved context"
+
+
+# ---- Anthropic provider _convert_messages passes through list-type system ----
+
+
+def test_anthropic_convert_messages_passes_through_list_system_content():
+    """AnthropicProvider._convert_messages 应直接透传 list-type system content。"""
+    from agentkit.llm.providers.anthropic import AnthropicProvider
+
+    provider = AnthropicProvider.__new__(AnthropicProvider)
+    blocks = [
+        {"type": "text", "text": "stable", "cache_control": {"type": "ephemeral"}},
+        {"type": "text", "text": "volatile"},
+    ]
+    messages = [{"role": "system", "content": blocks}]
+    system_prompt, anthropic_messages = provider._convert_messages(messages)
+    assert system_prompt is blocks  # same object, transparent passthrough
+    assert anthropic_messages == []
+
+
+def test_anthropic_convert_messages_string_system_still_works():
+    """传统 string system content 仍能透传(向后兼容)。"""
+    from agentkit.llm.providers.anthropic import AnthropicProvider
+
+    provider = AnthropicProvider.__new__(AnthropicProvider)
+    messages = [{"role": "system", "content": "old style"}]
+    system_prompt, _ = provider._convert_messages(messages)
+    assert system_prompt == "old style"