# fischer-agentkit/tests/unit/llm/test_cache.py

"""U17 — LiteLLM 语义缓存管理器单元测试。

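Scenarios covered (the 4 required by the plan + security requirement e +
backward compatibility + fallback):
1. A semantically similar query hits the cache (``litellm.acompletion`` is mocked
   to simulate the caching behaviour)
2. A different system prompt does not hit the cache
3. The cache hit rate can be measured
4. Threshold tuning takes effect (similarity_threshold is passed to RedisSemanticCache)
5. User A never receives User B's cached response (security requirement e)
6. kb_acl_hash isolation — different ACL hashes produce different keys
7. kb_caching_disabled turns caching off (security requirement c)
8. cache_params_for_hit / no_cache — return the correct dict
9. record_cache_result — records hits/misses in the stats counters
10. LitellmCacheConfig.from_cache_config — converts correctly, similarity_threshold=0.87
11. LitellmCacheManager.enable/disable — litellm.cache is set/cleared correctly
12. generate_cache_key backward compatibility — identical to the old version when
    user_id=None and kb_acl_hash=None
13. RedisSemanticCache fallback — the auto backend falls back when redisvl is missing

Tests use ``unittest.mock.patch`` to stub ``litellm.acompletion`` and the
``litellm.caching`` backend classes. An autouse fixture resets the global
``litellm.cache`` to ``None`` after every test to avoid cross-test pollution.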
"""

from __future__ import annotations

from types import SimpleNamespace
from typing import Any
from unittest.mock import MagicMock, patch

import pytest

from agentkit.llm.cache import LitellmCacheConfig, LitellmCacheManager
from agentkit.llm.cache_key import generate_cache_key
from agentkit.llm.config import CacheConfig


# ---------------------------------------------------------------------------
# 测试辅助
# ---------------------------------------------------------------------------


def _make_messages(user_content: str = "Hello") -> list[dict[str, str]]:
    """Build a two-turn chat transcript: a fixed system prompt plus one user message.

    Args:
        user_content: Text placed in the user turn.

    Returns:
        A list holding the system message followed by the user message.
    """
    system_turn = {"role": "system", "content": "You are a helpful assistant."}
    user_turn = {"role": "user", "content": user_content}
    return [system_turn, user_turn]


def _make_litellm_response(
    content: str = "Hello!",
    model: str = "gpt-4o-mini",
    prompt_tokens: int = 10,
    completion_tokens: int = 5,
    cache_key: str | None = None,
) -> SimpleNamespace:
    """构造 LiteLLM 响应（OpenAI ChatCompletion 格式），可选 cache_key 标记。"""
    hidden_params: dict[str, Any] = {}
    if cache_key is not None:
        hidden_params["cache_key"] = cache_key
    message = SimpleNamespace(content=content, tool_calls=None)
    return SimpleNamespace(
        choices=[SimpleNamespace(message=message)],
        usage=SimpleNamespace(prompt_tokens=prompt_tokens, completion_tokens=completion_tokens),
        model=model,
        _hidden_params=hidden_params,
    )


@pytest.fixture(autouse=True)
def _cleanup_litellm_cache():
    """Reset the global ``litellm.cache`` after every test so tests do not pollute each other."""
    # No setup work: hand control straight to the test.
    yield

    # Teardown: litellm is imported only at this point, then its module-level cache is cleared.
    import litellm as litellm_module

    litellm_module.cache = None


# ---------------------------------------------------------------------------
# 1. 语义相似查询命中缓存
# ---------------------------------------------------------------------------


class TestCacheHit:
    """Scenario 1 - an identical request is answered from the cache the second time."""

    async def test_second_request_is_cache_hit(self):
        """The second request carrying the same cache_key comes back with cache_hit=True."""
        manager = LitellmCacheManager(LitellmCacheConfig(enabled=True, backend="memory"))

        messages = _make_messages()
        key = manager.build_cache_key("gpt-4o", messages, 0.0)
        cache_params = LitellmCacheManager.cache_params_for_hit(key)

        # Stand-in for LiteLLM's caching: the first call for a key misses, later calls hit.
        seen_calls: list[dict[str, Any]] = []
        stored_responses: dict[str, SimpleNamespace] = {}

        async def fake_acompletion(**kwargs):
            seen_calls.append(kwargs)
            requested_key = kwargs.get("cache", {}).get("cache_key")
            if requested_key and requested_key in stored_responses:
                # Hit - the reply is tagged with the cache key.
                return _make_litellm_response(content="Cached!", cache_key=requested_key)
            # Miss - pretend to call the "real" API and remember the reply.
            fresh = _make_litellm_response(content="Fresh response")
            if requested_key:
                stored_responses[requested_key] = fresh
            return fresh

        with patch("litellm.acompletion", side_effect=fake_acompletion):
            from agentkit.llm.providers.litellm_provider import LitellmProvider

            provider = LitellmProvider(model_prefix="openai/", api_key="test")
            from agentkit.llm.protocol import LLMRequest

            # Two identical requests in a row: the first must miss, the second must hit.
            expectations = ((False, "Fresh response"), (True, "Cached!"))
            for expected_hit, expected_content in expectations:
                request = LLMRequest(
                    messages=messages,
                    model="gpt-4o",
                    temperature=0.0,
                    cache=cache_params,
                )
                response = await provider.chat(request)
                assert response.cache_hit is expected_hit
                assert response.content == expected_content

        # litellm.acompletion runs for both requests (caching is handled inside LiteLLM).
        assert len(seen_calls) == 2


# ---------------------------------------------------------------------------
# 2. 不同 system prompt 不命中缓存
# ---------------------------------------------------------------------------


class TestSystemPromptIsolation:
    """Scenario 2 - different system prompts yield different cache keys, so no false hits."""

    def test_different_system_prompt_different_key(self):
        """Two conversations differing only in the system prompt must not share a key."""
        manager = LitellmCacheManager(LitellmCacheConfig(backend="memory"))

        conversations = [
            [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": "Hello"},
            ]
            for system_prompt in ("Be concise", "Be verbose")
        ]
        concise_key, verbose_key = (
            manager.build_cache_key("gpt-4o", conversation, 0.0)
            for conversation in conversations
        )

        assert concise_key != verbose_key


# ---------------------------------------------------------------------------
# 3. 缓存命中率可统计
# ---------------------------------------------------------------------------


class TestCacheStats:
    """Scenario 3 - LitellmCacheManager.stats() reports the recorded hits and misses."""

    def test_stats_after_hits_and_misses(self):
        """Recording two hits and one miss is reflected in total_hits / total_misses."""
        manager = LitellmCacheManager(LitellmCacheConfig(backend="memory"))

        # Two hits followed by a single miss.
        for was_hit in (True, True, False):
            manager.record_cache_result(was_hit)

        snapshot = manager.stats()
        assert snapshot["total_hits"] == 2
        assert snapshot["total_misses"] == 1


# ---------------------------------------------------------------------------
# 4. 阈值调优生效
# ---------------------------------------------------------------------------


class TestSimilarityThreshold:
    """Scenario 4 - the configured similarity_threshold reaches the RedisSemanticCache constructor."""

    def test_threshold_passed_to_redis_semantic_cache(self):
        """A configured threshold of 0.83 is forwarded unchanged to RedisSemanticCache.

        0.83 differs from the 0.87 that ``from_cache_config`` pins (checked by the
        other test in this class), so the assertion cannot pass through that value.
        """
        # Single source of truth for the values that are both configured and asserted.
        threshold = 0.83
        redis_url = "redis://localhost:6379"

        config = LitellmCacheConfig(
            enabled=True,
            backend="redis_semantic",
            similarity_threshold=threshold,
            redis_url=redis_url,
        )
        manager = LitellmCacheManager(config)

        mock_cache_instance = MagicMock()
        with patch(
            "litellm.caching.RedisSemanticCache", return_value=mock_cache_instance
        ) as mock_cls:
            instance = manager._create_cache_instance()
            # The config sets no embedding_model; the constructor call is expected to
            # carry "text-embedding-ada-002".
            mock_cls.assert_called_once_with(
                redis_url=redis_url,
                similarity_threshold=threshold,
                embedding_model="text-embedding-ada-002",
            )
            assert instance is mock_cache_instance

    def test_default_threshold_is_087(self):
        """from_cache_config pins similarity_threshold to 0.87, ignoring the legacy 0.92."""
        old_config = CacheConfig(similarity_threshold=0.92)
        litellm_config = LitellmCacheConfig.from_cache_config(old_config)
        assert litellm_config.similarity_threshold == 0.87


# ---------------------------------------------------------------------------
# 5. User A 不返回 User B 缓存（安全要求 e）
# ---------------------------------------------------------------------------


class TestUserIsolation:
    """Security requirement e - User A's query never returns User B's cached response."""

    def test_different_users_different_keys(self):
        """Distinct user_id values produce distinct cache keys."""
        manager = LitellmCacheManager(LitellmCacheConfig(backend="memory"))
        messages = _make_messages("What is my salary?")

        key_a, key_b = (
            manager.build_cache_key("gpt-4o", messages, 0.0, user_id=uid)
            for uid in ("user_a", "user_b")
        )

        assert key_a != key_b

    def test_same_user_same_key(self):
        """The same user_id produces the same cache key on repeated calls."""
        manager = LitellmCacheManager(LitellmCacheConfig(backend="memory"))
        messages = _make_messages("What is my salary?")

        first, second = (
            manager.build_cache_key("gpt-4o", messages, 0.0, user_id="user_a")
            for _ in range(2)
        )

        assert first == second

    def test_no_user_id_same_as_no_user_id(self):
        """With user_id=None two calls yield the same key (backward compatibility)."""
        manager = LitellmCacheManager(LitellmCacheConfig(backend="memory"))
        messages = _make_messages()

        first, second = (
            manager.build_cache_key("gpt-4o", messages, 0.0, user_id=None)
            for _ in range(2)
        )

        assert first == second


# ---------------------------------------------------------------------------
# 6. kb_acl_hash 隔离
# ---------------------------------------------------------------------------


class TestACLIsolation:
    """Security requirement b - different ACL scopes produce different cache keys."""

    def test_different_acl_hash_different_keys(self):
        """Changing kb_acl_hash changes the resulting key."""
        manager = LitellmCacheManager(LitellmCacheConfig(backend="memory"))
        messages = _make_messages("Summarize the document")

        key_v1, key_v2 = (
            manager.build_cache_key("gpt-4o", messages, 0.0, kb_acl_hash=acl)
            for acl in ("acl_v1", "acl_v2")
        )

        assert key_v1 != key_v2

    def test_same_acl_hash_same_key(self):
        """An identical kb_acl_hash yields an identical key on repeated calls."""
        manager = LitellmCacheManager(LitellmCacheConfig(backend="memory"))
        messages = _make_messages()

        first, second = (
            manager.build_cache_key("gpt-4o", messages, 0.0, kb_acl_hash="acl_v1")
            for _ in range(2)
        )

        assert first == second


# ---------------------------------------------------------------------------
# 7. kb_caching_disabled 禁用缓存
# ---------------------------------------------------------------------------


class TestKBCachingDisabled:
    """Security requirement c - caching is off when the KB sets caching_disabled=True."""

    def test_should_cache_returns_false_when_disabled(self):
        """kb_caching_disabled=True makes should_cache() return False."""
        manager = LitellmCacheManager(LitellmCacheConfig(backend="memory"))
        decision = manager.should_cache(kb_caching_disabled=True)
        assert decision is False

    def test_should_cache_returns_true_when_enabled(self):
        """kb_caching_disabled=False makes should_cache() return True."""
        manager = LitellmCacheManager(LitellmCacheConfig(backend="memory"))
        decision = manager.should_cache(kb_caching_disabled=False)
        assert decision is True

    def test_should_cache_default_true(self):
        """Called without arguments, should_cache() returns True."""
        manager = LitellmCacheManager(LitellmCacheConfig(backend="memory"))
        decision = manager.should_cache()
        assert decision is True


# ---------------------------------------------------------------------------
# 8. cache_params_for_hit / no_cache
# ---------------------------------------------------------------------------


class TestCacheParams:
    """Scenario 8 - the cache-parameter helpers return the expected dicts."""

    def test_cache_params_for_hit(self):
        """cache_params_for_hit wraps the given key under "cache_key"."""
        expected = {"cache_key": "my_cache_key"}
        assert LitellmCacheManager.cache_params_for_hit("my_cache_key") == expected

    def test_cache_params_for_no_cache(self):
        """cache_params_for_no_cache returns the "no-cache" flag set to True."""
        expected = {"no-cache": True}
        assert LitellmCacheManager.cache_params_for_no_cache() == expected


# ---------------------------------------------------------------------------
# 10. LitellmCacheConfig.from_cache_config
# ---------------------------------------------------------------------------


class TestFromCacheConfig:
    """Scenario 10 - LitellmCacheConfig.from_cache_config converts a legacy CacheConfig."""

    def test_basic_conversion(self):
        """Field values carry over, with the threshold pinned and per-user namespacing on."""
        legacy = CacheConfig(
            enabled=True,
            backend="memory",
            redis_url="redis://myhost:6379",
            semantic_ttl=3600,
            embedding_model="bge-m3",
        )

        converted = LitellmCacheConfig.from_cache_config(legacy)

        assert converted.enabled is True
        assert converted.backend == "memory"
        assert converted.redis_url == "redis://myhost:6379"
        assert converted.ttl == 3600
        # Pinned to 0.87; the original note says the old 0.92 is ignored
        # (presumably the CacheConfig default - confirm).
        assert converted.similarity_threshold == 0.87
        # Forced on.
        assert converted.per_user_namespace is True

    def test_unknown_backend_falls_to_auto(self):
        """An unrecognised backend name is converted to "auto"."""
        converted = LitellmCacheConfig.from_cache_config(CacheConfig(backend="unknown_backend"))
        assert converted.backend == "auto"

    def test_redis_backend_preserved(self):
        """The "redis" backend survives conversion unchanged."""
        converted = LitellmCacheConfig.from_cache_config(CacheConfig(backend="redis"))
        assert converted.backend == "redis"

    def test_empty_embedding_model_falls_to_default(self):
        """An empty embedding_model is replaced by "text-embedding-ada-002"."""
        converted = LitellmCacheConfig.from_cache_config(CacheConfig(embedding_model=""))
        assert converted.embedding_model == "text-embedding-ada-002"


# ---------------------------------------------------------------------------
# 11. LitellmCacheManager.enable/disable
# ---------------------------------------------------------------------------


class TestEnableDisable:
    """Scenario 11 - enable()/disable() set and clear the global ``litellm.cache``."""

    def test_enable_sets_litellm_cache(self):
        """After enable(), ``litellm.cache`` and the manager's instance are both set."""
        import litellm

        manager = LitellmCacheManager(LitellmCacheConfig(backend="memory"))

        manager.enable()

        assert litellm.cache is not None
        assert manager._cache_instance is not None

    def test_disable_clears_litellm_cache(self):
        """After disable(), ``litellm.cache`` and the manager's instance are both None."""
        import litellm

        manager = LitellmCacheManager(LitellmCacheConfig(backend="memory"))

        manager.enable()
        assert litellm.cache is not None

        manager.disable()
        assert litellm.cache is None
        assert manager._cache_instance is None


# ---------------------------------------------------------------------------
# 12. generate_cache_key 向后兼容
# ---------------------------------------------------------------------------


class TestBackwardCompatibility:
    """With user_id=None and kb_acl_hash=None the key matches the legacy generate_cache_key."""

    def test_none_user_id_same_as_not_passing(self):
        """Passing None explicitly equals omitting both optional arguments."""
        messages = _make_messages()
        explicit_none = generate_cache_key("gpt-4o", messages, 0.0, user_id=None, kb_acl_hash=None)
        omitted = generate_cache_key("gpt-4o", messages, 0.0)
        assert explicit_none == omitted

    def test_backward_compat_deterministic(self):
        """Two identical calls with explicit None values return the same key."""
        messages = _make_messages()
        first, second = (
            generate_cache_key("gpt-4o", messages, 0.0, user_id=None, kb_acl_hash=None)
            for _ in range(2)
        )
        assert first == second

    def test_user_id_changes_key(self):
        """Supplying a user_id produces a key different from the no-user key."""
        messages = _make_messages()
        baseline = generate_cache_key("gpt-4o", messages, 0.0)
        with_user = generate_cache_key("gpt-4o", messages, 0.0, user_id="user_a")
        assert baseline != with_user

    def test_acl_hash_changes_key(self):
        """Supplying a kb_acl_hash produces a key different from the no-ACL key."""
        messages = _make_messages()
        baseline = generate_cache_key("gpt-4o", messages, 0.0)
        with_acl = generate_cache_key("gpt-4o", messages, 0.0, kb_acl_hash="acl_v1")
        assert baseline != with_acl


# ---------------------------------------------------------------------------
# 13. RedisSemanticCache fallback
# ---------------------------------------------------------------------------


class TestRedisSemanticCacheFallback:
    """When redisvl is missing, the auto backend falls back to RedisCache."""

    def test_auto_fallback_to_redis_cache_when_redisvl_missing(self):
        """In auto mode an ImportError from RedisSemanticCache leads to a RedisCache instance."""
        import litellm.caching

        manager = LitellmCacheManager(
            LitellmCacheConfig(backend="auto", redis_url="redis://localhost:6379")
        )
        mock_redis_cache = MagicMock(name="RedisCacheInstance")

        # Constructing RedisSemanticCache raises ImportError, as if redisvl were not installed.
        semantic_unavailable = patch.object(
            litellm.caching,
            "RedisSemanticCache",
            side_effect=ImportError("No module named 'redisvl'"),
        )
        plain_redis = patch.object(litellm.caching, "RedisCache", return_value=mock_redis_cache)

        with semantic_unavailable, plain_redis as mock_rc:
            instance = manager._create_cache_instance()
            mock_rc.assert_called_once_with(redis_url="redis://localhost:6379")
            assert instance is mock_redis_cache

    def test_redis_semantic_backend_raises_when_redisvl_missing(self):
        """An explicit redis_semantic backend with redisvl missing raises ImportError."""
        import litellm.caching

        manager = LitellmCacheManager(LitellmCacheConfig(backend="redis_semantic"))

        semantic_unavailable = patch.object(
            litellm.caching,
            "RedisSemanticCache",
            side_effect=ImportError("No module named 'redisvl'"),
        )
        with semantic_unavailable, pytest.raises(ImportError):
            manager._create_cache_instance()

    def test_memory_backend_uses_in_memory_cache(self):
        """The memory backend yields an InMemoryCache instance."""
        import litellm.caching

        manager = LitellmCacheManager(LitellmCacheConfig(backend="memory"))
        created = manager._create_cache_instance()
        assert isinstance(created, litellm.caching.InMemoryCache)