623 lines
26 KiB
Python
623 lines
26 KiB
Python
"""LLM Gateway - 统一 LLM 调用入口"""
|
||
|
||
import asyncio
|
||
import logging
|
||
import time
|
||
from datetime import datetime, timezone
|
||
from pathlib import Path
|
||
from typing import Any
|
||
|
||
from agentkit.core.exceptions import LLMProviderError, ModelNotFoundError
|
||
from agentkit.llm.config import LLMConfig
|
||
from agentkit.llm.protocol import LLMProvider, LLMRequest, LLMResponse, StreamChunk, TokenUsage
|
||
from agentkit.llm.providers.tracker import UsageSummary, UsageTracker
|
||
from agentkit.telemetry.tracing import get_tracer, _OTEL_AVAILABLE
|
||
from agentkit.telemetry.metrics import llm_token_histogram
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class QuotaExceededError(Exception):
|
||
"""Raised when a department's LLM quota is exceeded.
|
||
|
||
Carries enough metadata for the API layer to return a structured
|
||
429 response (department_id, quota_type, period, limit, current).
|
||
"""
|
||
|
||
def __init__(
|
||
self,
|
||
department_id: str,
|
||
quota_type: str,
|
||
period: str,
|
||
limit: Any,
|
||
current: Any,
|
||
) -> None:
|
||
self.department_id = department_id
|
||
self.quota_type = quota_type
|
||
self.period = period
|
||
self.limit = limit
|
||
self.current = current
|
||
super().__init__(
|
||
f"Quota exceeded for department {department_id}: "
|
||
f"{quota_type} {period} (limit={limit}, current={current})"
|
||
)
|
||
|
||
|
||
class LLMGateway:
|
||
"""LLM 网关 - Provider 注册、模型别名解析、Fallback、Usage 追踪、Cache"""
|
||
|
||
def __init__(self, config: LLMConfig | None = None, usage_store: Any = None):
|
||
self._providers: dict[str, LLMProvider] = {}
|
||
self._usage_tracker = UsageTracker(store=usage_store) if usage_store else UsageTracker()
|
||
self._config = config or LLMConfig()
|
||
|
||
# Cache (U17 — LiteLLM 缓存管理器,opt-in,默认禁用)
|
||
self._cache_manager: Any = None # LitellmCacheManager | None
|
||
if self._config.cache and self._config.cache.enabled:
|
||
from agentkit.llm.cache import LitellmCacheConfig, LitellmCacheManager
|
||
|
||
litellm_config = LitellmCacheConfig.from_cache_config(self._config.cache)
|
||
self._cache_manager = LitellmCacheManager(litellm_config)
|
||
self._cache_manager.enable()
|
||
logger.info(
|
||
f"LLM cache enabled (LiteLLM, backend={self._config.cache.backend}, "
|
||
f"similarity_threshold={litellm_config.similarity_threshold})"
|
||
)
|
||
|
||
def register_provider(self, name: str, provider: LLMProvider) -> None:
|
||
"""注册 Provider"""
|
||
self._providers[name] = provider
|
||
logger.info(f"LLM provider '{name}' registered")
|
||
|
||
@property
|
||
def has_providers(self) -> bool:
|
||
"""Return True if at least one LLM provider is registered."""
|
||
return bool(self._providers)
|
||
|
||
async def chat(
|
||
self,
|
||
messages: list[dict[str, str]],
|
||
model: str,
|
||
agent_name: str = "",
|
||
task_type: str = "",
|
||
tools: list[dict] | None = None,
|
||
tool_choice: str = "auto",
|
||
timeout: float | None = None,
|
||
user_id: str | None = None,
|
||
department_ids: list[str] | None = None,
|
||
db_path: Path | str | None = None,
|
||
kb_id: str | None = None,
|
||
kb_acl_hash: str | None = None,
|
||
**kwargs,
|
||
) -> LLMResponse:
|
||
"""发送 chat 请求,自动解析别名和 Fallback"""
|
||
resolved_model = self._resolve_model_alias(model)
|
||
|
||
if not self._providers:
|
||
raise LLMProviderError("", "No provider registered")
|
||
|
||
# ── Quota enforcement ──
|
||
# Only enforce when department_ids + db_path are provided
|
||
# (other call sites pass None — no quota check).
|
||
if department_ids and db_path:
|
||
await self._enforce_quota(db_path, department_ids, resolved_model)
|
||
|
||
# Telemetry: start LLM span
|
||
_span_cm = None
|
||
_span = None
|
||
if _OTEL_AVAILABLE:
|
||
tracer = get_tracer()
|
||
if tracer is not None:
|
||
from opentelemetry.trace import SpanKind
|
||
|
||
_span_cm = tracer.start_as_current_span(
|
||
"gen_ai.chat",
|
||
kind=SpanKind.CLIENT,
|
||
attributes={
|
||
"gen_ai.system": resolved_model.split("/")[0]
|
||
if "/" in resolved_model
|
||
else "unknown",
|
||
"gen_ai.operation.name": "chat",
|
||
"gen_ai.request.model": resolved_model,
|
||
},
|
||
)
|
||
_span = _span_cm.__enter__()
|
||
|
||
start = time.monotonic()
|
||
|
||
# ── Cache check (U17 — LiteLLM cache via cache_key in request) ──
|
||
# LiteLLM 在 litellm.acompletion 内部处理缓存读写,gateway 只需:
|
||
# 1. 构建 per-user + ACL-scoped cache_key(安全要求 a, b)
|
||
# 2. 将 cache 参数注入 kwargs 透传到 provider
|
||
# 3. 检测响应的 cache_hit 标志,用于 usage tracking(cost=0)
|
||
if self._cache_manager is not None:
|
||
from agentkit.llm.cache import LitellmCacheManager
|
||
|
||
# 解析 KB caching_disabled(安全要求 c)
|
||
# 非 RAG 请求(kb_id=None)→ 默认启用缓存(无 KB 数据需保护)。
|
||
# RAG 请求(kb_id!=None)→ fail-closed:默认禁用缓存,仅在 settings
|
||
# 明确返回 caching_disabled=False 时启用。防止 DB 异常时 fail-open
|
||
# 导致禁用缓存的 KB 数据泄漏到缓存。
|
||
kb_caching_disabled = kb_id is not None
|
||
if kb_id is not None:
|
||
try:
|
||
from agentkit.rag_platform.settings import get_settings_store
|
||
|
||
settings = await get_settings_store().get_settings(kb_id)
|
||
if settings is not None:
|
||
kb_caching_disabled = settings.caching_disabled
|
||
# settings 为 None(KB 不存在)→ 保持 True(fail-closed)
|
||
except Exception as e:
|
||
logger.warning(f"Failed to read KB cache settings for kb_id={kb_id}: {e}")
|
||
# 读取异常 → 保持 True(fail-closed,禁用缓存)
|
||
|
||
if self._cache_manager.should_cache(kb_caching_disabled, user_id):
|
||
cache_key = self._cache_manager.build_cache_key(
|
||
model=resolved_model,
|
||
messages=messages,
|
||
temperature=kwargs.get("temperature", 0.7),
|
||
tools=tools,
|
||
tool_choice=tool_choice,
|
||
max_tokens=kwargs.get("max_tokens", 2000),
|
||
user_id=user_id,
|
||
kb_acl_hash=kb_acl_hash,
|
||
)
|
||
kwargs["cache"] = LitellmCacheManager.cache_params_for_hit(cache_key)
|
||
else:
|
||
kwargs["cache"] = LitellmCacheManager.cache_params_for_no_cache()
|
||
|
||
# ── Normal provider call ──
|
||
models_to_try = self._get_models_to_try(resolved_model)
|
||
last_error: LLMProviderError | None = None
|
||
response: LLMResponse | None = None
|
||
|
||
try:
|
||
for model_name in models_to_try:
|
||
try:
|
||
provider, actual_model = self._resolve_model(model_name)
|
||
except ModelNotFoundError:
|
||
continue
|
||
|
||
req = LLMRequest(
|
||
messages=messages,
|
||
model=actual_model,
|
||
tools=tools,
|
||
tool_choice=tool_choice,
|
||
timeout=timeout,
|
||
**kwargs,
|
||
)
|
||
try:
|
||
response = await provider.chat(req)
|
||
# Empty response detection: if content is None/empty and no tool_calls,
|
||
# treat as failure and try next fallback model.
|
||
# This handles the common case where providers return 200 OK but empty body.
|
||
if (
|
||
response.content is None or not response.content.strip()
|
||
) and not response.tool_calls:
|
||
# Record usage for billing before discarding this response
|
||
if response.usage:
|
||
latency_ms = (time.monotonic() - start) * 1000
|
||
cost = self._calculate_cost(model_name, response.usage)
|
||
await self._record_usage(
|
||
agent_name=agent_name,
|
||
model=model_name,
|
||
usage=response.usage,
|
||
cost=cost,
|
||
latency_ms=latency_ms,
|
||
user_id=user_id,
|
||
department_ids=department_ids,
|
||
)
|
||
logger.warning(
|
||
f"Model '{model_name}' returned empty content with no tool_calls, "
|
||
f"trying next fallback"
|
||
)
|
||
last_error = LLMProviderError(
|
||
model_name,
|
||
f"Empty response from {model_name} (no content, no tool_calls)",
|
||
)
|
||
continue
|
||
break
|
||
except LLMProviderError as e:
|
||
last_error = e
|
||
logger.warning(f"Model '{model_name}' failed, trying next: {e}")
|
||
continue
|
||
else:
|
||
raise last_error or LLMProviderError(
|
||
"", f"All models failed for '{resolved_model}'"
|
||
)
|
||
|
||
latency_ms = (time.monotonic() - start) * 1000
|
||
|
||
# U17 — 检测 LiteLLM 缓存命中(用于 usage tracking cost=0)
|
||
is_cache_hit = getattr(response, "cache_hit", False)
|
||
if self._cache_manager is not None:
|
||
self._cache_manager.record_cache_result(is_cache_hit)
|
||
|
||
# 计算成本(缓存命中时 cost=0)
|
||
cost = 0.0 if is_cache_hit else self._calculate_cost(response.model, response.usage)
|
||
|
||
# 记录使用量
|
||
await self._record_usage(
|
||
agent_name=agent_name,
|
||
model=response.model,
|
||
usage=response.usage,
|
||
cost=cost,
|
||
latency_ms=latency_ms,
|
||
user_id=user_id,
|
||
department_ids=department_ids,
|
||
)
|
||
|
||
# Telemetry: record token usage and end span
|
||
if _span is not None:
|
||
_span.set_attribute("gen_ai.usage.input_tokens", response.usage.prompt_tokens)
|
||
_span.set_attribute("gen_ai.usage.output_tokens", response.usage.completion_tokens)
|
||
_span.set_attribute("gen_ai.response.model", response.model)
|
||
_span.set_attribute("gen_ai.duration.ms", int(latency_ms))
|
||
if self._cache_manager is not None:
|
||
_span.set_attribute("gen_ai.cache.hit", is_cache_hit)
|
||
llm_token_histogram().record(
|
||
response.usage.total_tokens,
|
||
{"gen_ai.request.model": resolved_model},
|
||
)
|
||
|
||
return response
|
||
finally:
|
||
if _span_cm is not None:
|
||
_span_cm.__exit__(None, None, None)
|
||
|
||
async def chat_stream(
|
||
self,
|
||
messages: list[dict[str, str]],
|
||
model: str,
|
||
agent_name: str = "",
|
||
task_type: str = "",
|
||
tools: list[dict] | None = None,
|
||
tool_choice: str = "auto",
|
||
timeout: float | None = None,
|
||
user_id: str | None = None,
|
||
department_ids: list[str] | None = None,
|
||
db_path: Path | str | None = None,
|
||
**kwargs,
|
||
):
|
||
"""Stream chat response with fallback support.
|
||
|
||
If the primary model fails before any chunk is yielded, tries fallback
|
||
models. If it fails after chunks have been sent, yields an error chunk
|
||
and terminates (cannot switch mid-stream).
|
||
|
||
Note: Streaming responses are NOT cached in this iteration.
|
||
"""
|
||
resolved_model = self._resolve_model_alias(model)
|
||
|
||
if not self._providers:
|
||
raise LLMProviderError("", "No provider registered")
|
||
|
||
# ── Quota enforcement ──
|
||
if department_ids and db_path:
|
||
await self._enforce_quota(db_path, department_ids, resolved_model)
|
||
|
||
models_to_try = self._get_models_to_try(resolved_model)
|
||
last_error: Exception | None = None
|
||
|
||
for model_name in models_to_try:
|
||
try:
|
||
provider, actual_model = self._resolve_model(model_name)
|
||
except ModelNotFoundError:
|
||
continue
|
||
|
||
stream_request = LLMRequest(
|
||
messages=messages,
|
||
model=actual_model,
|
||
tools=tools,
|
||
tool_choice=tool_choice,
|
||
timeout=timeout,
|
||
**kwargs,
|
||
)
|
||
|
||
chunk_yielded = False
|
||
start = time.monotonic()
|
||
total_content = ""
|
||
final_usage = None
|
||
final_model = model_name
|
||
|
||
try:
|
||
stream_obj = provider.chat_stream(stream_request)
|
||
# Defensive: guard against misconfigured providers (e.g. an
|
||
# AsyncMock in tests, or a future refactor that accidentally
|
||
# turns chat_stream into a regular ``async def``) that return
|
||
# a coroutine instead of an async generator. The original
|
||
# cryptic error ``'async for' requires an object with
|
||
# __aiter__ method, got coroutine`` becomes a clear,
|
||
# actionable message naming the offending provider+model.
|
||
if asyncio.iscoroutine(stream_obj):
|
||
logger.error(
|
||
f"Provider '{model_name}'.chat_stream returned a "
|
||
f"coroutine instead of an async generator. "
|
||
f"Check that the method is defined as "
|
||
f"``async def chat_stream(...): ...; yield ...``."
|
||
)
|
||
raise TypeError(
|
||
f"Provider '{model_name}' returned a coroutine "
|
||
f"from chat_stream() — expected an async "
|
||
f"generator. This indicates a provider "
|
||
f"implementation bug."
|
||
)
|
||
async for chunk in stream_obj:
|
||
chunk_yielded = True
|
||
if chunk.content:
|
||
total_content += chunk.content
|
||
if chunk.usage:
|
||
final_usage = chunk.usage
|
||
if chunk.model:
|
||
final_model = chunk.model
|
||
yield chunk
|
||
|
||
# Track usage after successful stream
|
||
latency_ms = (time.monotonic() - start) * 1000
|
||
if final_usage is None:
|
||
final_usage = TokenUsage()
|
||
cost = self._calculate_cost(final_model, final_usage)
|
||
await self._record_usage(
|
||
agent_name=agent_name,
|
||
model=final_model,
|
||
usage=final_usage,
|
||
cost=cost,
|
||
latency_ms=latency_ms,
|
||
user_id=user_id,
|
||
department_ids=department_ids,
|
||
)
|
||
|
||
# Empty stream detection: if no content was produced,
|
||
# raise error so the caller (ReActEngine) can retry with a different model.
|
||
# We cannot continue to next model here because chunks may have already
|
||
# been yielded to the client, which would cause mixed output.
|
||
# Note: stream tool_calls are not tracked in chunks, so we only check content.
|
||
if not total_content.strip():
|
||
logger.warning(f"Stream from '{model_name}' produced empty content")
|
||
raise LLMProviderError(
|
||
model_name,
|
||
f"Empty stream from {model_name}",
|
||
)
|
||
|
||
return # Success, done
|
||
except Exception as e:
|
||
last_error = e
|
||
if chunk_yielded:
|
||
# Can't switch mid-stream, terminate gracefully
|
||
logger.error(f"Stream failed after chunks sent for '{model_name}': {e}")
|
||
yield StreamChunk(
|
||
content="",
|
||
model=final_model,
|
||
usage=None,
|
||
is_final=True,
|
||
)
|
||
return
|
||
# No chunks yet, try next fallback
|
||
logger.warning(f"Stream failed for '{model_name}', trying fallback: {e}")
|
||
continue
|
||
|
||
# All models failed
|
||
raise last_error or LLMProviderError(
|
||
"", f"No provider available for streaming '{resolved_model}'"
|
||
)
|
||
|
||
def _get_models_to_try(self, resolved_model: str) -> list[str]:
|
||
"""Return [primary_model] + fallback_models for the given resolved model."""
|
||
fallback_models = self._config.fallbacks.get(resolved_model, [])
|
||
return [resolved_model] + fallback_models
|
||
|
||
def _resolve_model_alias(self, model: str) -> str:
|
||
"""解析模型别名"""
|
||
if model in self._config.model_aliases:
|
||
return self._config.model_aliases[model]
|
||
return model
|
||
|
||
def get_provider_name_for_model(self, model: str) -> str | None:
|
||
"""返回 model 对应的 provider 名(用于 provider-specific 优化如 cache_control)。
|
||
|
||
ponytail: 仅做 alias 解析 + provider 前缀提取,不查内部状态。
|
||
升级路径:ServerConfig 显式声明 provider per model。
|
||
返回 None 表示无法确定(多 provider + 无 "/" 前缀),调用方应回退到字符串拼接。
|
||
"""
|
||
resolved = self._resolve_model_alias(model)
|
||
if "/" in resolved:
|
||
provider_name = resolved.split("/", 1)[0]
|
||
if provider_name in self._providers:
|
||
return provider_name
|
||
return None
|
||
# 无 "/" 前缀:仅当只有一个 provider 时能确定
|
||
if len(self._providers) == 1:
|
||
return next(iter(self._providers))
|
||
return None
|
||
|
||
def _resolve_model(self, model: str) -> tuple[LLMProvider, str]:
|
||
"""解析模型为 (provider, actual_model_name)"""
|
||
# model 格式: "provider/model_name" 或 "model_name"
|
||
if "/" in model:
|
||
provider_name, model_name = model.split("/", 1)
|
||
if provider_name not in self._providers:
|
||
raise ModelNotFoundError(model)
|
||
return self._providers[provider_name], model_name
|
||
|
||
# 无 "/" 前缀:仅当只有一个 provider 时自动匹配
|
||
if len(self._providers) == 1:
|
||
provider = next(iter(self._providers.values()))
|
||
return provider, model
|
||
|
||
raise ModelNotFoundError(model)
|
||
|
||
def _get_fallback_model(self, model: str) -> str | None:
|
||
"""获取 Fallback 模型"""
|
||
fallbacks = self._config.fallbacks.get(model, [])
|
||
return fallbacks[0] if fallbacks else None
|
||
|
||
def _calculate_cost(self, model: str, usage: TokenUsage) -> float:
|
||
"""计算成本"""
|
||
# 在 provider config 的 models 中查找成本配置
|
||
for provider_config in self._config.providers.values():
|
||
if model in provider_config.models:
|
||
model_conf = provider_config.models[model]
|
||
input_cost = usage.prompt_tokens * model_conf.get("cost_per_1k_input", 0) / 1000
|
||
output_cost = (
|
||
usage.completion_tokens * model_conf.get("cost_per_1k_output", 0) / 1000
|
||
)
|
||
return input_cost + output_cost
|
||
return 0.0
|
||
|
||
def get_usage(
|
||
self,
|
||
agent_name: str | None = None,
|
||
start_time=None,
|
||
end_time=None,
|
||
) -> UsageSummary:
|
||
"""查询使用量"""
|
||
return self._usage_tracker.get_usage(
|
||
agent_name=agent_name,
|
||
start_time=start_time,
|
||
end_time=end_time,
|
||
)
|
||
|
||
# ------------------------------------------------------------------
|
||
# Quota enforcement helpers (U7)
|
||
# ------------------------------------------------------------------
|
||
|
||
async def _record_usage(
|
||
self,
|
||
agent_name: str,
|
||
model: str,
|
||
usage: TokenUsage,
|
||
cost: float,
|
||
latency_ms: float,
|
||
user_id: str | None,
|
||
department_ids: list[str] | None,
|
||
) -> None:
|
||
"""Record a usage event via the async store interface (KTD-6).
|
||
|
||
Multi-department attribution (U2): when a user belongs to
|
||
multiple departments, a separate :class:`UsageRecord` is created
|
||
for each department. This ensures ``get_usage(dept_id)`` returns
|
||
the correct total for every department the user belongs to,
|
||
matching the quota check scope (which checks all departments).
|
||
|
||
TOCTOU (KTD-2): This method is called *after* the LLM response
|
||
is received. Between ``_enforce_quota`` (before the call) and
|
||
this recording, concurrent requests may push usage over the
|
||
limit. This race window is accepted; post-hoc reconciliation
|
||
(periodic scans for over-limit users) handles violations.
|
||
"""
|
||
if not department_ids:
|
||
# API key users (no departments) — record once with dept=None.
|
||
await self._usage_tracker.record_async(
|
||
agent_name=agent_name,
|
||
model=model,
|
||
usage=usage,
|
||
cost=cost,
|
||
latency_ms=latency_ms,
|
||
user_id=user_id,
|
||
department_id=None,
|
||
)
|
||
return
|
||
|
||
# Record one entry per department so each department's aggregate
|
||
# includes this usage. The cost is attributed in full to each
|
||
# department (not split) — this matches how quota checks work
|
||
# (each department is checked against the full usage).
|
||
for dept_id in department_ids:
|
||
await self._usage_tracker.record_async(
|
||
agent_name=agent_name,
|
||
model=model,
|
||
usage=usage,
|
||
cost=cost,
|
||
latency_ms=latency_ms,
|
||
user_id=user_id,
|
||
department_id=dept_id,
|
||
)
|
||
|
||
async def _enforce_quota(
|
||
self,
|
||
db_path: Path | str,
|
||
department_ids: list[str],
|
||
resolved_model: str,
|
||
) -> None:
|
||
"""Run all quota checks for the given departments.
|
||
|
||
Strictest-wins: if ANY department fails ANY check, raises
|
||
:class:`QuotaExceededError` and the request is rejected.
|
||
|
||
Both daily and monthly periods are checked (U2): for each
|
||
department, ``token_limit`` and ``cost_limit`` are evaluated
|
||
against both ``daily`` and ``monthly`` windows.
|
||
|
||
Fail-closed (KTD-1): if the usage store is unavailable (Redis
|
||
degraded), raises :class:`UsageStoreUnavailableError`. The
|
||
caller must translate this to HTTP 503.
|
||
|
||
TOCTOU (KTD-2): quota is checked *before* the LLM call, and
|
||
usage is recorded *after*. Concurrent requests in this window
|
||
may exceed the limit. This race is accepted; see
|
||
:meth:`_record_usage` for the reconciliation strategy.
|
||
"""
|
||
# Lazy import to avoid circular dependency (admin → ... → gateway).
|
||
from agentkit.server.admin.quota_service import get_quota_service
|
||
|
||
quota_service = get_quota_service()
|
||
db = Path(db_path)
|
||
|
||
for dept_id in department_ids:
|
||
# 1. Model whitelist
|
||
allowed, _reason = await quota_service.is_model_allowed(db, dept_id, resolved_model)
|
||
if not allowed:
|
||
raise QuotaExceededError(
|
||
department_id=dept_id,
|
||
quota_type="model_whitelist",
|
||
period="",
|
||
limit="",
|
||
current=resolved_model,
|
||
)
|
||
|
||
# 2. Token + cost limits (daily AND monthly)
|
||
# 优化:每个 period 只查一次 get_usage,复用 summary 检查 token + cost
|
||
for period in ("daily", "monthly"):
|
||
summary = self._get_usage_summary(dept_id, period)
|
||
current_tokens = int(summary.total_tokens)
|
||
current_cost_cents = float(summary.total_cost) * 100.0
|
||
await self._check_quota_value(
|
||
quota_service, db, dept_id, period, "token_limit", current_tokens
|
||
)
|
||
await self._check_quota_value(
|
||
quota_service, db, dept_id, period, "cost_limit", current_cost_cents
|
||
)
|
||
|
||
def _get_usage_summary(self, department_id: str, period: str) -> UsageSummary:
|
||
"""返回 department_id 在当前 period 的 usage summary(单次查询,供 token+cost 复用)。"""
|
||
now = datetime.now(timezone.utc)
|
||
if period == "monthly":
|
||
start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
|
||
else:
|
||
start = now.replace(hour=0, minute=0, second=0, microsecond=0)
|
||
return self._usage_tracker.get_usage(
|
||
department_id=department_id, start_time=start, end_time=now
|
||
)
|
||
|
||
async def _check_quota_value(
|
||
self,
|
||
quota_service: Any,
|
||
db: Path,
|
||
dept_id: str,
|
||
period: str,
|
||
quota_type: str,
|
||
current: float,
|
||
) -> None:
|
||
"""检查单个配额(token_limit 或 cost_limit)— current 由调用方预计算传入。"""
|
||
allowed, _reason = await quota_service.check_quota(db, dept_id, quota_type, period, current)
|
||
if not allowed:
|
||
quota = await quota_service.get_quota(db, dept_id, quota_type, period)
|
||
limit = quota["limit_value"] if quota else None
|
||
raise QuotaExceededError(
|
||
department_id=dept_id,
|
||
quota_type=quota_type,
|
||
period=period,
|
||
limit=limit,
|
||
current=current,
|
||
)
|