103 lines
3.0 KiB
Python
103 lines
3.0 KiB
Python
"""LLM调用指标包装"""
|
||
import time
|
||
from typing import Optional
|
||
|
||
from app.middleware.prometheus_metrics import (
|
||
LLM_REQUESTS_TOTAL,
|
||
LLM_REQUEST_DURATION_SECONDS,
|
||
LLM_TOKENS_TOTAL,
|
||
LLM_COST_ESTIMATED,
|
||
)
|
||
|
||
# Estimated LLM pricing in USD per token, keyed by (provider, model).
# Each entry holds one rate for "prompt" tokens and one for "completion" tokens.
LLM_COST_PER_TOKEN: dict[tuple[str, str], dict[str, float]] = {
    # OpenAI
    ("openai", "gpt-4o"): {
        "prompt": 5e-6,
        "completion": 1.5e-5,
    },
    ("openai", "gpt-4o-mini"): {
        "prompt": 1.5e-7,
        "completion": 6e-7,
    },
    ("openai", "gpt-4-turbo"): {
        "prompt": 1e-5,
        "completion": 3e-5,
    },
    # DeepSeek
    ("deepseek", "deepseek-chat"): {
        "prompt": 1.4e-7,
        "completion": 2.8e-7,
    },
    ("deepseek", "deepseek-coder"): {
        "prompt": 1.4e-7,
        "completion": 2.8e-7,
    },
}
|
||
|
||
|
||
class LLMMetricsWrapper:
    """Records metrics for calls made to one LLM provider/model pair.

    The provider and model given at construction are attached as labels to
    every metric this wrapper updates.
    """

    def __init__(self, provider: str, model: str):
        self.provider = provider
        self.model = model

    def record_request(
        self,
        status: str,
        duration: float,
        prompt_tokens: Optional[int] = None,
        completion_tokens: Optional[int] = None,
    ):
        """Record the metrics for a single LLM request.

        Args:
            status: Value used for the ``status`` label of the request counter.
            duration: Value observed on the request-duration metric
                (presumably seconds, going by the metric's name).
            prompt_tokens: Prompt token count; not recorded when ``None``.
            completion_tokens: Completion token count; not recorded when
                ``None``.
        """
        base_labels = {"provider": self.provider, "model": self.model}

        # Request count and latency.
        LLM_REQUESTS_TOTAL.labels(**base_labels, status=status).inc()
        LLM_REQUEST_DURATION_SECONDS.labels(**base_labels).observe(duration)

        # Token usage, one labelled series per token type.
        for token_type, count in (
            ("prompt", prompt_tokens),
            ("completion", completion_tokens),
        ):
            if count is None:
                continue
            LLM_TOKENS_TOTAL.labels(**base_labels, token_type=token_type).inc(count)

        # Estimated cost; only recorded when it is strictly positive.
        estimated = self._estimate_cost(prompt_tokens, completion_tokens)
        if estimated > 0:
            LLM_COST_ESTIMATED.labels(**base_labels).inc(estimated)

    def _estimate_cost(
        self,
        prompt_tokens: Optional[int],
        completion_tokens: Optional[int],
    ) -> float:
        """Return the estimated cost of a request from its token counts.

        Returns 0.0 when ``LLM_COST_PER_TOKEN`` has no entry for this
        wrapper's (provider, model) pair. A count that is ``None`` or 0
        contributes nothing to the total.
        """
        rates = LLM_COST_PER_TOKEN.get((self.provider, self.model))
        if not rates:
            return 0.0

        estimate = 0.0
        for rate_key, count in (
            ("prompt", prompt_tokens),
            ("completion", completion_tokens),
        ):
            if count:
                estimate += count * rates[rate_key]
        return estimate
|
||
|
||
|
||
# Module-level cache of metrics wrappers, keyed by (provider, model).
_llm_metrics_cache: dict[tuple[str, str], LLMMetricsWrapper] = {}


def get_llm_metrics(provider: str, model: str) -> LLMMetricsWrapper:
    """Return the cached metrics wrapper for a provider/model pair.

    A wrapper is created and stored on the first call for a given pair;
    later calls with the same pair return that same instance.

    Args:
        provider: Provider name passed to ``LLMMetricsWrapper``.
        model: Model name passed to ``LLMMetricsWrapper``.

    Returns:
        The ``LLMMetricsWrapper`` associated with ``(provider, model)``.
    """
    # Key on the tuple instead of a "provider:model" string: if either value
    # contains ":", the joined string is ambiguous (("a:b", "c") and
    # ("a", "b:c") would share one entry and return the wrong wrapper).
    key = (provider, model)
    wrapper = _llm_metrics_cache.get(key)
    if wrapper is None:
        wrapper = LLMMetricsWrapper(provider, model)
        _llm_metrics_cache[key] = wrapper
    return wrapper
|