"""Detailed health-check service."""
|
|
import time
|
|
from dataclasses import dataclass
|
|
from typing import Optional
|
|
|
|
import redis.asyncio as aioredis
|
|
from sqlalchemy import text
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
|
|
@dataclass
class HealthCheckResult:
    """Outcome of a single health check.

    Only ``name`` and ``healthy`` are required; the remaining fields default
    to ``None`` and are filled in by the checks that have something to report.
    """

    # Identifier of the checked component; HealthChecker.check_all uses it as
    # the key of the per-check entry in its report.
    name: str
    # True when the check succeeded.
    healthy: bool
    # Duration of the check in milliseconds, when the check measured one.
    latency_ms: Optional[float] = None
    # Short human-readable status or error text.
    message: Optional[str] = None
    # Free-form extra data attached by the check.
    details: Optional[dict] = None
|
|
|
|
|
|
class HealthChecker:
    """Run health checks against the database, Redis, LLM providers and storage.

    Every ``check_*`` coroutine returns a ``HealthCheckResult`` and reports
    failures through ``healthy=False`` instead of raising. ``check_all`` runs
    them concurrently and folds the results into one report dict.
    """

    # Directory probed by check_storage when the caller does not override it.
    DEFAULT_STORAGE_PATH = "/data/documents"

    def __init__(
        self,
        db: AsyncSession,
        redis_url: str,
        storage_path: str = DEFAULT_STORAGE_PATH,
    ):
        """Store the dependencies the checks need.

        Args:
            db: Session used to run the ``SELECT 1`` database probe.
            redis_url: URL handed to ``redis.asyncio.from_url`` for the Redis probe.
            storage_path: Directory probed by ``check_storage``.
        """
        self.db = db
        self.redis_url = redis_url
        self.storage_path = storage_path

    @staticmethod
    def _elapsed_ms(start: float) -> float:
        """Return milliseconds elapsed since ``start`` (a perf_counter value), rounded to 2 places."""
        return round((time.perf_counter() - start) * 1000, 2)

    @staticmethod
    def _probe_provider(factory, name: str) -> dict:
        """Try to create provider ``name`` via ``factory`` and return its status entry."""
        try:
            factory.create(name)
        except Exception as e:
            return {"healthy": False, "error": str(e)}
        return {"healthy": True, "available": True}

    @staticmethod
    def _probe_storage(storage_path: str) -> bool:
        """Write and delete a probe file in ``storage_path``.

        Returns:
            False when ``storage_path`` does not exist, True once the probe
            file has been written and removed.

        Raises:
            OSError: when the probe file cannot be created or written.
        """
        import os
        import tempfile

        if not os.path.exists(storage_path):
            return False
        # A unique temporary name keeps concurrent health checks from deleting
        # each other's probe file; the file is removed when the context exits,
        # including when the write fails.
        with tempfile.NamedTemporaryFile(
            mode="w", dir=storage_path, prefix=".health_check_"
        ) as probe:
            probe.write("ok")
            probe.flush()
        return True

    async def check_database(self) -> HealthCheckResult:
        """Check the database connection by executing ``SELECT 1``.

        Returns:
            A result named ``"database"`` carrying the measured latency; on
            failure the exception text is included in ``message``.
        """
        start = time.perf_counter()
        try:
            await self.db.execute(text("SELECT 1"))
        except Exception as e:
            return HealthCheckResult(
                name="database",
                healthy=False,
                latency_ms=self._elapsed_ms(start),
                message=f"Connection failed: {str(e)}",
            )
        return HealthCheckResult(
            name="database",
            healthy=True,
            latency_ms=self._elapsed_ms(start),
            message="Connection OK",
        )

    async def check_redis(self) -> HealthCheckResult:
        """Check the Redis connection by sending ``PING`` on a short-lived client.

        Returns:
            A result named ``"redis"`` carrying the measured latency (client
            creation, ping and close); on failure the exception text is
            included in ``message``.
        """
        start = time.perf_counter()
        try:
            client = aioredis.from_url(
                self.redis_url,
                socket_connect_timeout=2,
            )
            try:
                await client.ping()
            finally:
                # Close the client even when PING fails so a failing Redis
                # does not leak one client per health-check call.
                await client.aclose()
        except Exception as e:
            return HealthCheckResult(
                name="redis",
                healthy=False,
                latency_ms=self._elapsed_ms(start),
                message=f"Connection failed: {str(e)}",
            )
        return HealthCheckResult(
            name="redis",
            healthy=True,
            latency_ms=self._elapsed_ms(start),
            message="Connection OK",
        )

    async def check_llm_providers(self) -> HealthCheckResult:
        """Check that the LLM providers can be created through ``LLMFactory``.

        The default provider (``settings.DEFAULT_LLM_PROVIDER``, falling back
        to ``'openai'``) is probed first, then every other name returned by
        ``LLMFactory.list_providers()``.

        Returns:
            A result named ``"llm_providers"`` whose ``details["providers"]``
            maps each provider name to its status entry. ``healthy`` is True
            only when every probed provider could be created.
        """
        providers = {}
        try:
            # Imported here rather than at module level, as in the original code.
            from app.config import settings
            from app.services.llm.factory import LLMFactory

            # Check the default provider first.
            default_name = getattr(settings, 'DEFAULT_LLM_PROVIDER', 'openai')
            providers[default_name] = self._probe_provider(LLMFactory, default_name)

            # Then check every other registered provider.
            for name in LLMFactory.list_providers():
                if name not in providers:
                    providers[name] = self._probe_provider(LLMFactory, name)
        except Exception as e:
            # An import or listing failure is reported as an unhealthy result
            # (with whatever was probed so far) instead of propagating.
            return HealthCheckResult(
                name="llm_providers",
                healthy=False,
                message=f"Provider check failed: {str(e)}",
                details={"providers": providers},
            )

        all_healthy = all(entry["healthy"] for entry in providers.values())
        return HealthCheckResult(
            name="llm_providers",
            healthy=all_healthy,
            message="All providers healthy" if all_healthy else "Some providers unhealthy",
            details={"providers": providers},
        )

    async def check_storage(self) -> HealthCheckResult:
        """Check the storage directory on the local file system.

        When ``self.storage_path`` exists, a probe file is written and removed
        to verify write access. A missing directory is still reported as
        healthy, matching the original behaviour.

        Returns:
            A result named ``"storage"``; on failure the exception text is
            included in ``message``.
        """
        import asyncio

        storage_path = self.storage_path

        try:
            # File I/O is blocking, so run it in the default executor to keep
            # the event loop free for the other checks.
            exists = await asyncio.get_running_loop().run_in_executor(
                None, self._probe_storage, storage_path
            )
        except Exception as e:
            return HealthCheckResult(
                name="storage",
                healthy=False,
                message=f"Storage check failed: {str(e)}",
            )

        if exists:
            return HealthCheckResult(
                name="storage",
                healthy=True,
                message=f"Storage path {storage_path} is writable",
                details={"path": storage_path},
            )
        # NOTE(review): nothing here creates the directory; the "created" flag
        # and message are kept as-is for callers that read them.
        return HealthCheckResult(
            name="storage",
            healthy=True,
            message=f"Storage path {storage_path} does not exist (will be created)",
            details={"path": storage_path, "created": True},
        )

    async def check_all(self) -> dict:
        """Run all health checks concurrently and summarise them.

        Returns:
            A dict with ``"status"`` (``"healthy"`` when every check passed,
            otherwise ``"degraded"``), ``"timestamp"`` (``time.time()``) and
            ``"checks"``, mapping each check name to its healthy flag,
            latency, message and details.
        """
        import asyncio

        # Run all checks in parallel.
        named_checks = {
            "database": self.check_database(),
            "redis": self.check_redis(),
            "llm_providers": self.check_llm_providers(),
            "storage": self.check_storage(),
        }
        outcomes = await asyncio.gather(
            *named_checks.values(), return_exceptions=True
        )

        results = []
        for name, outcome in zip(named_checks, outcomes):
            if isinstance(outcome, HealthCheckResult):
                results.append(outcome)
            elif isinstance(outcome, Exception):
                # A check that raised unexpectedly degrades the report
                # instead of aborting it.
                results.append(
                    HealthCheckResult(
                        name=name,
                        healthy=False,
                        message=f"Check failed: {str(outcome)}",
                    )
                )
            else:
                # Non-Exception BaseException (e.g. cancellation) is not a
                # health result; let it propagate.
                raise outcome

        # Aggregate the results.
        all_healthy = all(r.healthy for r in results)

        return {
            "status": "healthy" if all_healthy else "degraded",
            "timestamp": time.time(),
            "checks": {
                r.name: {
                    "healthy": r.healthy,
                    "latency_ms": r.latency_ms,
                    "message": r.message,
                    "details": r.details,
                }
                for r in results
            },
        }
|