"""详细健康检查服务""" import time from dataclasses import dataclass from typing import Optional import redis.asyncio as aioredis from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncSession @dataclass class HealthCheckResult: """健康检查结果""" name: str healthy: bool latency_ms: Optional[float] = None message: Optional[str] = None details: Optional[dict] = None class HealthChecker: """健康检查服务""" def __init__(self, db: AsyncSession, redis_url: str): self.db = db self.redis_url = redis_url async def check_database(self) -> HealthCheckResult: """检查数据库连接""" start = time.perf_counter() try: await self.db.execute(text("SELECT 1")) latency = (time.perf_counter() - start) * 1000 return HealthCheckResult( name="database", healthy=True, latency_ms=round(latency, 2), message="Connection OK", ) except Exception as e: latency = (time.perf_counter() - start) * 1000 return HealthCheckResult( name="database", healthy=False, latency_ms=round(latency, 2), message=f"Connection failed: {str(e)}", ) async def check_redis(self) -> HealthCheckResult: """检查Redis连接""" start = time.perf_counter() try: from app.core.redis import get_redis redis = await get_redis() await redis.ping() latency = (time.perf_counter() - start) * 1000 return HealthCheckResult( name="redis", healthy=True, latency_ms=round(latency, 2), message="Connection OK", ) except Exception as e: latency = (time.perf_counter() - start) * 1000 return HealthCheckResult( name="redis", healthy=False, latency_ms=round(latency, 2), message=f"Connection failed: {str(e)}", ) async def check_llm_providers(self) -> HealthCheckResult: """检查LLM服务提供商""" from app.config import settings from app.services.llm.factory import LLMFactory providers = {} all_healthy = True # 检查默认provider try: provider_name = getattr(settings, 'DEFAULT_LLM_PROVIDER', 'openai') provider = LLMFactory.create(provider_name) providers[provider_name] = { "healthy": True, "available": True, } except Exception as e: providers[getattr(settings, 'DEFAULT_LLM_PROVIDER', 'openai')] = { "healthy": False, "error": str(e), } all_healthy = False # 检查所有已注册的provider for name in LLMFactory.list_providers(): if name not in providers: try: provider = LLMFactory.create(name) providers[name] = { "healthy": True, "available": True, } except Exception as e: providers[name] = { "healthy": False, "error": str(e), } all_healthy = False return HealthCheckResult( name="llm_providers", healthy=all_healthy, message="All providers healthy" if all_healthy else "Some providers unhealthy", details={"providers": providers}, ) async def check_storage(self) -> HealthCheckResult: """检查存储(本地文件系统)""" import os storage_path = "/data/documents" start = time.perf_counter() try: if os.path.exists(storage_path): # 检查读写权限 test_file = os.path.join(storage_path, ".health_check") with open(test_file, "w") as f: f.write("ok") os.remove(test_file) latency = (time.perf_counter() - start) * 1000 return HealthCheckResult( name="storage", healthy=True, latency_ms=round(latency, 2), message=f"Storage path {storage_path} is writable", details={"path": storage_path}, ) else: latency = (time.perf_counter() - start) * 1000 return HealthCheckResult( name="storage", healthy=True, latency_ms=round(latency, 2), message=f"Storage path {storage_path} does not exist (will be created)", details={"path": storage_path, "created": True}, ) except Exception as e: latency = (time.perf_counter() - start) * 1000 return HealthCheckResult( name="storage", healthy=False, latency_ms=round(latency, 2), message=f"Storage check failed: {str(e)}", ) async def check_all(self) -> dict: """执行所有健康检查""" import asyncio # 并行执行所有检查 checks = [ self.check_database(), self.check_redis(), self.check_llm_providers(), self.check_storage(), ] results = await asyncio.gather(*checks) # 汇总结果 all_healthy = all(r.healthy for r in results) return { "status": "healthy" if all_healthy else "degraded", "timestamp": time.time(), "checks": { r.name: { "healthy": r.healthy, "latency_ms": r.latency_ms, "message": r.message, "details": r.details, } for r in results }, }