""" 情感分析服务 - 使用DeepSeek API分析AI回答中对品牌的情感倾向 """ import asyncio import hashlib import json import logging import re import time from typing import Optional from app.config import settings logger = logging.getLogger(__name__) # 情感分析Prompt模板 SENTIMENT_ANALYSIS_PROMPT = """分析以下AI回答中对品牌"{brand_name}"的情感倾向。 请严格返回以下JSON格式,不要包含其他内容: {{ "sentiment": "positive" | "neutral" | "negative", "confidence": 0.0到1.0之间的浮点数, "key_phrases": ["关键情感短语1", "关键情感短语2"], "reasoning": "判断理由的简要说明" }} 判断标准: - positive: AI明确推荐、赞扬、肯定该品牌,或在对比中明显偏向该品牌 - neutral: AI客观提及该品牌,无明确褒贬,或褒贬平衡 - negative: AI批评、否定、不推荐该品牌,或在对比中明显贬低该品牌 - confidence: 判断的置信度,0.0表示完全不确定,1.0表示完全确定 AI回答内容: {content}""" class SentimentResult: """情感分析结果""" def __init__( self, sentiment: str, confidence: float, key_phrases: list[str], reasoning: str, ): self.sentiment = sentiment # positive / neutral / negative self.confidence = confidence # 0.0 - 1.0 self.key_phrases = key_phrases # 关键情感短语 self.reasoning = reasoning # 判断理由 def to_dict(self) -> dict: return { "sentiment": self.sentiment, "confidence": self.confidence, "key_phrases": self.key_phrases, "reasoning": self.reasoning, } class SentimentCache: """ 情感分析结果缓存 使用内存缓存,相同内容不重复调用LLM。 缓存键为 brand_name + content 的哈希值。 """ def __init__(self, max_size: int = 1000, ttl_seconds: int = 86400): self._cache: dict[str, tuple[SentimentResult, float]] = {} self._max_size = max_size self._ttl_seconds = ttl_seconds def _make_key(self, brand_name: str, content: str) -> str: """生成缓存键""" raw = f"{brand_name}::{content}" return hashlib.sha256(raw.encode("utf-8")).hexdigest() def get(self, brand_name: str, content: str) -> Optional[SentimentResult]: """获取缓存结果""" key = self._make_key(brand_name, content) entry = self._cache.get(key) if entry is None: return None result, timestamp = entry if time.time() - timestamp > self._ttl_seconds: del self._cache[key] return None return result def set(self, brand_name: str, content: str, result: SentimentResult) -> None: """设置缓存结果""" # 清理过期缓存 if len(self._cache) >= self._max_size: self._evict_expired() if len(self._cache) >= self._max_size: # 按时间排序,删除最旧的10% sorted_items = sorted( self._cache.items(), key=lambda x: x[1][1] ) evict_count = max(1, len(sorted_items) // 10) for k, _ in sorted_items[:evict_count]: del self._cache[k] key = self._make_key(brand_name, content) self._cache[key] = (result, time.time()) def _evict_expired(self) -> None: """清理过期缓存""" now = time.time() expired_keys = [ k for k, (_, ts) in self._cache.items() if now - ts > self._ttl_seconds ] for k in expired_keys: del self._cache[k] def clear(self) -> None: """清空缓存""" self._cache.clear() class SentimentAnalysisService: """ 情感分析服务 使用DeepSeek API分析AI回答中对品牌的情感倾向。 支持: - 缓存:相同内容不重复调用LLM - 重试:API调用失败时自动重试 - 开关:通过ENABLE_LLM环境变量控制 - 降级:LLM不可用时使用基于规则的分析 """ def __init__( self, api_key: Optional[str] = None, max_retries: int = 3, retry_delay: float = 1.0, ): self.api_key = api_key or settings.DEEPSEEK_API_KEY self.max_retries = max_retries self.retry_delay = retry_delay self._client = None self._cache = SentimentCache() @property def client(self): """延迟初始化DeepSeek客户端""" if self._client is None: try: from openai import OpenAI self._client = OpenAI( api_key=self.api_key, base_url="https://api.deepseek.com", ) except ImportError: raise RuntimeError( "请安装openai库: pip install openai" ) return self._client async def analyze( self, brand_name: str, content: str, ) -> SentimentResult: """ 分析AI回答中对品牌的情感倾向 Args: brand_name: 品牌名称 content: AI回答内容 Returns: SentimentResult: 情感分析结果 """ if not content or not content.strip(): return SentimentResult( sentiment="neutral", confidence=0.0, key_phrases=[], reasoning="内容为空,无法分析", ) # 检查缓存 cached = self._cache.get(brand_name, content) if cached is not None: logger.debug(f"情感分析命中缓存: brand={brand_name}") return cached # 检查LLM开关 if not settings.ENABLE_LLM or not self.api_key: logger.info( f"LLM情感分析未启用 (ENABLE_LLM={settings.ENABLE_LLM}, " f"has_api_key={bool(self.api_key)}),使用规则分析" ) result = self._rule_based_analysis(brand_name, content) self._cache.set(brand_name, content, result) return result # 调用DeepSeek API result = await self._call_with_retry(brand_name, content) self._cache.set(brand_name, content, result) return result async def _call_with_retry( self, brand_name: str, content: str, ) -> SentimentResult: """带重试的API调用""" last_error = None for attempt in range(self.max_retries): try: result = await self._call_deepseek(brand_name, content) return result except Exception as e: last_error = e logger.warning( f"情感分析API调用失败 " f"(尝试 {attempt + 1}/{self.max_retries}): {e}" ) if attempt < self.max_retries - 1: # 指数退避 delay = self.retry_delay * (2 ** attempt) await asyncio.sleep(delay) # 所有重试失败,降级到规则分析 logger.error( f"情感分析API调用失败,已重试{self.max_retries}次," f"降级到规则分析: {last_error}" ) return self._rule_based_analysis(brand_name, content) async def _call_deepseek( self, brand_name: str, content: str, ) -> SentimentResult: """调用DeepSeek API进行情感分析""" prompt = SENTIMENT_ANALYSIS_PROMPT.format( brand_name=brand_name, content=content[:3000], # 限制内容长度,避免token过多 ) # 在线程池中执行同步的API调用 response_dict = await asyncio.to_thread( self._sync_call_deepseek, prompt ) return self._parse_response(response_dict) def _sync_call_deepseek(self, prompt: str) -> dict: """同步调用DeepSeek API""" response = self.client.chat.completions.create( model="deepseek-chat", messages=[ { "role": "system", "content": ( "你是一个专业的品牌情感分析专家。" "你的任务是分析AI回答中对特定品牌的情感倾向。" "请严格按照要求的JSON格式返回结果。" ), }, {"role": "user", "content": prompt}, ], temperature=0.1, max_tokens=500, ) content = response.choices[0].message.content if not content: raise RuntimeError("API返回空响应") # 提取JSON json_str = self._extract_json(content) return json.loads(json_str) def _extract_json(self, text: str) -> str: """从文本中提取JSON""" # 尝试直接解析 try: json.loads(text) return text except json.JSONDecodeError: pass # 尝试从代码块中提取 json_pattern = r"```(?:json)?\s*([\s\S]*?)\s*```" match = re.search(json_pattern, text) if match: return match.group(1).strip() # 尝试找到第一个{到最后一个}之间的内容 first_brace = text.find("{") last_brace = text.rfind("}") if first_brace != -1 and last_brace != -1 and last_brace > first_brace: return text[first_brace : last_brace + 1] raise RuntimeError(f"无法从响应中提取JSON: {text[:200]}") def _parse_response(self, response: dict) -> SentimentResult: """解析API响应""" sentiment = str(response.get("sentiment", "neutral")).lower() if sentiment not in ("positive", "neutral", "negative"): sentiment = "neutral" confidence = float(response.get("confidence", 0.5)) confidence = max(0.0, min(1.0, confidence)) key_phrases = response.get("key_phrases", []) if not isinstance(key_phrases, list): key_phrases = [] key_phrases = [str(p) for p in key_phrases[:10]] # 最多10个短语 reasoning = str(response.get("reasoning", "")) return SentimentResult( sentiment=sentiment, confidence=confidence, key_phrases=key_phrases, reasoning=reasoning, ) def _rule_based_analysis( self, brand_name: str, content: str, ) -> SentimentResult: """ 基于规则的情感分析(降级方案) 当LLM不可用时使用关键词匹配进行简单分析 """ # 正面关键词 positive_keywords = [ "推荐", "领先", "优秀", "首选", "最佳", "出色", "卓越", "优势", "创新", "专业", "值得", "信赖", "优质", "好评", "突出", "领先地位", "行业标杆", "第一", "top", "best", "领先者", "佼佼者", "知名", "著名", "口碑好", ] # 负面关键词 negative_keywords = [ "不足", "缺陷", "问题", "较差", "落后", "劣势", "不推荐", "差评", "投诉", "风险", "隐患", "短板", "弱点", "不足之处", "有待改善", "不及", "逊色", "劣势", "负面", ] content_lower = content.lower() positive_matches = [kw for kw in positive_keywords if kw in content_lower] negative_matches = [kw for kw in negative_keywords if kw in content_lower] positive_count = len(positive_matches) negative_count = len(negative_matches) if positive_count > negative_count: sentiment = "positive" confidence = min(0.9, 0.5 + positive_count * 0.1) key_phrases = positive_matches[:5] reasoning = f"检测到{positive_count}个正面关键词" elif negative_count > positive_count: sentiment = "negative" confidence = min(0.9, 0.5 + negative_count * 0.1) key_phrases = negative_matches[:5] reasoning = f"检测到{negative_count}个负面关键词" else: sentiment = "neutral" confidence = 0.5 key_phrases = positive_matches[:3] + negative_matches[:3] if positive_count == 0 and negative_count == 0: reasoning = "未检测到明显情感倾向关键词" else: reasoning = f"正面和负面关键词数量相当({positive_count}vs{negative_count})" return SentimentResult( sentiment=sentiment, confidence=confidence, key_phrases=key_phrases, reasoning=reasoning, ) async def batch_analyze( self, items: list[tuple[str, str]], ) -> list[SentimentResult]: """ 批量情感分析 Args: items: [(brand_name, content), ...] 列表 Returns: SentimentResult列表,顺序与输入一致 """ results = [] for brand_name, content in items: result = await self.analyze(brand_name, content) results.append(result) return results def clear_cache(self) -> None: """清空缓存""" self._cache.clear() # 全局单例 _sentiment_service: Optional[SentimentAnalysisService] = None def get_sentiment_service() -> SentimentAnalysisService: """获取情感分析服务单例""" global _sentiment_service if _sentiment_service is None: _sentiment_service = SentimentAnalysisService() return _sentiment_service