"""
Sentiment analysis service - uses the DeepSeek API to analyze the sentiment
toward a brand expressed in AI-generated answers.
"""

import asyncio
import hashlib
import json
import logging
import re
import time
from typing import Optional

from app.config import settings
from app.utils.json_extractor import extract_json

logger = logging.getLogger(__name__)

# Prompt template for sentiment analysis. It is filled in with str.format(),
# so the literal braces of the JSON example are doubled ({{ / }}) and the only
# placeholders are {brand_name} and {content}.
SENTIMENT_ANALYSIS_PROMPT = """分析以下AI回答中对品牌"{brand_name}"的情感倾向。

请严格返回以下JSON格式,不要包含其他内容:
{{
"sentiment": "positive" | "neutral" | "negative",
"confidence": 0.0到1.0之间的浮点数,
"key_phrases": ["关键情感短语1", "关键情感短语2"],
"reasoning": "判断理由的简要说明"
}}

判断标准:
- positive: AI明确推荐、赞扬、肯定该品牌,或在对比中明显偏向该品牌
- neutral: AI客观提及该品牌,无明确褒贬,或褒贬平衡
- negative: AI批评、否定、不推荐该品牌,或在对比中明显贬低该品牌
- confidence: 判断的置信度,0.0表示完全不确定,1.0表示完全确定

AI回答内容:
{content}"""


class SentimentResult:
    """Result of one sentiment analysis.

    Attributes:
        sentiment: "positive", "neutral" or "negative".
        confidence: Confidence of the judgement, 0.0 - 1.0.
        key_phrases: Key sentiment phrases backing the judgement.
        reasoning: Short explanation of the judgement.
    """

    def __init__(
        self,
        sentiment: str,
        confidence: float,
        key_phrases: list[str],
        reasoning: str,
    ):
        self.sentiment = sentiment  # positive / neutral / negative
        self.confidence = confidence  # 0.0 - 1.0
        self.key_phrases = key_phrases  # key sentiment phrases
        self.reasoning = reasoning  # reason for the judgement

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(sentiment={self.sentiment!r}, "
            f"confidence={self.confidence!r}, "
            f"key_phrases={self.key_phrases!r}, "
            f"reasoning={self.reasoning!r})"
        )

    def to_dict(self) -> dict:
        """Return the result as a plain dict.

        The phrase list is copied so that mutating the returned dict cannot
        alter this object (instances may be shared between callers).
        """
        return {
            "sentiment": self.sentiment,
            "confidence": self.confidence,
            "key_phrases": list(self.key_phrases),
            "reasoning": self.reasoning,
        }


class SentimentCache:
    """
    In-memory cache of sentiment analysis results.

    Identical (brand_name, content) pairs are served from memory instead of
    calling the LLM again. Entries are keyed by a SHA-256 digest of the pair
    and carry the wall-clock time at which they were stored; an entry older
    than ``ttl_seconds`` is treated as missing.
    """

    def __init__(self, max_size: int = 1000, ttl_seconds: int = 86400):
        # key -> (result, time.time() at insertion)
        self._cache: dict[str, tuple["SentimentResult", float]] = {}
        self._max_size = max_size
        self._ttl_seconds = ttl_seconds

    def _make_key(self, brand_name: str, content: str) -> str:
        """Build the cache key for a (brand_name, content) pair.

        The brand is length-prefixed before hashing so that different pairs
        can never produce the same byte stream (a plain "brand::content" join
        makes ("a::b", "c") and ("a", "b::c") collide).
        """
        brand_bytes = str(brand_name).encode("utf-8")
        digest = hashlib.sha256()
        digest.update(len(brand_bytes).to_bytes(8, "big"))
        digest.update(brand_bytes)
        digest.update(str(content).encode("utf-8"))
        return digest.hexdigest()

    def get(self, brand_name: str, content: str) -> Optional["SentimentResult"]:
        """Return the cached result, or None if absent or expired.

        An expired entry is removed as a side effect of the lookup.
        """
        key = self._make_key(brand_name, content)
        entry = self._cache.get(key)
        if entry is None:
            return None
        result, timestamp = entry
        if time.time() - timestamp > self._ttl_seconds:
            del self._cache[key]
            return None
        return result

    def set(self, brand_name: str, content: str, result: "SentimentResult") -> None:
        """Store a result, making room first when the cache is full.

        Room is only made for a new key; overwriting an existing entry does
        not grow the cache, so nothing is evicted in that case.
        """
        key = self._make_key(brand_name, content)
        if key not in self._cache and len(self._cache) >= self._max_size:
            # Cheap pass first: drop whatever has already expired.
            self._evict_expired()
            if len(self._cache) >= self._max_size:
                self._evict_oldest()
        self._cache[key] = (result, time.time())

    def _evict_oldest(self) -> None:
        """Drop the oldest 10% of entries (at least one) by insertion time."""
        keys_by_age = sorted(self._cache, key=lambda k: self._cache[k][1])
        evict_count = max(1, len(keys_by_age) // 10)
        for key in keys_by_age[:evict_count]:
            del self._cache[key]

    def _evict_expired(self) -> None:
        """Remove every entry older than the TTL."""
        now = time.time()
        expired_keys = [
            key
            for key, (_, stored_at) in self._cache.items()
            if now - stored_at > self._ttl_seconds
        ]
        for key in expired_keys:
            del self._cache[key]

    def clear(self) -> None:
        """Remove all entries."""
        self._cache.clear()


class SentimentAnalysisService:
    """
    Sentiment analysis service.

    Uses the DeepSeek API to analyze the sentiment toward a brand expressed
    in an AI-generated answer. Features:
    - Cache: identical (brand, content) pairs do not call the LLM again
    - Retry: failed API calls are retried with exponential backoff
    - Switch: the LLM path is only used when settings.ENABLE_LLM is truthy
      and an API key is available
    - Fallback: keyword-based rule analysis when the LLM is unavailable
    """

    # Keywords for the rule-based fallback. Order matters: matches are
    # reported in this order.
    _POSITIVE_KEYWORDS = (
        "推荐", "领先", "优秀", "首选", "最佳", "出色", "卓越",
        "优势", "创新", "专业", "值得", "信赖", "优质", "好评",
        "突出", "领先地位", "行业标杆", "第一", "top", "best",
        "领先者", "佼佼者", "知名", "著名", "口碑好",
    )
    _NEGATIVE_KEYWORDS = (
        "不足", "缺陷", "问题", "较差", "落后", "劣势", "不推荐",
        "差评", "投诉", "风险", "隐患", "短板", "弱点", "不足之处",
        "有待改善", "不及", "逊色", "负面",
    )

    def __init__(
        self,
        api_key: Optional[str] = None,
        max_retries: int = 3,
        retry_delay: float = 1.0,
    ):
        """
        Args:
            api_key: DeepSeek API key; falls back to settings.DEEPSEEK_API_KEY.
            max_retries: Maximum number of API attempts per analysis.
            retry_delay: Base delay in seconds; doubled after each failure.
        """
        self.api_key = api_key or settings.DEEPSEEK_API_KEY
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        self._client = None  # created lazily by the `client` property
        self._cache = SentimentCache()

    @property
    def client(self):
        """Lazily create and return the OpenAI-compatible DeepSeek client.

        Raises:
            RuntimeError: If the `openai` package is not installed.
        """
        if self._client is None:
            # Only the import is guarded, so an ImportError raised while
            # constructing the client is not misreported as a missing package.
            try:
                from openai import OpenAI
            except ImportError as exc:
                raise RuntimeError(
                    "请安装openai库: pip install openai"
                ) from exc

            self._client = OpenAI(
                api_key=self.api_key,
                base_url="https://api.deepseek.com",
            )
        return self._client

    async def analyze(
        self,
        brand_name: str,
        content: str,
    ) -> "SentimentResult":
        """
        Analyze the sentiment toward a brand in an AI answer.

        Args:
            brand_name: Brand name.
            content: Text of the AI answer.

        Returns:
            SentimentResult: The analysis result. Empty or whitespace-only
            content yields a neutral result with confidence 0.0 and is not
            cached.
        """
        if not content or not content.strip():
            return SentimentResult(
                sentiment="neutral",
                confidence=0.0,
                key_phrases=[],
                reasoning="内容为空,无法分析",
            )

        # Check the cache first.
        cached = self._cache.get(brand_name, content)
        if cached is not None:
            logger.debug("情感分析命中缓存: brand=%s", brand_name)
            return cached

        # LLM switch: without the flag or a key, use the rule-based analysis.
        if not settings.ENABLE_LLM or not self.api_key:
            logger.info(
                "LLM情感分析未启用 (ENABLE_LLM=%s, has_api_key=%s),使用规则分析",
                settings.ENABLE_LLM,
                bool(self.api_key),
            )
            result = self._rule_based_analysis(brand_name, content)
            self._cache.set(brand_name, content, result)
            return result

        # Call the DeepSeek API.
        # NOTE(review): when every retry fails, the rule-based fallback result
        # is cached like a real LLM result and served until the TTL expires.
        result = await self._call_with_retry(brand_name, content)
        self._cache.set(brand_name, content, result)
        return result

    async def _call_with_retry(
        self,
        brand_name: str,
        content: str,
    ) -> "SentimentResult":
        """Call the API with retries; fall back to rule analysis on failure.

        Any exception from an attempt is logged and retried after
        ``retry_delay * 2 ** attempt`` seconds (no sleep after the last one).
        """
        last_error = None
        for attempt in range(self.max_retries):
            try:
                return await self._call_deepseek(brand_name, content)
            except Exception as e:
                last_error = e
                logger.warning(
                    "情感分析API调用失败 (尝试 %s/%s): %s",
                    attempt + 1,
                    self.max_retries,
                    e,
                )
                if attempt < self.max_retries - 1:
                    # Exponential backoff.
                    delay = self.retry_delay * (2 ** attempt)
                    await asyncio.sleep(delay)

        # All attempts failed: degrade to the rule-based analysis.
        logger.error(
            "情感分析API调用失败,已重试%s次,降级到规则分析: %s",
            self.max_retries,
            last_error,
        )
        return self._rule_based_analysis(brand_name, content)

    async def _call_deepseek(
        self,
        brand_name: str,
        content: str,
    ) -> "SentimentResult":
        """Build the prompt, call the DeepSeek API and parse the reply."""
        prompt = SENTIMENT_ANALYSIS_PROMPT.format(
            brand_name=brand_name,
            content=content[:3000],  # cap the content to limit token usage
        )

        # The client call is synchronous, so run it in a worker thread.
        response_dict = await asyncio.to_thread(
            self._sync_call_deepseek, prompt
        )

        return self._parse_response(response_dict)

    def _sync_call_deepseek(self, prompt: str) -> dict:
        """Synchronously call the DeepSeek API and return the decoded JSON.

        Raises:
            RuntimeError: If the reply is empty, contains no extractable
                JSON, or the JSON is not an object.
        """
        response = self.client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {
                    "role": "system",
                    "content": (
                        "你是一个专业的品牌情感分析专家。"
                        "你的任务是分析AI回答中对特定品牌的情感倾向。"
                        "请严格按照要求的JSON格式返回结果。"
                    ),
                },
                {"role": "user", "content": prompt},
            ],
            temperature=0.1,
            max_tokens=500,
        )

        content = response.choices[0].message.content
        if not content:
            raise RuntimeError("API返回空响应")

        # Extract the JSON payload from the reply text.
        try:
            json_str = extract_json(content)
        except ValueError as e:
            raise RuntimeError(str(e)) from e

        parsed = json.loads(json_str)
        if not isinstance(parsed, dict):
            # A list or scalar cannot carry the expected fields; fail here
            # with a clear message so the caller retries.
            raise RuntimeError("API返回的JSON不是对象")
        return parsed

    @staticmethod
    def _coerce_confidence(raw: object, default: float = 0.5) -> float:
        """Convert a raw confidence value to a float clamped to [0.0, 1.0].

        Values that cannot be converted, and NaN, yield ``default``.
        """
        try:
            value = float(raw)
        except (TypeError, ValueError, OverflowError):
            return default
        if value != value:  # NaN is the only value not equal to itself
            return default
        return max(0.0, min(1.0, value))

    def _parse_response(self, response: dict) -> "SentimentResult":
        """Validate the decoded API reply and build a SentimentResult.

        Unknown sentiment labels become "neutral", an unusable confidence
        becomes 0.5, a non-list ``key_phrases`` becomes an empty list, and at
        most 10 phrases are kept.
        """
        sentiment = str(response.get("sentiment", "neutral")).strip().lower()
        if sentiment not in ("positive", "neutral", "negative"):
            sentiment = "neutral"

        confidence = self._coerce_confidence(response.get("confidence", 0.5))

        key_phrases = response.get("key_phrases", [])
        if not isinstance(key_phrases, list):
            key_phrases = []
        # Skip JSON nulls instead of turning them into the text "None".
        key_phrases = [str(p) for p in key_phrases if p is not None][:10]

        raw_reasoning = response.get("reasoning", "")
        reasoning = "" if raw_reasoning is None else str(raw_reasoning)

        return SentimentResult(
            sentiment=sentiment,
            confidence=confidence,
            key_phrases=key_phrases,
            reasoning=reasoning,
        )

    @classmethod
    def _match_keywords(cls, content: str) -> tuple[list[str], list[str]]:
        """Return (positive_matches, negative_matches) found in ``content``.

        Keywords are tried longest first and every hit is masked out of the
        text, so a keyword contained in a longer matched one is not counted
        again (e.g. "推荐" inside "不推荐", "领先" inside "领先地位").
        ASCII keywords must not be adjacent to ASCII letters or digits, so
        "top" does not match inside "laptop". Matching is case-insensitive.
        """
        remaining = content.lower()
        found = set()
        candidates = cls._POSITIVE_KEYWORDS + cls._NEGATIVE_KEYWORDS
        for keyword in sorted(candidates, key=len, reverse=True):
            pattern = re.escape(keyword)
            if keyword.isascii():
                pattern = rf"(?<![a-z0-9]){pattern}(?![a-z0-9])"
            # Mask hits with NUL so the neighbours cannot join into a new match.
            remaining, hits = re.subn(pattern, "\x00", remaining)
            if hits:
                found.add(keyword)

        positive = [kw for kw in cls._POSITIVE_KEYWORDS if kw in found]
        negative = [kw for kw in cls._NEGATIVE_KEYWORDS if kw in found]
        return positive, negative

    def _rule_based_analysis(
        self,
        brand_name: str,
        content: str,
    ) -> "SentimentResult":
        """
        Rule-based sentiment analysis (fallback).

        Used when the LLM is unavailable: counts distinct positive and
        negative keywords in ``content`` and picks the majority. The
        confidence is 0.5 plus 0.1 per matched keyword of the winning side,
        capped at 0.9; a tie is neutral with confidence 0.5.

        ``brand_name`` is accepted for interface symmetry but is not used by
        the keyword matching.
        """
        positive_matches, negative_matches = self._match_keywords(content)
        positive_count = len(positive_matches)
        negative_count = len(negative_matches)

        if positive_count > negative_count:
            sentiment = "positive"
            confidence = min(0.9, 0.5 + positive_count * 0.1)
            key_phrases = positive_matches[:5]
            reasoning = f"检测到{positive_count}个正面关键词"
        elif negative_count > positive_count:
            sentiment = "negative"
            confidence = min(0.9, 0.5 + negative_count * 0.1)
            key_phrases = negative_matches[:5]
            reasoning = f"检测到{negative_count}个负面关键词"
        else:
            sentiment = "neutral"
            confidence = 0.5
            key_phrases = positive_matches[:3] + negative_matches[:3]
            if positive_count == 0 and negative_count == 0:
                reasoning = "未检测到明显情感倾向关键词"
            else:
                reasoning = f"正面和负面关键词数量相当({positive_count}vs{negative_count})"

        return SentimentResult(
            sentiment=sentiment,
            confidence=confidence,
            key_phrases=key_phrases,
            reasoning=reasoning,
        )

    async def batch_analyze(
        self,
        items: list[tuple[str, str]],
    ) -> list["SentimentResult"]:
        """
        Analyze several (brand_name, content) pairs.

        Items are analyzed one at a time, in order.

        Args:
            items: List of (brand_name, content) tuples.

        Returns:
            List of SentimentResult in the same order as the input.
        """
        results = []
        for brand_name, content in items:
            result = await self.analyze(brand_name, content)
            results.append(result)
        return results

    def clear_cache(self) -> None:
        """Remove every cached result."""
        self._cache.clear()


# Module-level singleton, created on first use.
_sentiment_service: Optional[SentimentAnalysisService] = None


def get_sentiment_service() -> SentimentAnalysisService:
    """Return the shared SentimentAnalysisService, creating it on first call."""
    global _sentiment_service
    instance = _sentiment_service
    if instance is None:
        instance = SentimentAnalysisService()
        _sentiment_service = instance
    return instance