feat(gateway): empty response auto-retry with fallback model chain

This commit is contained in:
chiguyong 2026-06-16 08:07:21 +08:00
parent 9caf332e9e
commit 16ac592855
4 changed files with 58 additions and 10 deletions

View File

@ -598,10 +598,16 @@ class ReActEngine:
# 兜底:确保 output 永远不为空字符串
if not output or not output.strip():
output = (
"抱歉,我暂时无法生成有效的回复。请尝试换一种方式描述你的需求,"
"或者稍后再试。"
)
if step >= self._max_steps:
output = (
f"已达到最大推理步数({self._max_steps}步),但仍未得到完整结论。"
"建议1) 简化问题后重试2) 将任务拆分为更小的步骤。"
)
else:
output = (
"模型未返回有效内容可能原因1) 服务暂时过载2) 请求内容超出模型处理范围。"
"建议1) 稍后重试2) 简化或缩短输入内容。"
)
trace_outcome = "empty_fallback"
# 结束轨迹记录
@ -1127,10 +1133,16 @@ class ReActEngine:
# 兜底:确保 output 永远不为空字符串
if not output or not output.strip():
output = (
"抱歉,我暂时无法生成有效的回复。请尝试换一种方式描述你的需求,"
"或者稍后再试。"
)
if step >= self._max_steps:
output = (
f"已达到最大推理步数({self._max_steps}步),但仍未得到完整结论。"
"建议1) 简化问题后重试2) 将任务拆分为更小的步骤。"
)
else:
output = (
"模型未返回有效内容可能原因1) 服务暂时过载2) 请求内容超出模型处理范围。"
"建议1) 稍后重试2) 简化或缩短输入内容。"
)
trace_outcome = "empty_fallback"
yield ReActEvent(
event_type="final_answer",

View File

@ -169,6 +169,7 @@ class LLMGateway:
# ── Normal provider call ──
models_to_try = self._get_models_to_try(resolved_model)
last_error: LLMProviderError | None = None
response: LLMResponse | None = None
try:
for model_name in models_to_try:
@ -186,6 +187,21 @@ class LLMGateway:
)
try:
response = await provider.chat(req)
# Empty response detection: if content is None/empty and no tool_calls,
# treat as failure and try next fallback model.
# This handles the common case where providers return 200 OK but empty body.
if (
response.content is None or not response.content.strip()
) and not response.tool_calls:
logger.warning(
f"Model '{model_name}' returned empty content with no tool_calls, "
f"trying next fallback"
)
last_error = LLMProviderError(
model_name,
f"Empty response from {model_name} (no content, no tool_calls)",
)
continue
break
except LLMProviderError as e:
last_error = e
@ -302,6 +318,20 @@ class LLMGateway:
cost=cost,
latency_ms=latency_ms,
)
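# Empty stream detection: if the stream produced no text content, try the
# next fallback model (mirrors the non-streaming empty-response handling).
# NOTE(review): only total_content is checked here, not tool_calls; unless that
# case is handled earlier, a tool-call-only stream is also treated as empty — confirm.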
if not total_content.strip():
logger.warning(
f"Stream from '{model_name}' produced empty content, "
f"trying next fallback"
)
last_error = LLMProviderError(
model_name,
f"Empty stream from {model_name}",
)
continue
return # Success, done
except Exception as e:
last_error = e

View File

@ -515,7 +515,11 @@ async def _handle_chat_message(
)
final_content = response.content or ""
if not final_content or not final_content.strip():
final_content = "抱歉,我暂时无法生成有效的回复。请尝试换一种方式描述你的需求,或者稍后再试。"
final_content = (
"模型未返回有效内容,已尝试备用模型仍未成功。"
"可能原因1) 服务暂时过载2) 请求内容超出模型处理范围。"
"建议1) 稍后重试2) 简化或缩短输入内容。"
)
await websocket.send_json({
"type": "final_answer",
"content": final_content,

View File

@ -42,7 +42,9 @@ _api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)
_api_key_query = APIKeyQuery(name="api_key", auto_error=False)
_EMPTY_RESPONSE_FALLBACK = (
"抱歉,我暂时无法生成有效的回复。请尝试换一种方式描述你的需求,或者稍后再试。"
"模型未返回有效内容,已尝试备用模型仍未成功。"
"可能原因1) 服务暂时过载2) 请求内容超出模型处理范围。"
"建议1) 稍后重试2) 简化或缩短输入内容。"
)