feat(gateway): empty response auto-retry with fallback model chain
This commit is contained in:
parent
9caf332e9e
commit
16ac592855
|
|
@ -598,10 +598,16 @@ class ReActEngine:
|
||||||
|
|
||||||
# 兜底:确保 output 永远不为空字符串
|
# 兜底:确保 output 永远不为空字符串
|
||||||
if not output or not output.strip():
|
if not output or not output.strip():
|
||||||
output = (
|
if step >= self._max_steps:
|
||||||
"抱歉,我暂时无法生成有效的回复。请尝试换一种方式描述你的需求,"
|
output = (
|
||||||
"或者稍后再试。"
|
f"已达到最大推理步数({self._max_steps}步),但仍未得到完整结论。"
|
||||||
)
|
"建议:1) 简化问题后重试;2) 将任务拆分为更小的步骤。"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
output = (
|
||||||
|
"模型未返回有效内容,可能原因:1) 服务暂时过载;2) 请求内容超出模型处理范围。"
|
||||||
|
"建议:1) 稍后重试;2) 简化或缩短输入内容。"
|
||||||
|
)
|
||||||
trace_outcome = "empty_fallback"
|
trace_outcome = "empty_fallback"
|
||||||
|
|
||||||
# 结束轨迹记录
|
# 结束轨迹记录
|
||||||
|
|
@ -1127,10 +1133,16 @@ class ReActEngine:
|
||||||
|
|
||||||
# 兜底:确保 output 永远不为空字符串
|
# 兜底:确保 output 永远不为空字符串
|
||||||
if not output or not output.strip():
|
if not output or not output.strip():
|
||||||
output = (
|
if step >= self._max_steps:
|
||||||
"抱歉,我暂时无法生成有效的回复。请尝试换一种方式描述你的需求,"
|
output = (
|
||||||
"或者稍后再试。"
|
f"已达到最大推理步数({self._max_steps}步),但仍未得到完整结论。"
|
||||||
)
|
"建议:1) 简化问题后重试;2) 将任务拆分为更小的步骤。"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
output = (
|
||||||
|
"模型未返回有效内容,可能原因:1) 服务暂时过载;2) 请求内容超出模型处理范围。"
|
||||||
|
"建议:1) 稍后重试;2) 简化或缩短输入内容。"
|
||||||
|
)
|
||||||
trace_outcome = "empty_fallback"
|
trace_outcome = "empty_fallback"
|
||||||
yield ReActEvent(
|
yield ReActEvent(
|
||||||
event_type="final_answer",
|
event_type="final_answer",
|
||||||
|
|
|
||||||
|
|
@ -169,6 +169,7 @@ class LLMGateway:
|
||||||
# ── Normal provider call ──
|
# ── Normal provider call ──
|
||||||
models_to_try = self._get_models_to_try(resolved_model)
|
models_to_try = self._get_models_to_try(resolved_model)
|
||||||
last_error: LLMProviderError | None = None
|
last_error: LLMProviderError | None = None
|
||||||
|
response: LLMResponse | None = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
for model_name in models_to_try:
|
for model_name in models_to_try:
|
||||||
|
|
@ -186,6 +187,21 @@ class LLMGateway:
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
response = await provider.chat(req)
|
response = await provider.chat(req)
|
||||||
|
# Empty response detection: if content is None/empty and no tool_calls,
|
||||||
|
# treat as failure and try next fallback model.
|
||||||
|
# This handles the common case where providers return 200 OK but empty body.
|
||||||
|
if (
|
||||||
|
response.content is None or not response.content.strip()
|
||||||
|
) and not response.tool_calls:
|
||||||
|
logger.warning(
|
||||||
|
f"Model '{model_name}' returned empty content with no tool_calls, "
|
||||||
|
f"trying next fallback"
|
||||||
|
)
|
||||||
|
last_error = LLMProviderError(
|
||||||
|
model_name,
|
||||||
|
f"Empty response from {model_name} (no content, no tool_calls)",
|
||||||
|
)
|
||||||
|
continue
|
||||||
break
|
break
|
||||||
except LLMProviderError as e:
|
except LLMProviderError as e:
|
||||||
last_error = e
|
last_error = e
|
||||||
|
|
@ -302,6 +318,20 @@ class LLMGateway:
|
||||||
cost=cost,
|
cost=cost,
|
||||||
latency_ms=latency_ms,
|
latency_ms=latency_ms,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Empty stream detection: if the stream produced no text content, try the next
|
||||||
|
# fallback model. NOTE: unlike the non-streaming check, only total_content is tested here; tool_calls are not inspected.
|
||||||
|
if not total_content.strip():
|
||||||
|
logger.warning(
|
||||||
|
f"Stream from '{model_name}' produced empty content, "
|
||||||
|
f"trying next fallback"
|
||||||
|
)
|
||||||
|
last_error = LLMProviderError(
|
||||||
|
model_name,
|
||||||
|
f"Empty stream from {model_name}",
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
return # Success, done
|
return # Success, done
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
last_error = e
|
last_error = e
|
||||||
|
|
|
||||||
|
|
@ -515,7 +515,11 @@ async def _handle_chat_message(
|
||||||
)
|
)
|
||||||
final_content = response.content or ""
|
final_content = response.content or ""
|
||||||
if not final_content or not final_content.strip():
|
if not final_content or not final_content.strip():
|
||||||
final_content = "抱歉,我暂时无法生成有效的回复。请尝试换一种方式描述你的需求,或者稍后再试。"
|
final_content = (
|
||||||
|
"模型未返回有效内容,已尝试备用模型仍未成功。"
|
||||||
|
"可能原因:1) 服务暂时过载;2) 请求内容超出模型处理范围。"
|
||||||
|
"建议:1) 稍后重试;2) 简化或缩短输入内容。"
|
||||||
|
)
|
||||||
await websocket.send_json({
|
await websocket.send_json({
|
||||||
"type": "final_answer",
|
"type": "final_answer",
|
||||||
"content": final_content,
|
"content": final_content,
|
||||||
|
|
|
||||||
|
|
@ -42,7 +42,9 @@ _api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)
|
||||||
_api_key_query = APIKeyQuery(name="api_key", auto_error=False)
|
_api_key_query = APIKeyQuery(name="api_key", auto_error=False)
|
||||||
|
|
||||||
_EMPTY_RESPONSE_FALLBACK = (
|
_EMPTY_RESPONSE_FALLBACK = (
|
||||||
"抱歉,我暂时无法生成有效的回复。请尝试换一种方式描述你的需求,或者稍后再试。"
|
"模型未返回有效内容,已尝试备用模型仍未成功。"
|
||||||
|
"可能原因:1) 服务暂时过载;2) 请求内容超出模型处理范围。"
|
||||||
|
"建议:1) 稍后重试;2) 简化或缩短输入内容。"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue