From 16ac5928557511ddc8430fad281f477e6cb12b8d Mon Sep 17 00:00:00 2001 From: chiguyong Date: Tue, 16 Jun 2026 08:07:21 +0800 Subject: [PATCH] feat(gateway): empty response auto-retry with fallback model chain --- src/agentkit/core/react.py | 28 ++++++++++++++++++-------- src/agentkit/llm/gateway.py | 30 ++++++++++++++++++++++++++++ src/agentkit/server/routes/chat.py | 6 +++++- src/agentkit/server/routes/portal.py | 4 +++- 4 files changed, 58 insertions(+), 10 deletions(-) diff --git a/src/agentkit/core/react.py b/src/agentkit/core/react.py index 6e1025a..005d08a 100644 --- a/src/agentkit/core/react.py +++ b/src/agentkit/core/react.py @@ -598,10 +598,16 @@ class ReActEngine: # 兜底:确保 output 永远不为空字符串 if not output or not output.strip(): - output = ( - "抱歉,我暂时无法生成有效的回复。请尝试换一种方式描述你的需求," - "或者稍后再试。" - ) + if step >= self._max_steps: + output = ( + f"已达到最大推理步数({self._max_steps}步),但仍未得到完整结论。" + "建议:1) 简化问题后重试;2) 将任务拆分为更小的步骤。" + ) + else: + output = ( + "模型未返回有效内容,可能原因:1) 服务暂时过载;2) 请求内容超出模型处理范围。" + "建议:1) 稍后重试;2) 简化或缩短输入内容。" + ) trace_outcome = "empty_fallback" # 结束轨迹记录 @@ -1127,10 +1133,16 @@ class ReActEngine: # 兜底:确保 output 永远不为空字符串 if not output or not output.strip(): - output = ( - "抱歉,我暂时无法生成有效的回复。请尝试换一种方式描述你的需求," - "或者稍后再试。" - ) + if step >= self._max_steps: + output = ( + f"已达到最大推理步数({self._max_steps}步),但仍未得到完整结论。" + "建议:1) 简化问题后重试;2) 将任务拆分为更小的步骤。" + ) + else: + output = ( + "模型未返回有效内容,可能原因:1) 服务暂时过载;2) 请求内容超出模型处理范围。" + "建议:1) 稍后重试;2) 简化或缩短输入内容。" + ) trace_outcome = "empty_fallback" yield ReActEvent( event_type="final_answer", diff --git a/src/agentkit/llm/gateway.py b/src/agentkit/llm/gateway.py index 64bad9a..03024d4 100644 --- a/src/agentkit/llm/gateway.py +++ b/src/agentkit/llm/gateway.py @@ -169,6 +169,7 @@ class LLMGateway: # ── Normal provider call ── models_to_try = self._get_models_to_try(resolved_model) last_error: LLMProviderError | None = None + response: LLMResponse | None = None try: for model_name in models_to_try: @@ -186,6 +187,21 @@ class 
LLMGateway: ) try: response = await provider.chat(req) + # Empty response detection: if content is None/empty and no tool_calls, + # treat as failure and try next fallback model. + # This handles the common case where providers return 200 OK but empty body. + if ( + response.content is None or not response.content.strip() + ) and not response.tool_calls: + logger.warning( + f"Model '{model_name}' returned empty content with no tool_calls, " + f"trying next fallback" + ) + last_error = LLMProviderError( + model_name, + f"Empty response from {model_name} (no content, no tool_calls)", + ) + continue break except LLMProviderError as e: last_error = e @@ -302,6 +318,20 @@ class LLMGateway: cost=cost, latency_ms=latency_ms, ) + + # Empty stream detection: if no text content was produced, try next fallback model. + # NOTE(review): unlike the non-streaming path, tool_calls are not checked here - confirm. + if not total_content.strip(): + logger.warning( + f"Stream from '{model_name}' produced empty content, " + f"trying next fallback" + ) + last_error = LLMProviderError( + model_name, + f"Empty stream from {model_name}", + ) + continue + return # Success, done except Exception as e: last_error = e diff --git a/src/agentkit/server/routes/chat.py b/src/agentkit/server/routes/chat.py index 3587653..96eaff6 100644 --- a/src/agentkit/server/routes/chat.py +++ b/src/agentkit/server/routes/chat.py @@ -515,7 +515,11 @@ async def _handle_chat_message( ) final_content = response.content or "" if not final_content or not final_content.strip(): - final_content = "抱歉,我暂时无法生成有效的回复。请尝试换一种方式描述你的需求,或者稍后再试。" + final_content = ( + "模型未返回有效内容,已尝试备用模型仍未成功。" + "可能原因:1) 服务暂时过载;2) 请求内容超出模型处理范围。" + "建议:1) 稍后重试;2) 简化或缩短输入内容。" + ) await websocket.send_json({ "type": "final_answer", "content": final_content, diff --git a/src/agentkit/server/routes/portal.py b/src/agentkit/server/routes/portal.py index 9ad5a3b..3f7dcdd 100644 --- a/src/agentkit/server/routes/portal.py +++ b/src/agentkit/server/routes/portal.py @@ -42,7 +42,9 @@ 
_api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False) _api_key_query = APIKeyQuery(name="api_key", auto_error=False) _EMPTY_RESPONSE_FALLBACK = ( - "抱歉,我暂时无法生成有效的回复。请尝试换一种方式描述你的需求,或者稍后再试。" + "模型未返回有效内容,已尝试备用模型仍未成功。" + "可能原因:1) 服务暂时过载;2) 请求内容超出模型处理范围。" + "建议:1) 稍后重试;2) 简化或缩短输入内容。" )