From 16ac5928557511ddc8430fad281f477e6cb12b8d Mon Sep 17 00:00:00 2001
From: chiguyong <chiguyong@beyondsoft.com>
Date: Tue, 16 Jun 2026 08:07:21 +0800
Subject: [PATCH] feat(gateway): empty response auto-retry with fallback model
 chain

---
 src/agentkit/core/react.py           | 28 ++++++++++++++++++--------
 src/agentkit/llm/gateway.py          | 30 ++++++++++++++++++++++++++++
 src/agentkit/server/routes/chat.py   |  6 +++++-
 src/agentkit/server/routes/portal.py |  4 +++-
 4 files changed, 58 insertions(+), 10 deletions(-)

diff --git a/src/agentkit/core/react.py b/src/agentkit/core/react.py
index 6e1025a..005d08a 100644
--- a/src/agentkit/core/react.py
+++ b/src/agentkit/core/react.py
@@ -598,10 +598,16 @@ class ReActEngine:
 
             # 兜底：确保 output 永远不为空字符串
             if not output or not output.strip():
-                output = (
-                    "抱歉，我暂时无法生成有效的回复。请尝试换一种方式描述你的需求，"
-                    "或者稍后再试。"
-                )
+                if step >= self._max_steps:
+                    output = (
+                        f"已达到最大推理步数（{self._max_steps}步），但仍未得到完整结论。"
+                        "建议：1) 简化问题后重试；2) 将任务拆分为更小的步骤。"
+                    )
+                else:
+                    output = (
+                        "模型未返回有效内容，可能原因：1) 服务暂时过载；2) 请求内容超出模型处理范围。"
+                        "建议：1) 稍后重试；2) 简化或缩短输入内容。"
+                    )
                 trace_outcome = "empty_fallback"
 
             # 结束轨迹记录
@@ -1127,10 +1133,16 @@ class ReActEngine:
 
             # 兜底：确保 output 永远不为空字符串
             if not output or not output.strip():
-                output = (
-                    "抱歉，我暂时无法生成有效的回复。请尝试换一种方式描述你的需求，"
-                    "或者稍后再试。"
-                )
+                if step >= self._max_steps:
+                    output = (
+                        f"已达到最大推理步数（{self._max_steps}步），但仍未得到完整结论。"
+                        "建议：1) 简化问题后重试；2) 将任务拆分为更小的步骤。"
+                    )
+                else:
+                    output = (
+                        "模型未返回有效内容，可能原因：1) 服务暂时过载；2) 请求内容超出模型处理范围。"
+                        "建议：1) 稍后重试；2) 简化或缩短输入内容。"
+                    )
                 trace_outcome = "empty_fallback"
                 yield ReActEvent(
                     event_type="final_answer",
diff --git a/src/agentkit/llm/gateway.py b/src/agentkit/llm/gateway.py
index 64bad9a..03024d4 100644
--- a/src/agentkit/llm/gateway.py
+++ b/src/agentkit/llm/gateway.py
@@ -169,6 +169,7 @@ class LLMGateway:
         # ── Normal provider call ──
         models_to_try = self._get_models_to_try(resolved_model)
         last_error: LLMProviderError | None = None
+        response: LLMResponse | None = None
 
         try:
             for model_name in models_to_try:
@@ -186,6 +187,21 @@ class LLMGateway:
                 )
                 try:
                     response = await provider.chat(req)
+                    # Empty response detection: if content is None/empty and no tool_calls,
+                    # treat as failure and try next fallback model.
+                    # This handles the common case where providers return 200 OK but empty body.
+                    if (
+                        response.content is None or not response.content.strip()
+                    ) and not response.tool_calls:
+                        logger.warning(
+                            f"Model '{model_name}' returned empty content with no tool_calls, "
+                            f"trying next fallback"
+                        )
+                        last_error = LLMProviderError(
+                            model_name,
+                            f"Empty response from {model_name} (no content, no tool_calls)",
+                        )
+                        continue
                     break
                 except LLMProviderError as e:
                     last_error = e
@@ -302,6 +318,20 @@ class LLMGateway:
                     cost=cost,
                     latency_ms=latency_ms,
                 )
+
+                # Empty stream detection: when no text content was produced, try the next fallback.
+                # NOTE(review): unlike the non-streaming check, tool_calls are not examined here.
+                if not total_content.strip():
+                    logger.warning(
+                        f"Stream from '{model_name}' produced empty content, "
+                        f"trying next fallback"
+                    )
+                    last_error = LLMProviderError(
+                        model_name,
+                        f"Empty stream from {model_name}",
+                    )
+                    continue
+
                 return  # Success, done
             except Exception as e:
                 last_error = e
diff --git a/src/agentkit/server/routes/chat.py b/src/agentkit/server/routes/chat.py
index 3587653..96eaff6 100644
--- a/src/agentkit/server/routes/chat.py
+++ b/src/agentkit/server/routes/chat.py
@@ -515,7 +515,11 @@ async def _handle_chat_message(
             )
             final_content = response.content or ""
             if not final_content or not final_content.strip():
-                final_content = "抱歉，我暂时无法生成有效的回复。请尝试换一种方式描述你的需求，或者稍后再试。"
+                final_content = (
+                    "模型未返回有效内容，已尝试备用模型仍未成功。"
+                    "可能原因：1) 服务暂时过载；2) 请求内容超出模型处理范围。"
+                    "建议：1) 稍后重试；2) 简化或缩短输入内容。"
+                )
             await websocket.send_json({
                 "type": "final_answer",
                 "content": final_content,
diff --git a/src/agentkit/server/routes/portal.py b/src/agentkit/server/routes/portal.py
index 9ad5a3b..3f7dcdd 100644
--- a/src/agentkit/server/routes/portal.py
+++ b/src/agentkit/server/routes/portal.py
@@ -42,7 +42,9 @@ _api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)
 _api_key_query = APIKeyQuery(name="api_key", auto_error=False)
 
 _EMPTY_RESPONSE_FALLBACK = (
-    "抱歉，我暂时无法生成有效的回复。请尝试换一种方式描述你的需求，或者稍后再试。"
+    "模型未返回有效内容，已尝试备用模型仍未成功。"
+    "可能原因：1) 服务暂时过载；2) 请求内容超出模型处理范围。"
+    "建议：1) 稍后重试；2) 简化或缩短输入内容。"
 )