feat(gateway): empty response auto-retry with fallback model chain

2026-06-16 08:07:21 +08:00 · 2026-06-16 08:07:21 +08:00 · 16ac592855
parent 9caf332e9e
commit 16ac592855
4 changed files with 58 additions and 10 deletions
--- a/src/agentkit/core/react.py
+++ b/src/agentkit/core/react.py
@ -598,10 +598,16 @@ class ReActEngine:

            # 兜底：确保 output 永远不为空字符串
            if not output or not output.strip():
-                output = (
-                    "抱歉，我暂时无法生成有效的回复。请尝试换一种方式描述你的需求，"
-                    "或者稍后再试。"
-                )
+                if step >= self._max_steps:
+                    output = (
+                        f"已达到最大推理步数（{self._max_steps}步），但仍未得到完整结论。"
+                        "建议：1) 简化问题后重试；2) 将任务拆分为更小的步骤。"
+                    )
+                else:
+                    output = (
+                        "模型未返回有效内容，可能原因：1) 服务暂时过载；2) 请求内容超出模型处理范围。"
+                        "建议：1) 稍后重试；2) 简化或缩短输入内容。"
+                    )
                trace_outcome = "empty_fallback"

            # 结束轨迹记录
@ -1127,10 +1133,16 @@ class ReActEngine:

            # 兜底：确保 output 永远不为空字符串
            if not output or not output.strip():
-                output = (
-                    "抱歉，我暂时无法生成有效的回复。请尝试换一种方式描述你的需求，"
-                    "或者稍后再试。"
-                )
+                if step >= self._max_steps:
+                    output = (
+                        f"已达到最大推理步数（{self._max_steps}步），但仍未得到完整结论。"
+                        "建议：1) 简化问题后重试；2) 将任务拆分为更小的步骤。"
+                    )
+                else:
+                    output = (
+                        "模型未返回有效内容，可能原因：1) 服务暂时过载；2) 请求内容超出模型处理范围。"
+                        "建议：1) 稍后重试；2) 简化或缩短输入内容。"
+                    )
                trace_outcome = "empty_fallback"
                yield ReActEvent(
                    event_type="final_answer",
--- a/src/agentkit/llm/gateway.py
+++ b/src/agentkit/llm/gateway.py
@ -169,6 +169,7 @@ class LLMGateway:
        # ── Normal provider call ──
        models_to_try = self._get_models_to_try(resolved_model)
        last_error: LLMProviderError | None = None
+        response: LLMResponse | None = None

        try:
            for model_name in models_to_try:
@ -186,6 +187,21 @@ class LLMGateway:
                )
                try:
                    response = await provider.chat(req)
+                    # Empty response detection: if content is None/empty and no tool_calls,
+                    # treat as failure and try next fallback model.
+                    # This handles the common case where providers return 200 OK but empty body.
+                    if (
+                        response.content is None or not response.content.strip()
+                    ) and not response.tool_calls:
+                        logger.warning(
+                            f"Model '{model_name}' returned empty content with no tool_calls, "
+                            f"trying next fallback"
+                        )
+                        last_error = LLMProviderError(
+                            model_name,
+                            f"Empty response from {model_name} (no content, no tool_calls)",
+                        )
+                        continue
                    break
                except LLMProviderError as e:
                    last_error = e
@ -302,6 +318,20 @@ class LLMGateway:
                    cost=cost,
                    latency_ms=latency_ms,
                )
+
+                # Empty stream detection: if the stream produced no text content, try the next
+                # fallback model. NOTE(review): unlike non-streaming, tool_calls are not checked here — confirm.
+                if not total_content.strip():
+                    logger.warning(
+                        f"Stream from '{model_name}' produced empty content, "
+                        f"trying next fallback"
+                    )
+                    last_error = LLMProviderError(
+                        model_name,
+                        f"Empty stream from {model_name}",
+                    )
+                    continue
+
                return  # Success, done
            except Exception as e:
                last_error = e
--- a/src/agentkit/server/routes/chat.py
+++ b/src/agentkit/server/routes/chat.py
@ -515,7 +515,11 @@ async def _handle_chat_message(
            )
            final_content = response.content or ""
            if not final_content or not final_content.strip():
-                final_content = "抱歉，我暂时无法生成有效的回复。请尝试换一种方式描述你的需求，或者稍后再试。"
+                final_content = (
+                    "模型未返回有效内容，已尝试备用模型仍未成功。"
+                    "可能原因：1) 服务暂时过载；2) 请求内容超出模型处理范围。"
+                    "建议：1) 稍后重试；2) 简化或缩短输入内容。"
+                )
            await websocket.send_json({
                "type": "final_answer",
                "content": final_content,
--- a/src/agentkit/server/routes/portal.py
+++ b/src/agentkit/server/routes/portal.py
@ -42,7 +42,9 @@ _api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)
 _api_key_query = APIKeyQuery(name="api_key", auto_error=False)

 _EMPTY_RESPONSE_FALLBACK = (
-    "抱歉，我暂时无法生成有效的回复。请尝试换一种方式描述你的需求，或者稍后再试。"
+    "模型未返回有效内容，已尝试备用模型仍未成功。"
+    "可能原因：1) 服务暂时过载；2) 请求内容超出模型处理范围。"
+    "建议：1) 稍后重试；2) 简化或缩短输入内容。"
 )