diff --git a/src/agentkit/core/fallback.py b/src/agentkit/core/fallback.py
new file mode 100644
index 0000000..c3f7455
--- /dev/null
+++ b/src/agentkit/core/fallback.py
@@ -0,0 +1,18 @@
+"""Shared fallback messages for empty/failed responses.
+
+All layers (ReActEngine, Portal, Chat) should use these constants
+to ensure consistent user-facing messages.
+"""
+
+# When LLM returns empty content after all fallback models exhausted
+EMPTY_LLM_RESPONSE = (
+    "模型未返回有效内容，已尝试备用模型仍未成功。"
+    "可能原因：1) 服务暂时过载；2) 请求内容超出模型处理范围。"
+    "建议：1) 稍后重试；2) 简化或缩短输入内容。"
+)
+
+# When ReActEngine reaches max steps without a complete answer
+MAX_STEPS_REACHED = "已达到最大推理步数，但仍未得到完整结论。建议：1) 简化问题后重试；2) 将任务拆分为更小的步骤。"
+
+# When a shell command succeeds but produces no output
+SHELL_NO_OUTPUT = "[命令执行成功，无输出内容]"
diff --git a/src/agentkit/core/react.py b/src/agentkit/core/react.py
index 005d08a..01e7dc7 100644
--- a/src/agentkit/core/react.py
+++ b/src/agentkit/core/react.py
@@ -598,16 +598,11 @@ class ReActEngine:
 
             # 兜底：确保 output 永远不为空字符串
             if not output or not output.strip():
+                from agentkit.core.fallback import EMPTY_LLM_RESPONSE, MAX_STEPS_REACHED
                 if step >= self._max_steps:
-                    output = (
-                        f"已达到最大推理步数（{self._max_steps}步），但仍未得到完整结论。"
-                        "建议：1) 简化问题后重试；2) 将任务拆分为更小的步骤。"
-                    )
+                    output = MAX_STEPS_REACHED
                 else:
-                    output = (
-                        "模型未返回有效内容，可能原因：1) 服务暂时过载；2) 请求内容超出模型处理范围。"
-                        "建议：1) 稍后重试；2) 简化或缩短输入内容。"
-                    )
+                    output = EMPTY_LLM_RESPONSE
                 trace_outcome = "empty_fallback"
 
             # 结束轨迹记录
@@ -1133,16 +1128,11 @@ class ReActEngine:
 
             # 兜底：确保 output 永远不为空字符串
             if not output or not output.strip():
+                from agentkit.core.fallback import EMPTY_LLM_RESPONSE, MAX_STEPS_REACHED
                 if step >= self._max_steps:
-                    output = (
-                        f"已达到最大推理步数（{self._max_steps}步），但仍未得到完整结论。"
-                        "建议：1) 简化问题后重试；2) 将任务拆分为更小的步骤。"
-                    )
+                    output = MAX_STEPS_REACHED
                 else:
-                    output = (
-                        "模型未返回有效内容，可能原因：1) 服务暂时过载；2) 请求内容超出模型处理范围。"
-                        "建议：1) 稍后重试；2) 简化或缩短输入内容。"
-                    )
+                    output = EMPTY_LLM_RESPONSE
                 trace_outcome = "empty_fallback"
                 yield ReActEvent(
                     event_type="final_answer",
diff --git a/src/agentkit/llm/gateway.py b/src/agentkit/llm/gateway.py
index 03024d4..b1a9962 100644
--- a/src/agentkit/llm/gateway.py
+++ b/src/agentkit/llm/gateway.py
@@ -193,6 +193,17 @@ class LLMGateway:
                     if (
                         response.content is None or not response.content.strip()
                     ) and not response.tool_calls:
+                        # Record usage for billing before discarding this response
+                        if response.usage:
+                            latency_ms = (time.monotonic() - start) * 1000
+                            cost = self._calculate_cost(model_name, response.usage)
+                            self._usage_tracker.record(
+                                agent_name=agent_name,
+                                model=model_name,
+                                usage=response.usage,
+                                cost=cost,
+                                latency_ms=latency_ms,
+                            )
                         logger.warning(
                             f"Model '{model_name}' returned empty content with no tool_calls, "
                             f"trying next fallback"
@@ -319,18 +330,19 @@ class LLMGateway:
                     latency_ms=latency_ms,
                 )
 
-                # Empty stream detection: if no content was produced and no tool_calls,
-                # try next fallback model (same as non-streaming empty response handling).
+                # Empty stream detection: if no content was produced,
+                # raise error so the caller (ReActEngine) can retry with a different model.
+                # We cannot continue to next model here because chunks may have already
+                # been yielded to the client, which would cause mixed output.
+                # Note: stream tool_calls are not tracked in chunks, so we only check content.
                 if not total_content.strip():
                     logger.warning(
-                        f"Stream from '{model_name}' produced empty content, "
-                        f"trying next fallback"
+                        f"Stream from '{model_name}' produced empty content"
                     )
-                    last_error = LLMProviderError(
+                    raise LLMProviderError(
                         model_name,
                         f"Empty stream from {model_name}",
                     )
-                    continue
 
                 return  # Success, done
             except Exception as e:
diff --git a/src/agentkit/server/app.py b/src/agentkit/server/app.py
index 31788a0..5ac33d5 100644
--- a/src/agentkit/server/app.py
+++ b/src/agentkit/server/app.py
@@ -153,6 +153,10 @@ async def lifespan(app: FastAPI):
     if mcp_manager is not None:
         await mcp_manager.start_all()
 
+    # Restore conversation history from persistent store (async, in lifespan)
+    from agentkit.server.routes.portal import _conversation_store
+    await _conversation_store.restore_from_store()
+
     # In GUI mode, ensure a default chat agent exists with memory + tools
     gui_mode = os.environ.get("AGENTKIT_GUI_MODE")
     if gui_mode and not app.state.agent_pool.list_agents():
@@ -680,9 +684,6 @@ def create_app(
     from agentkit.server.routes.portal import _conversation_store
     _conversation_store.set_session_manager(app.state.session_manager)
 
-    # Restore conversation history from persistent store
-    await _conversation_store.restore_from_store()
-
     # Initialize evolution store if configured
     if server_config and hasattr(server_config, "evolution") and server_config.evolution:
         try:
diff --git a/src/agentkit/server/routes/chat.py b/src/agentkit/server/routes/chat.py
index 96eaff6..bc4dd3b 100644
--- a/src/agentkit/server/routes/chat.py
+++ b/src/agentkit/server/routes/chat.py
@@ -515,11 +515,8 @@ async def _handle_chat_message(
             )
             final_content = response.content or ""
             if not final_content or not final_content.strip():
-                final_content = (
-                    "模型未返回有效内容，已尝试备用模型仍未成功。"
-                    "可能原因：1) 服务暂时过载；2) 请求内容超出模型处理范围。"
-                    "建议：1) 稍后重试；2) 简化或缩短输入内容。"
-                )
+                from agentkit.core.fallback import EMPTY_LLM_RESPONSE
+                final_content = EMPTY_LLM_RESPONSE
             await websocket.send_json({
                 "type": "final_answer",
                 "content": final_content,
@@ -617,7 +614,8 @@ async def _handle_chat_message(
                 # Then send final answer
                 final_content = event.data.get("output", "")
                 if not final_content or not final_content.strip():
-                    final_content = "抱歉，我暂时无法生成有效的回复。请尝试换一种方式描述你的需求，或者稍后再试。"
+                    from agentkit.core.fallback import EMPTY_LLM_RESPONSE
+                    final_content = EMPTY_LLM_RESPONSE
                 await websocket.send_json({
                     "type": "final_answer",
                     "content": final_content,
diff --git a/src/agentkit/server/routes/portal.py b/src/agentkit/server/routes/portal.py
index 8c44942..31c9669 100644
--- a/src/agentkit/server/routes/portal.py
+++ b/src/agentkit/server/routes/portal.py
@@ -28,6 +28,7 @@ from agentkit.server.routes.evolution_dashboard import (
     DashboardExperience,
     _broadcast_event as _broadcast_dashboard_event,
 )
+from agentkit.core.fallback import EMPTY_LLM_RESPONSE
 from agentkit.session.manager import SessionManager
 
 logger = logging.getLogger(__name__)
@@ -41,18 +42,12 @@ router = APIRouter(tags=["portal"])
 _api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)
 _api_key_query = APIKeyQuery(name="api_key", auto_error=False)
 
-_EMPTY_RESPONSE_FALLBACK = (
-    "模型未返回有效内容，已尝试备用模型仍未成功。"
-    "可能原因：1) 服务暂时过载；2) 请求内容超出模型处理范围。"
-    "建议：1) 稍后重试；2) 简化或缩短输入内容。"
-)
-
 
 def _ensure_non_empty(text: str | None) -> str:
     """Ensure response text is never empty or whitespace-only."""
     if text and text.strip():
         return text
-    return _EMPTY_RESPONSE_FALLBACK
+    return EMPTY_LLM_RESPONSE
 
 
 async def _verify_api_key(
@@ -117,16 +112,21 @@ class ConversationStore:
         """Set or update the session manager for persistence."""
         self._session_manager = sm
 
-    async def restore_from_store(self) -> None:
+    async def restore_from_store(
+        self,
+        max_sessions: int = 50,
+        max_messages_per_session: int = 100,
+    ) -> None:
         """Restore recent conversations from SessionManager on startup.
 
         Loads the most recent sessions and their messages so that
         ConversationStore is populated after a server restart.
+        Limits are applied to prevent memory exhaustion on startup.
         """
         if self._session_manager is None:
             return
         try:
-            sessions = await self._session_manager.list_sessions(limit=self._max)
+            sessions = await self._session_manager.list_sessions(limit=max_sessions)
             for session in sessions:
                 sid = session.session_id
                 if sid in self._conversations:
@@ -137,7 +137,9 @@ class ConversationStore:
                     created_at=session.created_at,
                     updated_at=session.updated_at,
                 )
-                messages = await self._session_manager.get_messages(sid)
+                messages = await self._session_manager.get_messages(
+                    sid, limit=max_messages_per_session
+                )
                 for msg in messages:
                     conv.messages.append(ChatMessage(
                         role=msg.role.value,
@@ -312,7 +314,7 @@ class ChatRequest(BaseModel):
 class ChatResponse(BaseModel):
     conversation_id: str
     message: str
-    timestamp: str = ""
+    timestamp: str
     matched_skill: str | None = None
     routing_method: str | None = None
     confidence: float | None = None
@@ -1050,7 +1052,7 @@ async def portal_websocket(websocket: WebSocket):
             )
             await _conversation_store.add_message(conv.id, "assistant", response_text)
 
-            outcome = "success" if response_text != _EMPTY_RESPONSE_FALLBACK else "failure"
+            outcome = "success" if response_text != EMPTY_LLM_RESPONSE else "failure"
             await websocket.send_json(
                 {
                     "type": "result",
diff --git a/src/agentkit/tools/shell.py b/src/agentkit/tools/shell.py
index ff17f8b..ba84377 100644
--- a/src/agentkit/tools/shell.py
+++ b/src/agentkit/tools/shell.py
@@ -357,6 +357,11 @@ class ShellTool(Tool):
         if len(output) > self._max_output_length:
             output = output[: self._max_output_length] + "\n... [输出已截断]"
 
+        # Ensure non-empty output for successful commands (all execution modes)
+        if result.exit_code == 0 and not output.strip():
+            from agentkit.core.fallback import SHELL_NO_OUTPUT
+            output = SHELL_NO_OUTPUT
+
         return {
             "output": output,
             "exit_code": result.exit_code,
@@ -404,9 +409,6 @@ class ShellTool(Tool):
             else:
                 output = stdout.decode("utf-8", errors="replace") if stdout else ""
                 exit_code = proc.returncode if proc.returncode is not None else 0
-                # Ensure non-empty output for successful commands
-                if exit_code == 0 and not output.strip():
-                    output = "[命令执行成功，无输出内容]"
         except Exception as e:
             output = str(e)
             exit_code = -1
diff --git a/src/agentkit/tools/web_search.py b/src/agentkit/tools/web_search.py
index 87f0309..0c059d8 100644
--- a/src/agentkit/tools/web_search.py
+++ b/src/agentkit/tools/web_search.py
@@ -91,6 +91,7 @@ class WebSearchTool(Tool):
                 "backend": {"type": "string", "description": "使用的搜索后端"},
                 "success": {"type": "boolean", "description": "是否成功"},
                 "error": {"type": "string", "description": "错误信息（仅失败时）"},
+                "fallback_message": {"type": "string", "description": "搜索无结果时的提示信息"},
             },
         }