diff --git a/src/agentkit/core/fallback.py b/src/agentkit/core/fallback.py new file mode 100644 index 0000000..c3f7455 --- /dev/null +++ b/src/agentkit/core/fallback.py @@ -0,0 +1,18 @@ +"""Shared fallback messages for empty/failed responses. + +All layers (ReActEngine, Portal, Chat) should use these constants +to ensure consistent user-facing messages. +""" + +# When LLM returns empty content after all fallback models exhausted +EMPTY_LLM_RESPONSE = ( + "模型未返回有效内容,已尝试备用模型仍未成功。" + "可能原因:1) 服务暂时过载;2) 请求内容超出模型处理范围。" + "建议:1) 稍后重试;2) 简化或缩短输入内容。" +) + +# When ReActEngine reaches max steps without a complete answer +MAX_STEPS_REACHED = "已达到最大推理步数,但仍未得到完整结论。建议:1) 简化问题后重试;2) 将任务拆分为更小的步骤。" + +# When a shell command succeeds but produces no output +SHELL_NO_OUTPUT = "[命令执行成功,无输出内容]" diff --git a/src/agentkit/core/react.py b/src/agentkit/core/react.py index 005d08a..01e7dc7 100644 --- a/src/agentkit/core/react.py +++ b/src/agentkit/core/react.py @@ -598,16 +598,11 @@ class ReActEngine: # 兜底:确保 output 永远不为空字符串 if not output or not output.strip(): + from agentkit.core.fallback import EMPTY_LLM_RESPONSE, MAX_STEPS_REACHED if step >= self._max_steps: - output = ( - f"已达到最大推理步数({self._max_steps}步),但仍未得到完整结论。" - "建议:1) 简化问题后重试;2) 将任务拆分为更小的步骤。" - ) + output = MAX_STEPS_REACHED else: - output = ( - "模型未返回有效内容,可能原因:1) 服务暂时过载;2) 请求内容超出模型处理范围。" - "建议:1) 稍后重试;2) 简化或缩短输入内容。" - ) + output = EMPTY_LLM_RESPONSE trace_outcome = "empty_fallback" # 结束轨迹记录 @@ -1133,16 +1128,11 @@ class ReActEngine: # 兜底:确保 output 永远不为空字符串 if not output or not output.strip(): + from agentkit.core.fallback import EMPTY_LLM_RESPONSE, MAX_STEPS_REACHED if step >= self._max_steps: - output = ( - f"已达到最大推理步数({self._max_steps}步),但仍未得到完整结论。" - "建议:1) 简化问题后重试;2) 将任务拆分为更小的步骤。" - ) + output = MAX_STEPS_REACHED else: - output = ( - "模型未返回有效内容,可能原因:1) 服务暂时过载;2) 请求内容超出模型处理范围。" - "建议:1) 稍后重试;2) 简化或缩短输入内容。" - ) + output = EMPTY_LLM_RESPONSE trace_outcome = "empty_fallback" yield ReActEvent( event_type="final_answer", diff --git a/src/agentkit/llm/gateway.py b/src/agentkit/llm/gateway.py index 03024d4..b1a9962 100644 --- a/src/agentkit/llm/gateway.py +++ b/src/agentkit/llm/gateway.py @@ -193,6 +193,17 @@ class LLMGateway: if ( response.content is None or not response.content.strip() ) and not response.tool_calls: + # Record usage for billing before discarding this response + if response.usage: + latency_ms = (time.monotonic() - start) * 1000 + cost = self._calculate_cost(model_name, response.usage) + self._usage_tracker.record( + agent_name=agent_name, + model=model_name, + usage=response.usage, + cost=cost, + latency_ms=latency_ms, + ) logger.warning( f"Model '{model_name}' returned empty content with no tool_calls, " f"trying next fallback" @@ -319,18 +330,19 @@ class LLMGateway: latency_ms=latency_ms, ) - # Empty stream detection: if no content was produced and no tool_calls, - # try next fallback model (same as non-streaming empty response handling). + # Empty stream detection: if no content was produced, + # raise error so the caller (ReActEngine) can retry with a different model. + # We cannot continue to next model here because chunks may have already + # been yielded to the client, which would cause mixed output. + # Note: stream tool_calls are not tracked in chunks, so we only check content. if not total_content.strip(): logger.warning( - f"Stream from '{model_name}' produced empty content, " - f"trying next fallback" + f"Stream from '{model_name}' produced empty content" ) - last_error = LLMProviderError( + raise LLMProviderError( model_name, f"Empty stream from {model_name}", ) - continue return # Success, done except Exception as e: diff --git a/src/agentkit/server/app.py b/src/agentkit/server/app.py index 31788a0..5ac33d5 100644 --- a/src/agentkit/server/app.py +++ b/src/agentkit/server/app.py @@ -153,6 +153,10 @@ async def lifespan(app: FastAPI): if mcp_manager is not None: await mcp_manager.start_all() + # Restore conversation history from persistent store (async, in lifespan) + from agentkit.server.routes.portal import _conversation_store + await _conversation_store.restore_from_store() + # In GUI mode, ensure a default chat agent exists with memory + tools gui_mode = os.environ.get("AGENTKIT_GUI_MODE") if gui_mode and not app.state.agent_pool.list_agents(): @@ -680,9 +684,6 @@ def create_app( from agentkit.server.routes.portal import _conversation_store _conversation_store.set_session_manager(app.state.session_manager) - # Restore conversation history from persistent store - await _conversation_store.restore_from_store() - # Initialize evolution store if configured if server_config and hasattr(server_config, "evolution") and server_config.evolution: try: diff --git a/src/agentkit/server/routes/chat.py b/src/agentkit/server/routes/chat.py index 96eaff6..bc4dd3b 100644 --- a/src/agentkit/server/routes/chat.py +++ b/src/agentkit/server/routes/chat.py @@ -515,11 +515,8 @@ async def _handle_chat_message( ) final_content = response.content or "" if not final_content or not final_content.strip(): - final_content = ( - "模型未返回有效内容,已尝试备用模型仍未成功。" - "可能原因:1) 服务暂时过载;2) 请求内容超出模型处理范围。" - "建议:1) 稍后重试;2) 简化或缩短输入内容。" - ) + from agentkit.core.fallback import EMPTY_LLM_RESPONSE + final_content = EMPTY_LLM_RESPONSE await websocket.send_json({ "type": "final_answer", "content": final_content, @@ -617,7 +614,8 @@ async def _handle_chat_message( # Then send final answer final_content = event.data.get("output", "") if not final_content or not final_content.strip(): - final_content = "抱歉,我暂时无法生成有效的回复。请尝试换一种方式描述你的需求,或者稍后再试。" + from agentkit.core.fallback import EMPTY_LLM_RESPONSE + final_content = EMPTY_LLM_RESPONSE await websocket.send_json({ "type": "final_answer", "content": final_content, diff --git a/src/agentkit/server/routes/portal.py b/src/agentkit/server/routes/portal.py index 8c44942..31c9669 100644 --- a/src/agentkit/server/routes/portal.py +++ b/src/agentkit/server/routes/portal.py @@ -28,6 +28,7 @@ from agentkit.server.routes.evolution_dashboard import ( DashboardExperience, _broadcast_event as _broadcast_dashboard_event, ) +from agentkit.core.fallback import EMPTY_LLM_RESPONSE from agentkit.session.manager import SessionManager logger = logging.getLogger(__name__) @@ -41,18 +42,12 @@ router = APIRouter(tags=["portal"]) _api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False) _api_key_query = APIKeyQuery(name="api_key", auto_error=False) -_EMPTY_RESPONSE_FALLBACK = ( - "模型未返回有效内容,已尝试备用模型仍未成功。" - "可能原因:1) 服务暂时过载;2) 请求内容超出模型处理范围。" - "建议:1) 稍后重试;2) 简化或缩短输入内容。" -) - def _ensure_non_empty(text: str | None) -> str: """Ensure response text is never empty or whitespace-only.""" if text and text.strip(): return text - return _EMPTY_RESPONSE_FALLBACK + return EMPTY_LLM_RESPONSE async def _verify_api_key( @@ -117,16 +112,21 @@ class ConversationStore: """Set or update the session manager for persistence.""" self._session_manager = sm - async def restore_from_store(self) -> None: + async def restore_from_store( + self, + max_sessions: int = 50, + max_messages_per_session: int = 100, + ) -> None: """Restore recent conversations from SessionManager on startup. Loads the most recent sessions and their messages so that ConversationStore is populated after a server restart. + Limits are applied to prevent memory exhaustion on startup. """ if self._session_manager is None: return try: - sessions = await self._session_manager.list_sessions(limit=self._max) + sessions = await self._session_manager.list_sessions(limit=max_sessions) for session in sessions: sid = session.session_id if sid in self._conversations: @@ -137,7 +137,9 @@ class ConversationStore: created_at=session.created_at, updated_at=session.updated_at, ) - messages = await self._session_manager.get_messages(sid) + messages = await self._session_manager.get_messages( + sid, limit=max_messages_per_session + ) for msg in messages: conv.messages.append(ChatMessage( role=msg.role.value, @@ -312,7 +314,7 @@ class ChatRequest(BaseModel): class ChatResponse(BaseModel): conversation_id: str message: str - timestamp: str = "" + timestamp: str matched_skill: str | None = None routing_method: str | None = None confidence: float | None = None @@ -1050,7 +1052,7 @@ async def portal_websocket(websocket: WebSocket): ) await _conversation_store.add_message(conv.id, "assistant", response_text) - outcome = "success" if response_text != _EMPTY_RESPONSE_FALLBACK else "failure" + outcome = "success" if response_text != EMPTY_LLM_RESPONSE else "failure" await websocket.send_json( { "type": "result", diff --git a/src/agentkit/tools/shell.py b/src/agentkit/tools/shell.py index ff17f8b..ba84377 100644 --- a/src/agentkit/tools/shell.py +++ b/src/agentkit/tools/shell.py @@ -357,6 +357,11 @@ class ShellTool(Tool): if len(output) > self._max_output_length: output = output[: self._max_output_length] + "\n... [输出已截断]" + # Ensure non-empty output for successful commands (all execution modes) + if result.exit_code == 0 and not output.strip(): + from agentkit.core.fallback import SHELL_NO_OUTPUT + output = SHELL_NO_OUTPUT + return { "output": output, "exit_code": result.exit_code, @@ -404,9 +409,6 @@ class ShellTool(Tool): else: output = stdout.decode("utf-8", errors="replace") if stdout else "" exit_code = proc.returncode if proc.returncode is not None else 0 - # Ensure non-empty output for successful commands - if exit_code == 0 and not output.strip(): - output = "[命令执行成功,无输出内容]" except Exception as e: output = str(e) exit_code = -1 diff --git a/src/agentkit/tools/web_search.py b/src/agentkit/tools/web_search.py index 87f0309..0c059d8 100644 --- a/src/agentkit/tools/web_search.py +++ b/src/agentkit/tools/web_search.py @@ -91,6 +91,7 @@ class WebSearchTool(Tool): "backend": {"type": "string", "description": "使用的搜索后端"}, "success": {"type": "boolean", "description": "是否成功"}, "error": {"type": "string", "description": "错误信息(仅失败时)"}, + "fallback_message": {"type": "string", "description": "搜索无结果时的提示信息"}, }, }