feat(streaming): Phase C - LLM streaming + ReAct events + SSE endpoint

U8: StreamChunk protocol + OpenAI chat_stream + Gateway streaming with usage tracking. U9: ReActEvent dataclass + execute_stream() yielding thinking/tool_call/tool_result/final_answer events. U10: POST /tasks/stream SSE endpoint + Client SDK stream_task(). 15 new tests passing, no regressions.
2026-06-06 11:54:17 +08:00 · 2026-06-06 11:54:17 +08:00 · 2844eeb548
parent ec0e221beb
commit 2844eeb548
9 changed files with 908 additions and 3 deletions
--- a/configs/geo_tools.py
+++ b/configs/geo_tools.py
@ -462,4 +462,4 @@ def register_geo_tools(registry: ToolRegistry) -> None:
        tags=["knowledge", "deai"],
    ))
-    logger.info(f"GEO tools registered: {len(registry.list_all_tools())} tools")
+    logger.info(f"GEO tools registered: {len(registry.list_tools())} tools")
--- a/pyproject.toml
+++ b/pyproject.toml
@ -26,6 +26,7 @@ dependencies = [
 server = [
    "fastapi>=0.110",
    "uvicorn>=0.27",
    "sse-starlette>=2.0",
 ]
 mcp = [
    "mcp>=1.0",
--- a/src/agentkit/core/react.py
+++ b/src/agentkit/core/react.py
@ -8,6 +8,7 @@ import json
 import logging
 import re
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from typing import Any
 from agentkit.llm.gateway import LLMGateway
@ -39,6 +40,16 @@ class ReActResult:
    total_tokens: int
@dataclass
 class ReActEvent:
    """ReAct execution event: one progress record emitted by a streaming ReAct run."""
    # Kind of event; the values used by the engine are listed in the trailing comment.
    event_type: str  # "thinking", "tool_call", "tool_result", "final_answer", "error"
    # Step counter of the loop iteration that produced the event.
    step: int
    # Event-specific payload; each instance gets its own empty dict by default.
    data: dict[str, Any] = field(default_factory=dict)
    # Creation time as an ISO-8601 string in UTC, captured when the event is built.
    timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
 class ReActEngine:
    """ReAct 推理-行动循环引擎
@ -186,6 +197,172 @@ class ReActEngine:
            total_tokens=total_tokens,
        )
    async def execute_stream(
        self,
        messages: list[dict[str, str]],
        tools: list[Tool] | None = None,
        model: str = "default",
        agent_name: str = "",
        task_type: str = "",
        system_prompt: str | None = None,
    ):
        """Run the ReAct loop as an async generator of ``ReActEvent`` objects.

        Each iteration yields a ``thinking`` event, calls the LLM gateway and
        then either executes the requested tools (yielding a ``tool_call`` /
        ``tool_result`` pair per tool) or yields a ``final_answer`` event and
        stops. If ``self._max_steps`` iterations pass without a final answer,
        a single ``final_answer`` event carrying ``max_steps_reached: True``
        is yielded instead.

        Args:
            messages: Conversation messages, appended after the optional
                system prompt.
            tools: Tools the model may call; ``None`` is treated as no tools.
            model: Model name forwarded to the gateway.
            agent_name: Forwarded to the gateway.
            task_type: Forwarded to the gateway.
            system_prompt: Optional system message placed first in the
                conversation.

        Yields:
            ReActEvent: Progress events in the order they occur. A run that
            finishes without raising produces exactly one ``final_answer``.
        """
        tools = tools or []
        tool_schemas = self._build_tool_schemas(tools) if tools else None

        conversation: list[dict[str, Any]] = []
        if system_prompt:
            conversation.append({"role": "system", "content": system_prompt})
        conversation.extend(messages)

        trajectory: list[ReActStep] = []
        total_tokens = 0
        step = 0
        # Stays None only when the loop body never runs (max_steps <= 0);
        # the fallback after the loop must not assume an LLM reply exists.
        response = None

        while step < self._max_steps:
            step += 1

            yield ReActEvent(
                event_type="thinking",
                step=step,
                data={"message": f"Step {step}: Calling LLM..."},
            )

            # Think: call LLM
            response = await self._llm_gateway.chat(
                messages=conversation,
                model=model,
                agent_name=agent_name,
                task_type=task_type,
                tools=tool_schemas,
            )
            step_tokens = response.usage.total_tokens
            total_tokens += step_tokens

            # Both tool-calling modes are normalised to a lazy stream of
            # (call_id, tool_name, arguments) so one loop below runs them.
            if response.has_tool_calls:
                # Native function calling: echo the assistant turn, including
                # the tool_calls it requested, back into the conversation.
                conversation.append({
                    "role": "assistant",
                    "content": response.content or "",
                    "tool_calls": [
                        {
                            "id": tc.id,
                            "type": "function",
                            "function": {
                                "name": tc.name,
                                "arguments": json.dumps(tc.arguments),
                            },
                        }
                        for tc in response.tool_calls
                    ],
                })
                pending_calls = (
                    (tc.id, tc.name, tc.arguments) for tc in response.tool_calls
                )
            else:
                # Text-parsing mode: tool calls embedded in the reply text.
                parsed_calls = self._parse_text_tool_calls(response.content or "")
                if parsed_calls and tools:
                    conversation.append({"role": "assistant", "content": response.content})
                    pending_calls = (
                        (pc.get("id", f"text_tc_{step}"), pc["name"], pc["arguments"])
                        for pc in parsed_calls
                    )
                else:
                    # No tool requested: this reply is the final answer.
                    trajectory.append(ReActStep(
                        step=step,
                        action="final_answer",
                        content=response.content,
                        tokens=step_tokens,
                    ))
                    yield ReActEvent(
                        event_type="final_answer",
                        step=step,
                        data={
                            "output": response.content or "",
                            "total_steps": len(trajectory),
                            "total_tokens": total_tokens,
                        },
                    )
                    # Return (not break) so the max-steps fallback below can
                    # never emit a second final_answer, even when the answer
                    # text is empty and arrives on the last allowed step.
                    return

            for call_id, tool_name, arguments in pending_calls:
                yield ReActEvent(
                    event_type="tool_call",
                    step=step,
                    data={"tool_name": tool_name, "arguments": arguments},
                )

                tool_result = await self._execute_tool(tool_name, arguments, tools)
                trajectory.append(ReActStep(
                    step=step,
                    action="tool_call",
                    tool_name=tool_name,
                    arguments=arguments,
                    result=tool_result,
                    tokens=step_tokens,
                ))

                yield ReActEvent(
                    event_type="tool_result",
                    step=step,
                    data={"tool_name": tool_name, "result": tool_result},
                )

                conversation.append(
                    self._build_tool_result_message(call_id, tool_result)
                )

        # Reached only when the loop ran out of steps without a final answer.
        # Fall back to the most recent trajectory entry, then the last reply.
        last_step = trajectory[-1] if trajectory else None
        if last_step is not None and last_step.content:
            output = last_step.content
        elif last_step is not None and last_step.result is not None:
            output = str(last_step.result)
        elif response is not None:
            output = response.content or ""
        else:
            output = ""

        yield ReActEvent(
            event_type="final_answer",
            step=step,
            data={
                "output": output,
                "total_steps": len(trajectory),
                "total_tokens": total_tokens,
                "max_steps_reached": True,
            },
        )
    def _build_tool_schemas(self, tools: list[Tool]) -> list[dict]:
        """将 Tool 对象转换为 OpenAI Function Calling schema 格式"""
        schemas = []
--- a/src/agentkit/llm/gateway.py
+++ b/src/agentkit/llm/gateway.py
@ -5,7 +5,7 @@ import time
 from agentkit.core.exceptions import LLMProviderError, ModelNotFoundError
 from agentkit.llm.config import LLMConfig
-from agentkit.llm.protocol import LLMProvider, LLMRequest, LLMResponse, TokenUsage
+from agentkit.llm.protocol import LLMProvider, LLMRequest, LLMResponse, StreamChunk, TokenUsage
 from agentkit.llm.providers.tracker import UsageSummary, UsageTracker
 logger = logging.getLogger(__name__)
@ -97,6 +97,62 @@ class LLMGateway:
        return response
    async def chat_stream(
        self,
        messages: list[dict[str, str]],
        model: str,
        agent_name: str = "",
        task_type: str = "",
        tools: list[dict] | None = None,
        tool_choice: str = "auto",
        **kwargs,
    ):
        """Stream a chat response, yielding the provider's ``StreamChunk`` objects.

        Chunks are passed through unchanged. Once the provider stream is
        exhausted, the last usage and model reported by any chunk are recorded
        with the usage tracker together with the measured latency. The record
        is written only when the stream is consumed to completion.

        Args:
            messages: Chat messages for the request.
            model: Model name or alias; aliases are resolved first.
            agent_name: Name the usage record is attributed to.
            task_type: Accepted in the signature; not used by this method.
            tools: Optional tool schemas forwarded to the provider.
            tool_choice: Tool-choice mode forwarded to the provider.
            **kwargs: Extra options passed through to ``LLMRequest``.

        Yields:
            StreamChunk: Each chunk produced by the resolved provider.

        Raises:
            LLMProviderError: If no provider is registered or the model cannot
                be resolved. Raised on first iteration, not at call time.
        """
        resolved_model = self._resolve_model_alias(model)

        if not self._providers:
            raise LLMProviderError("", "No provider registered")

        try:
            provider, actual_model = self._resolve_model(resolved_model)
        except ModelNotFoundError as e:
            raise LLMProviderError("", str(e)) from e

        request = LLMRequest(
            messages=messages,
            model=actual_model,
            tools=tools,
            tool_choice=tool_choice,
            **kwargs,
        )

        start = time.monotonic()
        final_usage = None
        final_model = resolved_model

        async for chunk in provider.chat_stream(request):
            # Only the metadata needed for accounting is retained; the text
            # itself is forwarded, not buffered.
            if chunk.usage:
                final_usage = chunk.usage
            if chunk.model:
                final_model = chunk.model
            yield chunk

        # Track usage after stream completes
        latency_ms = (time.monotonic() - start) * 1000
        if final_usage is None:
            # Provider never reported usage; record an empty one so the call
            # still appears in the tracker.
            final_usage = TokenUsage()

        cost = self._calculate_cost(final_model, final_usage)
        self._usage_tracker.record(
            agent_name=agent_name,
            model=final_model,
            usage=final_usage,
            cost=cost,
            latency_ms=latency_ms,
        )
    def _resolve_model_alias(self, model: str) -> str:
        """解析模型别名"""
        if model in self._config.model_aliases:
--- a/src/agentkit/llm/protocol.py
+++ b/src/agentkit/llm/protocol.py
@ -56,6 +56,17 @@ class LLMRequest:
        self._extra = kwargs
@dataclass
 class StreamChunk:
    """One chunk of a streamed LLM response."""
    # Text carried by this chunk (a delta, not the accumulated response).
    content: str  # Delta content
    # Model name reported for this chunk.
    model: str
    tool_calls: list[ToolCall] = field(default_factory=list)  # Accumulated tool calls (only in final chunk)
    usage: TokenUsage | None = None  # Only in final chunk
    is_final: bool = False  # True for the last chunk
@dataclass
 class LLMResponse:
    """LLM 响应"""
@ -78,3 +89,18 @@ class LLMProvider(ABC):
    async def chat(self, request: LLMRequest) -> LLMResponse:
        """Send a chat request and return the response."""
        ...
    async def chat_stream(self, request: LLMRequest):
        """Default streaming implementation for providers without native streaming.

        Awaits the regular ``chat()`` call and wraps its complete response in
        one ``StreamChunk`` marked ``is_final=True``. Subclasses that can
        stream should override this method.

        Args:
            request: The chat request to execute.

        Yields:
            StreamChunk: A single chunk holding the full content, model,
            tool calls and usage of the non-streaming response.
        """
        full_response = await self.chat(request)
        sole_chunk = StreamChunk(
            content=full_response.content,
            model=full_response.model,
            tool_calls=full_response.tool_calls,
            usage=full_response.usage,
            is_final=True,
        )
        yield sole_chunk
--- a/src/agentkit/llm/providers/openai.py
+++ b/src/agentkit/llm/providers/openai.py
@ -7,7 +7,7 @@ import time
 import httpx
 from agentkit.core.exceptions import LLMProviderError
-from agentkit.llm.protocol import LLMProvider, LLMRequest, LLMResponse, TokenUsage, ToolCall
+from agentkit.llm.protocol import LLMProvider, LLMRequest, LLMResponse, StreamChunk, TokenUsage, ToolCall
 logger = logging.getLogger(__name__)
@ -100,3 +100,108 @@ class OpenAICompatibleProvider(LLMProvider):
            tool_calls=tool_calls,
            latency_ms=latency_ms,
        )
    async def chat_stream(self, request: LLMRequest):
        """Stream a chat completion from the ``/chat/completions`` endpoint via SSE.

        Text deltas are yielded as they arrive. Tool-call fragments and token
        usage are accumulated across the whole stream and delivered together
        in one trailing chunk with ``is_final=True``, so a consumer that stops
        at the final chunk never misses either of them.

        Args:
            request: Model, messages, sampling settings and optional tools.

        Yields:
            StreamChunk: One chunk per non-empty content delta, followed by
            exactly one final chunk carrying ``tool_calls`` and ``usage``
            (``usage`` is ``None`` if the server never reported it).

        Raises:
            LLMProviderError: If the endpoint answers with a non-200 status.
        """
        url = f"{self._base_url}/chat/completions"
        headers = {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json",
        }
        payload: dict = {
            "model": request.model,
            "messages": request.messages,
            "temperature": request.temperature,
            "max_tokens": request.max_tokens,
            "stream": True,
            # Ask the server to append token usage to the stream.
            "stream_options": {"include_usage": True},
        }
        if request.tools:
            payload["tools"] = request.tools
            payload["tool_choice"] = request.tool_choice

        async with self._client.stream("POST", url, json=payload, headers=headers) as response:
            if response.status_code != 200:
                # A streamed body has to be read explicitly before it can be
                # inspected; include a bounded excerpt so the failure is
                # diagnosable from the exception alone.
                error_body = (await response.aread()).decode("utf-8", errors="replace").strip()
                detail = f"HTTP {response.status_code}"
                if error_body:
                    detail = f"{detail}: {error_body[:500]}"
                raise LLMProviderError("openai", detail)

            accumulated_tool_calls: dict[int, dict] = {}  # index -> {id, name, arguments_str}
            final_usage: TokenUsage | None = None
            last_model = request.model

            async for line in response.aiter_lines():
                line = line.strip()
                # SSE allows "data:" with or without a following space.
                if not line.startswith("data:"):
                    continue

                data_str = line[5:].strip()
                if data_str == "[DONE]":
                    break

                try:
                    data = json.loads(data_str)
                except json.JSONDecodeError:
                    continue
                if not isinstance(data, dict):
                    continue

                last_model = data.get("model") or last_model

                # Usage may arrive on a dedicated chunk with no choices or be
                # attached to a regular chunk; capture it in both cases.
                usage_data = data.get("usage")
                if usage_data:
                    final_usage = TokenUsage(
                        prompt_tokens=usage_data.get("prompt_tokens", 0),
                        completion_tokens=usage_data.get("completion_tokens", 0),
                    )

                choices = data.get("choices") or []
                if not choices:
                    continue

                delta = choices[0].get("delta") or {}

                # Tool calls are streamed as fragments keyed by index: the id
                # and name arrive once, the JSON arguments arrive in pieces.
                for tc in delta.get("tool_calls") or []:
                    idx = tc.get("index", 0)
                    slot = accumulated_tool_calls.setdefault(
                        idx, {"id": "", "name": "", "arguments_str": ""}
                    )
                    if tc.get("id"):
                        slot["id"] = tc["id"]
                    func = tc.get("function") or {}
                    if func.get("name"):
                        slot["name"] = func["name"]
                    if func.get("arguments"):
                        slot["arguments_str"] += func["arguments"]

                # Only yield content chunks (not empty deltas)
                content = delta.get("content")
                if content:
                    yield StreamChunk(content=content, model=last_model)

            tool_calls = []
            for idx in sorted(accumulated_tool_calls):
                tc_data = accumulated_tool_calls[idx]
                raw_arguments = tc_data["arguments_str"]
                try:
                    arguments = json.loads(raw_arguments) if raw_arguments else {}
                except json.JSONDecodeError:
                    # Keep unparseable arguments available to the caller
                    # rather than dropping the tool call.
                    arguments = {"raw": raw_arguments}
                tool_calls.append(ToolCall(
                    id=tc_data["id"],
                    name=tc_data["name"],
                    arguments=arguments,
                ))

            # Exactly one terminal chunk, carrying everything that is only
            # known once the stream has ended.
            yield StreamChunk(
                content="",
                model=last_model,
                tool_calls=tool_calls,
                usage=final_usage,
                is_final=True,
            )
--- a/src/agentkit/server/client.py
+++ b/src/agentkit/server/client.py
@ -126,6 +126,38 @@ class AgentKitClient:
        response.raise_for_status()
        return response.json()
    async def stream_task(
        self,
        input_data: dict,
        skill_name: str | None = None,
        agent_name: str | None = None,
    ):
        """Stream task execution events via SSE.

        Posts the task to ``/api/v1/tasks/stream`` and parses the response
        line by line as a server-sent event stream.

        Args:
            input_data: Task input, sent as ``input_data`` in the JSON body.
            skill_name: Optional skill name; sent only when truthy.
            agent_name: Optional agent name; sent only when truthy.

        Yields:
            dict: ``{"event": <event name>, "data": <decoded JSON>}`` for each
            ``data`` line. ``event`` is ``""`` when the server named no event
            for that message.

        Raises:
            json.JSONDecodeError: If a non-empty ``data`` payload is not JSON.
            Whatever ``response.raise_for_status()`` raises on an error status.
        """
        # Imported once per call (not once per line); aliased so it cannot be
        # confused with the ``json=`` keyword argument below.
        import json as _json

        payload: dict[str, Any] = {"input_data": input_data}
        if skill_name:
            payload["skill_name"] = skill_name
        if agent_name:
            payload["agent_name"] = agent_name

        async with self._client.stream(
            "POST", "/api/v1/tasks/stream", json=payload
        ) as response:
            response.raise_for_status()
            event_type = ""
            async for raw_line in response.aiter_lines():
                line = raw_line.strip()
                if not line:
                    # A blank line ends the current event; forget its name so
                    # it cannot leak onto a later unnamed message.
                    event_type = ""
                    continue
                # SSE allows the field separator with or without a space.
                if line.startswith("event:"):
                    event_type = line[6:].strip()
                elif line.startswith("data:"):
                    raw_data = line[5:].strip()
                    if not raw_data:
                        continue
                    yield {"event": event_type, "data": _json.loads(raw_data)}
    async def close(self) -> None:
        """Close the HTTP client by awaiting its ``aclose()`` method."""
        await self._client.aclose()
--- a/src/agentkit/server/routes/tasks.py
+++ b/src/agentkit/server/routes/tasks.py
@ -1,5 +1,6 @@
 """Task submission routes"""
 import json
 import uuid
 from datetime import datetime, timezone
@ -191,3 +192,79 @@ async def cancel_task(task_id: str, req: Request):
    if not cancelled:
        raise HTTPException(status_code=400, detail="Task cannot be cancelled (not running or not found)")
    return {"task_id": task_id, "status": "cancelled"}
@router.post("/tasks/stream")
async def stream_task(request: SubmitTaskRequest, req: Request):
    """Submit a task and stream ReAct events via SSE.

    The agent is resolved before streaming starts, so resolution failures are
    returned as ordinary HTTP errors. Resolution order: explicit
    ``agent_name``, then explicit ``skill_name`` (creating the agent from the
    skill if the pool has none), otherwise intent routing over all registered
    skills.

    Each ReAct event is sent as one SSE message whose event name is the
    event type and whose data is a JSON object with ``step``, ``data`` and
    ``timestamp``. If execution fails after streaming has begun, a final
    ``error`` message with the same shape is sent instead of cutting the
    connection without explanation.

    Raises:
        HTTPException: 404 when the named agent or skill does not exist;
            400 when no skills are registered and nothing was specified, or
            when routing / agent creation raises ValueError or RuntimeError.
    """
    from sse_starlette.sse import EventSourceResponse

    pool = req.app.state.agent_pool
    skill_registry = req.app.state.skill_registry
    intent_router = req.app.state.intent_router

    agent = None

    # Same agent resolution logic as submit_task
    if request.agent_name:
        agent = pool.get_agent(request.agent_name)
        if agent is None:
            raise HTTPException(
                status_code=404,
                detail=f"Agent '{request.agent_name}' not found",
            )
    elif request.skill_name:
        try:
            skill_registry.get(request.skill_name)
        except Exception as exc:
            # Chain the cause so the original lookup failure stays visible
            # in server-side tracebacks.
            raise HTTPException(
                status_code=404,
                detail=f"Skill '{request.skill_name}' not found",
            ) from exc
        agent = pool.get_agent(request.skill_name)
        if agent is None:
            agent = await pool.create_agent_from_skill(request.skill_name)
    else:
        all_skills = skill_registry.list_skills()
        if not all_skills:
            raise HTTPException(
                status_code=400,
                detail="No skills registered and no skill_name or agent_name specified",
            )
        try:
            routing_result = await intent_router.route(request.input_data, all_skills)
            skill_registry.get(routing_result.matched_skill)
            agent = pool.get_agent(routing_result.matched_skill)
            if agent is None:
                agent = await pool.create_agent_from_skill(routing_result.matched_skill)
        except (ValueError, RuntimeError) as e:
            raise HTTPException(status_code=400, detail=str(e)) from e

    async def event_generator():
        """Yield SSE message dicts for every event of the ReAct run."""
        import logging

        from agentkit.core.react import ReActEngine

        react_engine = ReActEngine(llm_gateway=req.app.state.llm_gateway)

        # Build messages from input
        messages = [{"role": "user", "content": str(request.input_data)}]

        # Private agent attributes are read defensively, all the same way.
        registry = getattr(agent, "_tool_registry", None)
        tools = list(registry._tools.values()) if registry else []

        last_step = 0
        try:
            async for event in react_engine.execute_stream(
                messages=messages,
                tools=tools,
                model=getattr(agent, "_llm_model", "default"),
                agent_name=agent.name,
                system_prompt=getattr(agent, "_system_prompt", None),
            ):
                last_step = event.step
                yield {
                    "event": event.event_type,
                    # default=str: tool results are arbitrary objects; an
                    # unserialisable value is shown as text rather than
                    # aborting the whole stream.
                    "data": json.dumps(
                        {
                            "step": event.step,
                            "data": event.data,
                            "timestamp": event.timestamp,
                        },
                        default=str,
                    ),
                }
        except Exception as exc:
            # The HTTP status has already been sent, so the failure can only
            # be reported in-band. Cancellation is not an Exception and still
            # propagates normally.
            logging.getLogger(__name__).exception(
                "Streaming task failed for agent %r", agent.name
            )
            yield {
                "event": "error",
                "data": json.dumps({
                    "step": last_step,
                    "data": {"message": str(exc)},
                    "timestamp": datetime.now(timezone.utc).isoformat(),
                }),
            }

    return EventSourceResponse(event_generator())
--- a/tests/unit/test_streaming.py
+++ b/tests/unit/test_streaming.py
@ -0,0 +1,431 @@
 """Streaming System 单元测试 - U8/U9/U10
 覆盖:
 - StreamChunk 数据类
 - LLMProvider.chat_stream 默认回退
 - Gateway.chat_stream 流式 + 用量追踪
 - ReActEvent 数据类
 - ReActEngine.execute_stream 事件流
 - SSE 端点 /tasks/stream
 """
 import json
 from unittest.mock import AsyncMock, MagicMock, patch
 import pytest
 from agentkit.llm.protocol import LLMRequest, LLMResponse, TokenUsage, ToolCall
 from agentkit.tools.base import Tool
 # ── Test Helpers ──────────────────────────────────────────
 class FakeTool(Tool):
    """Fake tool for tests: ``execute`` returns a canned result."""

    def __init__(
        self,
        name: str = "fake_tool",
        description: str = "A fake tool for testing",
        result: dict | None = None,
    ):
        """Create the tool.

        Args:
            name: Tool name passed to the ``Tool`` base class.
            description: Tool description passed to the ``Tool`` base class.
            result: Value returned by ``execute``; ``{"status": "ok"}`` when
                omitted.
        """
        super().__init__(name=name, description=description)
        # Compare against None so an explicitly requested empty dict is
        # honoured instead of being replaced by the default.
        self._result = {"status": "ok"} if result is None else result

    async def execute(self, **kwargs) -> dict:
        """Return the canned result, ignoring all arguments."""
        return self._result
 def make_response(
    content: str = "",
    tool_calls: list[ToolCall] | None = None,
    prompt_tokens: int = 10,
    completion_tokens: int = 20,
 ) -> LLMResponse:
    """Build an ``LLMResponse`` for tests with model ``"test-model"``.

    Args:
        content: Response text.
        tool_calls: Tool calls to attach; a falsy value becomes an empty list.
        prompt_tokens: Prompt token count for the usage record.
        completion_tokens: Completion token count for the usage record.
    """
    token_usage = TokenUsage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
    )
    attached_calls = tool_calls if tool_calls else []
    return LLMResponse(
        content=content,
        model="test-model",
        usage=token_usage,
        tool_calls=attached_calls,
    )
 # ══════════════════════════════════════════════════════════
 # U8: StreamChunk + chat_stream
 # ══════════════════════════════════════════════════════════
 class TestStreamChunk:
    """Tests for the StreamChunk dataclass."""

    def test_creation_with_content(self):
        from agentkit.llm.protocol import StreamChunk

        plain = StreamChunk(content="Hello", model="gpt-4o")

        # Only content and model were supplied: optional fields keep defaults.
        assert plain.content == "Hello"
        assert plain.model == "gpt-4o"
        assert plain.tool_calls == []
        assert plain.usage is None
        assert plain.is_final is False

    def test_with_tool_calls_and_is_final(self):
        from agentkit.llm.protocol import StreamChunk

        search_call = ToolCall(id="tc_1", name="search", arguments={"q": "test"})
        closing = StreamChunk(
            content="",
            model="gpt-4o",
            tool_calls=[search_call],
            is_final=True,
        )

        assert len(closing.tool_calls) == 1
        assert closing.tool_calls[0].name == "search"
        assert closing.is_final is True

    def test_with_usage(self):
        from agentkit.llm.protocol import StreamChunk

        closing = StreamChunk(
            content="",
            model="gpt-4o",
            usage=TokenUsage(prompt_tokens=100, completion_tokens=50),
            is_final=True,
        )

        assert closing.usage is not None
        assert closing.usage.total_tokens == 150
        assert closing.is_final is True
 class TestLLMProviderChatStreamDefault:
    """The default LLMProvider.chat_stream falls back to chat()."""

    async def test_default_chat_stream_yields_single_chunk(self):
        from agentkit.llm.protocol import LLMProvider, StreamChunk

        class SimpleProvider(LLMProvider):
            """Provider that implements only the non-streaming chat()."""

            async def chat(self, request: LLMRequest) -> LLMResponse:
                return LLMResponse(
                    content="hello",
                    model="test",
                    usage=TokenUsage(prompt_tokens=5, completion_tokens=10),
                )

        provider = SimpleProvider()
        request = LLMRequest(
            messages=[{"role": "user", "content": "hi"}],
            model="test",
        )

        chunks = [chunk async for chunk in provider.chat_stream(request)]

        assert len(chunks) == 1
        # The fallback must speak the streaming protocol, not hand back the
        # raw LLMResponse.
        assert isinstance(chunks[0], StreamChunk)
        assert chunks[0].content == "hello"
        assert chunks[0].is_final is True
        assert chunks[0].usage.total_tokens == 15
 class TestGatewayChatStream:
    """Streaming tests for Gateway.chat_stream."""

    async def test_yields_chunks_from_provider(self):
        from agentkit.llm.protocol import StreamChunk
        from agentkit.llm.gateway import LLMGateway
        from agentkit.llm.protocol import LLMProvider

        class StreamingProvider(LLMProvider):
            """Provider that streams two text chunks and a usage chunk."""

            async def chat(self, request: LLMRequest) -> LLMResponse:
                return LLMResponse(
                    content="fallback",
                    model="test",
                    usage=TokenUsage(),
                )

            async def chat_stream(self, request: LLMRequest):
                for text in ("Hello ", "World"):
                    yield StreamChunk(content=text, model="test")
                yield StreamChunk(
                    content="",
                    model="test",
                    usage=TokenUsage(prompt_tokens=10, completion_tokens=5),
                    is_final=True,
                )

        gateway = LLMGateway()
        gateway.register_provider("test", StreamingProvider())

        stream = gateway.chat_stream(
            messages=[{"role": "user", "content": "hi"}],
            model="test/model",
        )
        received = [chunk async for chunk in stream]

        # The gateway forwards every provider chunk, in order.
        assert len(received) == 3
        assert received[0].content == "Hello "
        assert received[1].content == "World"
        assert received[2].is_final is True

    async def test_tracks_usage_after_stream_completes(self):
        from agentkit.llm.protocol import StreamChunk
        from agentkit.llm.gateway import LLMGateway
        from agentkit.llm.protocol import LLMProvider

        class StreamingProvider(LLMProvider):
            """Provider that streams one text chunk and a usage chunk."""

            async def chat(self, request: LLMRequest) -> LLMResponse:
                return LLMResponse(
                    content="fallback",
                    model="test",
                    usage=TokenUsage(),
                )

            async def chat_stream(self, request: LLMRequest):
                yield StreamChunk(content="Hi", model="test")
                yield StreamChunk(
                    content="",
                    model="test",
                    usage=TokenUsage(prompt_tokens=100, completion_tokens=50),
                    is_final=True,
                )

        gateway = LLMGateway()
        gateway.register_provider("test", StreamingProvider())

        # Drain the stream; usage is recorded only once it is exhausted.
        drained = []
        async for chunk in gateway.chat_stream(
            messages=[{"role": "user", "content": "hi"}],
            model="test/model",
            agent_name="stream_agent",
        ):
            drained.append(chunk)

        summary = gateway.get_usage()
        assert summary.total_tokens == 150
 # ══════════════════════════════════════════════════════════
 # U9: ReActEvent + execute_stream
 # ══════════════════════════════════════════════════════════
 class TestReActEvent:
    """Tests for the ReActEvent dataclass."""

    def test_creation_with_event_type_and_step(self):
        from agentkit.core.react import ReActEvent

        thinking = ReActEvent(event_type="thinking", step=1)

        assert thinking.event_type == "thinking"
        assert thinking.step == 1
        # No payload supplied: data defaults to an empty dict.
        assert thinking.data == {}

    def test_has_timestamp(self):
        from agentkit.core.react import ReActEvent

        stamped = ReActEvent(event_type="thinking", step=1)

        assert stamped.timestamp is not None
        assert len(stamped.timestamp) > 0

    def test_with_data(self):
        from agentkit.core.react import ReActEvent

        payload = {"tool_name": "search", "arguments": {"q": "test"}}
        tool_event = ReActEvent(event_type="tool_call", step=2, data=payload)

        assert tool_event.data["tool_name"] == "search"
 class TestReActEngineExecuteStream:
    """Event-stream tests for ReActEngine.execute_stream."""

    async def test_yields_thinking_event_at_each_step(self):
        from agentkit.core.react import ReActEngine

        gateway = MagicMock()
        gateway.chat = AsyncMock(return_value=make_response(content="Final answer"))

        engine = ReActEngine(llm_gateway=gateway)
        events = [
            event
            async for event in engine.execute_stream(
                messages=[{"role": "user", "content": "Hello"}],
            )
        ]

        thinking_events = [e for e in events if e.event_type == "thinking"]
        assert len(thinking_events) >= 1
        assert thinking_events[0].step == 1
        # A direct answer takes one step: think once, then answer.
        assert [e.event_type for e in events] == ["thinking", "final_answer"]

    async def test_yields_tool_call_and_tool_result_events(self):
        from agentkit.core.react import ReActEngine

        tool = FakeTool(name="calculator", result={"value": 42})

        gateway = MagicMock()
        gateway.chat = AsyncMock(side_effect=[
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="calculator", arguments={"expr": "6*7"})],
            ),
            make_response(content="The result is 42"),
        ])

        engine = ReActEngine(llm_gateway=gateway)
        events = [
            event
            async for event in engine.execute_stream(
                messages=[{"role": "user", "content": "Calculate"}],
                tools=[tool],
            )
        ]

        tool_call_events = [e for e in events if e.event_type == "tool_call"]
        tool_result_events = [e for e in events if e.event_type == "tool_result"]

        assert len(tool_call_events) == 1
        assert tool_call_events[0].data["tool_name"] == "calculator"
        assert len(tool_result_events) == 1
        assert tool_result_events[0].data["tool_name"] == "calculator"
        assert tool_result_events[0].data["result"] == {"value": 42}
        # The call must be announced before its result is reported.
        assert events.index(tool_call_events[0]) < events.index(tool_result_events[0])

    async def test_yields_final_answer_event(self):
        from agentkit.core.react import ReActEngine

        gateway = MagicMock()
        gateway.chat = AsyncMock(return_value=make_response(content="The answer is 42"))

        engine = ReActEngine(llm_gateway=gateway)
        events = [
            event
            async for event in engine.execute_stream(
                messages=[{"role": "user", "content": "What is the answer?"}],
            )
        ]

        final_events = [e for e in events if e.event_type == "final_answer"]
        assert len(final_events) == 1
        assert final_events[0].data["output"] == "The answer is 42"
        assert final_events[0].data["total_steps"] >= 1
        assert final_events[0].data["total_tokens"] > 0
        # The final answer terminates the stream.
        assert events[-1] is final_events[0]

    async def test_yields_max_steps_reached_when_hitting_limit(self):
        from agentkit.core.react import ReActEngine

        tool = FakeTool(name="search", result={"results": ["data"]})
        # Every LLM reply asks for another tool call, so the loop can only
        # end by exhausting its step budget.
        always_tool_response = make_response(
            content="Thinking...",
            tool_calls=[ToolCall(id="tc_loop", name="search", arguments={"query": "more"})],
        )

        gateway = MagicMock()
        gateway.chat = AsyncMock(return_value=always_tool_response)

        engine = ReActEngine(llm_gateway=gateway, max_steps=3)
        events = [
            event
            async for event in engine.execute_stream(
                messages=[{"role": "user", "content": "Keep searching"}],
                tools=[tool],
            )
        ]

        final_events = [e for e in events if e.event_type == "final_answer"]
        assert len(final_events) == 1
        assert final_events[0].data.get("max_steps_reached") is True
        # One thinking event per allowed step, no more.
        assert len([e for e in events if e.event_type == "thinking"]) == 3
 # ══════════════════════════════════════════════════════════
 # U10: SSE Endpoint + Client SDK
 # ══════════════════════════════════════════════════════════
 class TestSSEEndpoint:
    """Tests for the SSE /tasks/stream endpoint."""

    @staticmethod
    def _client_for(gateway):
        """Build a TestClient for an app with empty skill and tool registries."""
        from fastapi.testclient import TestClient
        from agentkit.server.app import create_app
        from agentkit.skills.registry import SkillRegistry
        from agentkit.tools.registry import ToolRegistry

        app = create_app(
            llm_gateway=gateway,
            skill_registry=SkillRegistry(),
            tool_registry=ToolRegistry(),
        )
        return TestClient(app)

    def test_stream_task_returns_event_source_response(self):
        from agentkit.llm.gateway import LLMGateway

        canned_reply = LLMResponse(
            content="Final answer",
            model="test-model",
            usage=TokenUsage(prompt_tokens=10, completion_tokens=20),
        )
        mock_provider = AsyncMock()
        mock_provider.chat.return_value = canned_reply

        gateway = LLMGateway()
        gateway.register_provider("test", mock_provider)
        client = self._client_for(gateway)

        # Create an agent first
        agent_config = {
            "name": "stream_agent",
            "agent_type": "test",
            "task_mode": "llm_generate",
            "prompt": {"identity": "Stream Agent"},
        }
        client.post("/api/v1/agents", json={"config": agent_config})

        # Stream task
        task_body = {
            "input_data": {"query": "test"},
            "agent_name": "stream_agent",
        }
        response = client.post("/api/v1/tasks/stream", json=task_body)

        # Should return 200 with SSE content type
        assert response.status_code == 200
        assert "text/event-stream" in response.headers.get("content-type", "")

    def test_stream_task_with_invalid_agent_returns_404(self):
        from agentkit.llm.gateway import LLMGateway

        client = self._client_for(LLMGateway())

        task_body = {
            "input_data": {"query": "test"},
            "agent_name": "nonexistent_agent",
        }
        response = client.post("/api/v1/tasks/stream", json=task_body)

        assert response.status_code == 404