feat(tools): add AskHumanTool + token streaming in ReAct execute_stream

- AskHumanTool: Human-in-the-Loop tool for Chat mode, pushes questions via WebSocket callback and waits for user reply via asyncio.Future - Token streaming: execute_stream() now uses chat_stream() instead of chat(), yielding token-type ReActEvents for each StreamChunk - _build_response_from_stream() static method constructs LLMResponse from accumulated stream data - Export AskHumanTool from tools/__init__.py - 12 new tests (7 AskHumanTool + 5 token streaming), all passing
2026-06-07 23:40:43 +08:00 · 2026-06-07 23:40:43 +08:00 · 7054ac02b6
parent 6013d5189b
commit 7054ac02b6
5 changed files with 398 additions and 3 deletions
--- a/src/agentkit/core/react.py
+++ b/src/agentkit/core/react.py
@ -16,6 +16,7 @@ from typing import TYPE_CHECKING, Any
 from agentkit.core.exceptions import TaskCancelledError, TaskTimeoutError
 from agentkit.core.protocol import CancellationToken
 from agentkit.llm.gateway import LLMGateway
 from agentkit.llm.protocol import LLMResponse
 from agentkit.tools.base import Tool
 from agentkit.telemetry.tracing import get_tracer, start_span, _OTEL_AVAILABLE
 from agentkit.telemetry.metrics import (
@ -59,7 +60,7 @@ class ReActResult:
 class ReActEvent:
    """ReAct 执行事件"""
-    event_type: str  # "thinking", "tool_call", "tool_result", "final_answer", "error"
+    event_type: str  # "thinking", "token", "tool_call", "tool_result", "final_answer", "error"
    step: int
    data: dict[str, Any] = field(default_factory=dict)
    timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
@ -533,14 +534,42 @@ class ReActEngine:
                    data={"message": f"Step {step}: Calling LLM..."},
                )
-                # Think: call LLM
+                # Think: call LLM (with optional token streaming)
                llm_start = time.monotonic()
-                response = await self._llm_gateway.chat(
+
                # Use streaming for token-by-token output
                stream_content = ""
                stream_usage = None
                stream_tool_calls: list[Any] = []
                stream_model = model
                async for chunk in self._llm_gateway.chat_stream(
                    messages=conversation,
                    model=model,
                    agent_name=agent_name,
                    task_type=task_type,
                    tools=tool_schemas,
                ):
                    if chunk.content:
                        stream_content += chunk.content
                        yield ReActEvent(
                            event_type="token",
                            step=step,
                            data={"content": chunk.content},
                        )
                    if chunk.usage:
                        stream_usage = chunk.usage
                    if chunk.tool_calls:
                        stream_tool_calls = chunk.tool_calls
                    if chunk.model:
                        stream_model = chunk.model
                # Build response-like object from stream
                response = self._build_response_from_stream(
                    content=stream_content,
                    tool_calls=stream_tool_calls,
                    usage=stream_usage,
                    model=stream_model,
                )
                llm_duration_ms = int((time.monotonic() - llm_start) * 1000)
@ -776,6 +805,24 @@ class ReActEngine:
            schemas.append(schema)
        return schemas
    @staticmethod
    def _build_response_from_stream(
        content: str,
        tool_calls: list[Any],
        usage: Any,
        model: str,
    ) -> LLMResponse:
        """Build an LLMResponse from accumulated stream chunks."""
        from agentkit.llm.protocol import LLMResponse, TokenUsage
        if usage is None:
            usage = TokenUsage()
        return LLMResponse(
            content=content,
            tool_calls=tool_calls,
            usage=usage,
            model=model,
        )
    def _find_tool(self, name: str, tools: list[Tool]) -> Tool | None:
        """根据名称从可用工具中查找工具"""
        for tool in tools:
--- a/src/agentkit/tools/init.py
+++ b/src/agentkit/tools/init.py
@ -9,6 +9,7 @@ from agentkit.tools.composition import SequentialChain, ParallelFanOut, DynamicS
 from agentkit.tools.web_crawl import WebCrawlTool
 from agentkit.tools.schema_tools import SchemaExtractTool, SchemaGenerateTool
 from agentkit.tools.baidu_search import BaiduSearchTool
 from agentkit.tools.ask_human import AskHumanTool
 # Conditional import: HeadroomRetrieveTool requires HeadroomCompressor
 try:
@ -29,5 +30,6 @@ __all__ = [
    "SchemaExtractTool",
    "SchemaGenerateTool",
    "BaiduSearchTool",
    "AskHumanTool",
    "HeadroomRetrieveTool",
 ]
--- a/src/agentkit/tools/ask_human.py
+++ b/src/agentkit/tools/ask_human.py
@ -0,0 +1,119 @@
 """AskHumanTool — Human-in-the-Loop tool for Chat mode.
 When registered in a Chat-mode Agent, this tool allows the ReAct loop
 to pause and ask the user a question, then wait for a reply via the
 WebSocket connection.
 """
 from __future__ import annotations
 import asyncio
 import logging
 import uuid
 from typing import Any
 from agentkit.tools.base import Tool
 logger = logging.getLogger(__name__)
 class AskHumanTool(Tool):
    """Tool that asks the human user a question and waits for a reply.
    Only functional in Chat mode where a WebSocket connection exists.
    In Task mode, this tool should not be registered.
    Usage in ReAct loop:
        The Agent calls this tool when it needs clarification or
        a decision from the user. The question is pushed to the
        client via WebSocket, and the tool blocks until the user
        replies or a timeout expires.
    """
    def __init__(self, timeout: float = 60.0):
        super().__init__(
            name="ask_human",
            description="Ask the human user a question and wait for their reply. "
                        "Use this when you need clarification, a decision, or "
                        "confirmation from the user before proceeding.",
        )
        self._timeout = timeout
        # Shared dict injected by the Chat WebSocket handler:
        # request_id -> asyncio.Future
        self._pending_replies: dict[str, asyncio.Future] | None = None
        # Callback to push question to client
        self._ask_callback: Any = None
    def configure(
        self,
        pending_replies: dict[str, asyncio.Future] | None = None,
        ask_callback: Any = None,
    ) -> None:
        """Configure the tool with WebSocket communication channels.
        Args:
            pending_replies: Dict mapping request_id to Future that will
                be resolved when the user replies.
            ask_callback: Async callable(request_id, question, options)
                that pushes the question to the client.
        """
        self._pending_replies = pending_replies
        self._ask_callback = ask_callback
    @property
    def parameters(self) -> dict[str, Any]:
        return {
            "type": "object",
            "properties": {
                "question": {
                    "type": "string",
                    "description": "The question to ask the user",
                },
                "options": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Optional list of choices for the user",
                },
            },
            "required": ["question"],
        }
    async def execute(self, **kwargs: Any) -> dict:
        """Ask the user a question and wait for their reply.
        Args:
            question: The question to ask.
            options: Optional list of choices.
        Returns:
            Dict with "reply" key containing the user's response.
        """
        question = kwargs.get("question", "")
        options = kwargs.get("options")
        if self._pending_replies is None or self._ask_callback is None:
            # Not in Chat mode — return a default response
            logger.warning("AskHumanTool called outside Chat mode, returning default response")
            default = options[0] if options else "confirmed"
            return {"reply": default}
        request_id = str(uuid.uuid4())[:8]
        # Create and register future BEFORE calling callback so the
        # callback (or any concurrent task) can resolve it immediately.
        loop = asyncio.get_event_loop()
        future = loop.create_future()
        self._pending_replies[request_id] = future
        # Push question to client
        await self._ask_callback(request_id, question, options)
        try:
            reply = await asyncio.wait_for(future, timeout=self._timeout)
            return {"reply": str(reply)}
        except asyncio.TimeoutError:
            logger.warning(f"AskHumanTool timeout for request {request_id}")
            default = options[0] if options else "timeout — no response received"
            return {"reply": default}
        finally:
            self._pending_replies.pop(request_id, None)
--- a/tests/unit/test_ask_human_tool.py
+++ b/tests/unit/test_ask_human_tool.py
@ -0,0 +1,100 @@
 """Tests for AskHumanTool."""
 import asyncio
 import pytest
 from agentkit.tools.ask_human import AskHumanTool
 class TestAskHumanToolBasic:
    def test_tool_properties(self):
        tool = AskHumanTool()
        assert tool.name == "ask_human"
        assert "question" in str(tool.parameters)
        assert tool.parameters["required"] == ["question"]
    @pytest.mark.asyncio
    async def test_no_chat_mode_returns_default(self):
        tool = AskHumanTool()
        result = await tool.execute(question="What should I do?")
        assert result == {"reply": "confirmed"}
    @pytest.mark.asyncio
    async def test_no_chat_mode_with_options(self):
        tool = AskHumanTool()
        result = await tool.execute(question="Choose:", options=["A", "B", "C"])
        assert result == {"reply": "A"}
 class TestAskHumanToolChatMode:
    @pytest.mark.asyncio
    async def test_ask_and_receive_reply(self):
        tool = AskHumanTool(timeout=5.0)
        pending: dict[str, asyncio.Future] = {}
        ask_calls: list[tuple[str, str, list[str] | None]] = []
        async def mock_ask_callback(request_id, question, options):
            ask_calls.append((request_id, question, options))
        tool.configure(pending_replies=pending, ask_callback=mock_ask_callback)
        # Start the execute in a task
        task = asyncio.create_task(
            tool.execute(question="Continue?", options=["yes", "no"])
        )
        # Wait for the ask to be pushed
        await asyncio.sleep(0.1)
        assert len(ask_calls) == 1
        request_id = ask_calls[0][0]
        assert ask_calls[0][1] == "Continue?"
        assert ask_calls[0][2] == ["yes", "no"]
        # Simulate user reply
        assert request_id in pending
        pending[request_id].set_result("yes")
        result = await task
        assert result == {"reply": "yes"}
    @pytest.mark.asyncio
    async def test_timeout_returns_default(self):
        tool = AskHumanTool(timeout=0.1)
        pending: dict[str, asyncio.Future] = {}
        async def mock_ask_callback(request_id, question, options):
            pass  # Never reply
        tool.configure(pending_replies=pending, ask_callback=mock_ask_callback)
        result = await tool.execute(question="Continue?", options=["yes", "no"])
        assert result == {"reply": "yes"}
    @pytest.mark.asyncio
    async def test_timeout_no_options(self):
        tool = AskHumanTool(timeout=0.1)
        pending: dict[str, asyncio.Future] = {}
        async def mock_ask_callback(request_id, question, options):
            pass
        tool.configure(pending_replies=pending, ask_callback=mock_ask_callback)
        result = await tool.execute(question="Continue?")
        assert "timeout" in result["reply"]
    @pytest.mark.asyncio
    async def test_cleanup_on_reply(self):
        tool = AskHumanTool(timeout=5.0)
        pending: dict[str, asyncio.Future] = {}
        async def mock_ask_callback(request_id, question, options):
            # Immediately reply
            await asyncio.sleep(0.05)
            pending[request_id].set_result("ok")
        tool.configure(pending_replies=pending, ask_callback=mock_ask_callback)
        await tool.execute(question="Test?")
        # Pending should be cleaned up
        assert len(pending) == 0
--- a/tests/unit/test_react_token_streaming.py
+++ b/tests/unit/test_react_token_streaming.py
@ -0,0 +1,127 @@
 """Tests for ReAct engine token streaming via execute_stream()."""
 import pytest
 from unittest.mock import AsyncMock, MagicMock
 from agentkit.core.react import ReActEngine, ReActEvent
 from agentkit.llm.gateway import LLMGateway
 from agentkit.llm.protocol import LLMResponse, StreamChunk, TokenUsage
 def _make_stream_chunks(
    content_parts: list[str],
    model: str = "test-model",
    usage: TokenUsage | None = None,
 ) -> list[StreamChunk]:
    """Build a list of StreamChunk objects simulating a streaming response."""
    chunks = []
    for part in content_parts:
        chunks.append(StreamChunk(content=part, model=model))
    # Final chunk with usage
    chunks.append(StreamChunk(
        content="",
        model=model,
        usage=usage or TokenUsage(prompt_tokens=10, completion_tokens=20),
        is_final=True,
    ))
    return chunks
 def _make_stream_gateway(chunks_list: list[list[StreamChunk]]) -> MagicMock:
    """Create a mock LLMGateway whose chat_stream yields the given chunks."""
    gateway = MagicMock(spec=LLMGateway)
    async def _stream(**kwargs):
        for chunks in chunks_list:
            for chunk in chunks:
                yield chunk
        # Remove after use
        chunks_list.pop(0)
    gateway.chat_stream = _stream
    return gateway
 class TestReActTokenStreaming:
    """Test that execute_stream() yields token events from chat_stream()."""
    @pytest.mark.asyncio
    async def test_yields_token_events(self):
        """execute_stream should yield token events for each stream chunk."""
        chunks = _make_stream_chunks(["Hello", " world", "!"])
        gateway = _make_stream_gateway([chunks])
        engine = ReActEngine(llm_gateway=gateway)
        events = []
        async for event in engine.execute_stream(
            messages=[{"role": "user", "content": "Hi"}],
        ):
            events.append(event)
        token_events = [e for e in events if e.event_type == "token"]
        assert len(token_events) == 3
        assert token_events[0].data["content"] == "Hello"
        assert token_events[1].data["content"] == " world"
        assert token_events[2].data["content"] == "!"
    @pytest.mark.asyncio
    async def test_final_answer_after_streaming(self):
        """After streaming tokens, a final_answer event should be yielded."""
        chunks = _make_stream_chunks(["The answer is 42"])
        gateway = _make_stream_gateway([chunks])
        engine = ReActEngine(llm_gateway=gateway)
        events = []
        async for event in engine.execute_stream(
            messages=[{"role": "user", "content": "What?"}],
        ):
            events.append(event)
        final_events = [e for e in events if e.event_type == "final_answer"]
        assert len(final_events) == 1
        assert "42" in final_events[0].data.get("output", "")
    @pytest.mark.asyncio
    async def test_token_events_have_correct_step(self):
        """Token events should carry the current step number."""
        chunks = _make_stream_chunks(["Hi"])
        gateway = _make_stream_gateway([chunks])
        engine = ReActEngine(llm_gateway=gateway)
        events = []
        async for event in engine.execute_stream(
            messages=[{"role": "user", "content": "Hi"}],
        ):
            events.append(event)
        token_events = [e for e in events if e.event_type == "token"]
        for te in token_events:
            assert te.step >= 1
    @pytest.mark.asyncio
    async def test_build_response_from_stream(self):
        """_build_response_from_stream should construct a valid LLMResponse."""
        usage = TokenUsage(prompt_tokens=5, completion_tokens=10)
        response = ReActEngine._build_response_from_stream(
            content="Hello world",
            tool_calls=[],
            usage=usage,
            model="test-model",
        )
        assert response.content == "Hello world"
        assert response.model == "test-model"
        assert response.usage.prompt_tokens == 5
        assert response.usage.completion_tokens == 10
        assert not response.has_tool_calls
    @pytest.mark.asyncio
    async def test_build_response_from_stream_no_usage(self):
        """_build_response_from_stream should handle None usage gracefully."""
        response = ReActEngine._build_response_from_stream(
            content="test",
            tool_calls=[],
            usage=None,
            model="test-model",
        )
        assert response.content == "test"
        assert response.usage.prompt_tokens == 0