998 lines
38 KiB
Python
998 lines
38 KiB
Python
"""ReWOO Engine 单元测试"""
|
||
|
||
import json
|
||
from unittest.mock import AsyncMock, MagicMock, patch
|
||
|
||
import pytest
|
||
|
||
from agentkit.llm.gateway import LLMGateway
|
||
from agentkit.llm.protocol import LLMResponse, TokenUsage, ToolCall
|
||
from agentkit.tools.base import Tool
|
||
|
||
|
||
# ── Test Helpers ──────────────────────────────────────────
|
||
|
||
|
||
class FakeTool(Tool):
    """Configurable stand-in for a real ``Tool``, used only by these tests.

    Each call to :meth:`execute` is counted and its keyword arguments are
    recorded, so a test can check how often and with which arguments the
    tool was invoked.
    """

    def __init__(
        self,
        name: str = "fake_tool",
        description: str = "A fake tool for testing",
        input_schema: dict | None = None,
        result: dict | None = None,
        should_fail: bool = False,
    ):
        """Set up the fake tool.

        Args:
            name: Tool name, forwarded to ``Tool.__init__``.
            description: Tool description, forwarded to ``Tool.__init__``.
            input_schema: Input schema, forwarded to ``Tool.__init__``.
            result: Value returned by :meth:`execute`. ``None`` selects the
                default ``{"status": "ok"}``.
            should_fail: When true, :meth:`execute` raises ``RuntimeError``
                instead of returning ``result``.
        """
        super().__init__(
            name=name,
            description=description,
            input_schema=input_schema,
        )
        # Compare against None instead of relying on truthiness, so that an
        # explicitly supplied empty dict is returned as given rather than
        # being swapped for the default.
        self._result = {"status": "ok"} if result is None else result
        self._should_fail = should_fail
        self.call_count = 0  # number of execute() calls so far
        self.last_kwargs: dict | None = None  # kwargs of the latest call

    async def execute(self, **kwargs) -> dict:
        """Record the call, then return the configured result or fail.

        Raises:
            RuntimeError: If the tool was created with ``should_fail=True``.
        """
        # Bookkeeping happens before the failure check, so failed calls are
        # counted as well.
        self.call_count += 1
        self.last_kwargs = kwargs
        if self._should_fail:
            raise RuntimeError(f"Tool '{self.name}' execution failed")
        return self._result
|
||
|
||
|
||
def make_mock_gateway(responses: list[LLMResponse]) -> LLMGateway:
    """Build a mock ``LLMGateway`` whose ``chat`` yields *responses* in order."""
    mocked = MagicMock(spec=LLMGateway)
    # AsyncMock hands out one item of the side_effect list per awaited call.
    mocked.chat = AsyncMock(side_effect=responses)
    return mocked
|
||
|
||
|
||
def make_response(
    content: str = "",
    tool_calls: list[ToolCall] | None = None,
    prompt_tokens: int = 10,
    completion_tokens: int = 20,
) -> LLMResponse:
    """Shortcut for building an ``LLMResponse`` with the fixed model name "test-model".

    Args:
        content: Text content of the response.
        tool_calls: Tool calls carried by the response; a falsy value becomes
            an empty list.
        prompt_tokens: Prompt token count stored in the usage record.
        completion_tokens: Completion token count stored in the usage record.
    """
    token_usage = TokenUsage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
    )
    calls = tool_calls or []
    return LLMResponse(
        content=content,
        model="test-model",
        usage=token_usage,
        tool_calls=calls,
    )
|
||
|
||
|
||
def make_plan_response(
    steps: list[dict],
    reasoning: str = "Plan reasoning",
    prompt_tokens: int = 50,
    completion_tokens: int = 100,
) -> LLMResponse:
    """Build an ``LLMResponse`` whose content is a JSON-encoded execution plan.

    The content is a JSON object with the keys ``"reasoning"`` and ``"steps"``.

    Args:
        steps: Plan step dictionaries, serialized as given.
        reasoning: Plan-level reasoning text.
        prompt_tokens: Prompt token count passed on to ``make_response``.
        completion_tokens: Completion token count passed on to ``make_response``.
    """
    plan_payload = {"reasoning": reasoning, "steps": steps}
    return make_response(
        content=json.dumps(plan_payload),
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
    )
|
||
|
||
|
||
# ── Test: Single-step Plan ────────────────────────────────
|
||
|
||
|
||
class TestReWOOSingleStepPlan:
    """Single-step plan: one tool call is planned, executed, then synthesized."""

    async def test_single_tool_call_plan(self):
        """A one-step plan yields one tool_call step followed by a final_answer step."""
        from agentkit.core.rewoo import ReWOOEngine
        from agentkit.core.react import ReActResult

        calculator = FakeTool(name="calculator", result={"value": 42})

        # The mocked gateway replies in order: the plan first, the synthesis second.
        planning_reply = make_plan_response([
            {"step_id": 1, "tool_name": "calculator", "arguments": {"expr": "6*7"}, "reasoning": "Need to calculate"},
        ])
        synthesis_reply = make_response(content="The result is 42")
        engine = ReWOOEngine(
            llm_gateway=make_mock_gateway([planning_reply, synthesis_reply]),
        )

        outcome = await engine.execute(
            messages=[{"role": "user", "content": "Calculate 6*7"}],
            tools=[calculator],
        )

        assert isinstance(outcome, ReActResult)
        assert outcome.output == "The result is 42"

        # Expected trajectory: 1 tool_call + 1 final_answer = 2 steps.
        assert outcome.total_steps == 2
        assert len(outcome.trajectory) == 2

        call_step, answer_step = outcome.trajectory[0], outcome.trajectory[1]
        assert call_step.action == "tool_call"
        assert call_step.tool_name == "calculator"
        assert call_step.arguments == {"expr": "6*7"}
        assert call_step.result == {"value": 42}
        assert answer_step.action == "final_answer"
        assert answer_step.content == "The result is 42"
        assert calculator.call_count == 1
|
||
|
||
|
||
# ── Test: Multi-step Plan ─────────────────────────────────
|
||
|
||
|
||
class TestReWOOMultiStepPlan:
    """Multi-step plans: several tool calls are planned, all executed, then synthesized."""

    async def test_three_step_plan(self):
        """Three planned steps run in plan order, each tool exactly once."""
        from agentkit.core.rewoo import ReWOOEngine

        search = FakeTool(name="search", result={"results": ["Python is great"]})
        calculator = FakeTool(name="calculator", result={"value": 100})
        weather = FakeTool(name="weather", result={"temp": 25, "city": "Shanghai"})

        planning_reply = make_plan_response([
            {"step_id": 1, "tool_name": "search", "arguments": {"query": "Python"}, "reasoning": "Search first"},
            {"step_id": 2, "tool_name": "calculator", "arguments": {"expr": "10*10"}, "reasoning": "Calculate"},
            {"step_id": 3, "tool_name": "weather", "arguments": {"city": "Shanghai"}, "reasoning": "Check weather"},
        ])
        synthesis_reply = make_response(
            content="Based on search, calculation (100), and weather (25°C), here is the answer",
        )
        engine = ReWOOEngine(
            llm_gateway=make_mock_gateway([planning_reply, synthesis_reply]),
        )

        outcome = await engine.execute(
            messages=[{"role": "user", "content": "Search, calculate and check weather"}],
            tools=[search, calculator, weather],
        )

        # 3 tool_calls + 1 final_answer = 4 steps.
        assert outcome.total_steps == 4
        for position, expected_tool in enumerate(("search", "calculator", "weather")):
            assert outcome.trajectory[position].tool_name == expected_tool
        assert outcome.trajectory[3].action == "final_answer"

        for fake in (search, calculator, weather):
            assert fake.call_count == 1

        for fragment in ("100", "25"):
            assert fragment in outcome.output

    async def test_plan_step_ids_preserved(self):
        """Tool-call steps are ``ReWOOStep`` objects carrying the plan's step ids."""
        from agentkit.core.rewoo import ReWOOEngine, ReWOOStep

        tool_a = FakeTool(name="tool_a", result={"a": 1})

        planning_reply = make_plan_response([
            {"step_id": 1, "tool_name": "tool_a", "arguments": {"x": 1}, "reasoning": "Step 1"},
            {"step_id": 2, "tool_name": "tool_a", "arguments": {"x": 2}, "reasoning": "Step 2"},
        ])
        synthesis_reply = make_response(content="Done")
        engine = ReWOOEngine(
            llm_gateway=make_mock_gateway([planning_reply, synthesis_reply]),
        )

        outcome = await engine.execute(
            messages=[{"role": "user", "content": "Do two things"}],
            tools=[tool_a],
        )

        # The first two trajectory entries must expose plan_step_id 1 and 2.
        for position, expected_id in enumerate((1, 2)):
            recorded = outcome.trajectory[position]
            assert isinstance(recorded, ReWOOStep)
            assert recorded.plan_step_id == expected_id
|
||
|
||
|
||
# ── Test: Tool Call Failure ───────────────────────────────
|
||
|
||
|
||
class TestReWOOToolCallFailure:
    """Tool-call failures: one tool fails while the remaining steps still run."""

    async def test_one_tool_fails_others_continue(self):
        """A raising tool is recorded as an error and later steps still execute."""
        from agentkit.core.rewoo import ReWOOEngine

        healthy = FakeTool(name="good_tool", result={"status": "ok"})
        broken = FakeTool(name="bad_tool", should_fail=True)
        trailing = FakeTool(name="another_tool", result={"data": "hello"})

        planning_reply = make_plan_response([
            {"step_id": 1, "tool_name": "good_tool", "arguments": {}, "reasoning": "Call good tool"},
            {"step_id": 2, "tool_name": "bad_tool", "arguments": {}, "reasoning": "Call bad tool"},
            {"step_id": 3, "tool_name": "another_tool", "arguments": {}, "reasoning": "Call another tool"},
        ])
        synthesis_reply = make_response(content="Partial results with one error")
        engine = ReWOOEngine(
            llm_gateway=make_mock_gateway([planning_reply, synthesis_reply]),
        )

        outcome = await engine.execute(
            messages=[{"role": "user", "content": "Use all tools"}],
            tools=[healthy, broken, trailing],
        )

        # Every planned tool must have been attempted exactly once.
        for fake in (healthy, broken, trailing):
            assert fake.call_count == 1

        # Step 2 carries an error-like result.
        failed_step = outcome.trajectory[1]
        assert failed_step.tool_name == "bad_tool"
        failure_text = str(failed_step.result).lower()
        assert "error" in failure_text or "failed" in failure_text

        # Step 3 still succeeds.
        later_step = outcome.trajectory[2]
        assert later_step.tool_name == "another_tool"
        assert later_step.result == {"data": "hello"}

        # The final answer is still produced.
        assert outcome.trajectory[3].action == "final_answer"
        assert outcome.output == "Partial results with one error"

    async def test_tool_not_found_returns_error(self):
        """Planning a tool that is not in the tool list yields an error-like result."""
        from agentkit.core.rewoo import ReWOOEngine

        planning_reply = make_plan_response([
            {"step_id": 1, "tool_name": "nonexistent_tool", "arguments": {}, "reasoning": "Call missing tool"},
        ])
        synthesis_reply = make_response(content="Tool was not found, but here is my answer")
        engine = ReWOOEngine(
            llm_gateway=make_mock_gateway([planning_reply, synthesis_reply]),
        )

        outcome = await engine.execute(
            messages=[{"role": "user", "content": "Use missing tool"}],
            tools=[],  # deliberately empty: the planned tool cannot be resolved
        )

        first_step = outcome.trajectory[0]
        assert first_step.action == "tool_call"
        lookup_text = str(first_step.result).lower()
        assert "error" in lookup_text or "not found" in lookup_text
        assert outcome.output == "Tool was not found, but here is my answer"
|
||
|
||
|
||
# ── Test: Planning Failure Fallback ───────────────────────
|
||
|
||
|
||
class TestReWOOPlanningFailureFallback:
    """Planning failure: without a valid JSON plan the engine falls back to ReAct."""

    async def test_invalid_json_falls_back_to_react(self):
        """Two non-JSON planning replies are followed by a ReAct run using a tool."""
        from agentkit.core.rewoo import ReWOOEngine

        # Reply order: failed plan, failed simplified plan, then the ReAct
        # fallback (a tool call followed by the final answer).
        scripted_replies = [
            make_response(content="I cannot create a plan for this task."),
            make_response(content="Still cannot create a plan"),
            make_response(
                content="",
                tool_calls=[ToolCall(id="tc_1", name="search", arguments={"query": "test"})],
            ),
            make_response(content="ReAct fallback answer"),
        ]
        engine = ReWOOEngine(llm_gateway=make_mock_gateway(scripted_replies))

        search = FakeTool(name="search", result={"results": ["found"]})
        outcome = await engine.execute(
            messages=[{"role": "user", "content": "Complex task"}],
            tools=[search],
        )

        # The result must come from the ReAct fallback.
        assert outcome.output == "ReAct fallback answer"
        assert outcome.total_steps >= 1
        assert outcome.fallback_strategy == "react"

    async def test_malformed_json_falls_back_to_react(self):
        """Truncated JSON in the planning reply also ends in the ReAct fallback."""
        from agentkit.core.rewoo import ReWOOEngine

        # Reply order: malformed plan, failed simplified plan, ReAct answer.
        scripted_replies = [
            make_response(content='{"reasoning": "plan", "steps": [invalid json'),
            make_response(content='Also not a plan'),
            make_response(content="ReAct answer"),
        ]
        engine = ReWOOEngine(llm_gateway=make_mock_gateway(scripted_replies))

        outcome = await engine.execute(
            messages=[{"role": "user", "content": "Task"}],
        )

        assert outcome.output == "ReAct answer"
        assert outcome.fallback_strategy == "react"

    async def test_missing_steps_key_falls_back_to_react(self):
        """Valid JSON lacking a "steps" key also ends in the ReAct fallback."""
        from agentkit.core.rewoo import ReWOOEngine

        # Reply order: plan without "steps", failed simplified plan, ReAct answer.
        scripted_replies = [
            make_response(content='{"reasoning": "no steps here"}'),
            make_response(content='Also no steps'),
            make_response(content="ReAct fallback"),
        ]
        engine = ReWOOEngine(llm_gateway=make_mock_gateway(scripted_replies))

        outcome = await engine.execute(
            messages=[{"role": "user", "content": "Task"}],
        )

        assert outcome.output == "ReAct fallback"
        assert outcome.fallback_strategy == "react"
|
||
|
||
|
||
# ── Test: Cancellation Token ──────────────────────────────
|
||
|
||
|
||
class TestReWOOCancellation:
    """Cancellation-token behaviour of ``ReWOOEngine.execute``."""

    async def test_cancel_before_execution_raises_error(self):
        """A token cancelled before ``execute`` is called raises ``TaskCancelledError``."""
        from agentkit.core.rewoo import ReWOOEngine
        from agentkit.core.protocol import CancellationToken
        from agentkit.core.exceptions import TaskCancelledError

        gateway = make_mock_gateway([make_response(content="plan")])
        engine = ReWOOEngine(llm_gateway=gateway)

        token = CancellationToken()
        token.cancel()

        with pytest.raises(TaskCancelledError):
            await engine.execute(
                messages=[{"role": "user", "content": "Task"}],
                cancellation_token=token,
            )

    async def test_cancel_mid_execution(self):
        """A token cancelled during the planning call raises ``TaskCancelledError``."""
        from agentkit.core.rewoo import ReWOOEngine
        from agentkit.core.protocol import CancellationToken
        from agentkit.core.exceptions import TaskCancelledError

        token = CancellationToken()
        tool = FakeTool(name="tool_a", result={"a": 1})

        async def chat_with_cancel(**kwargs):
            # Cancel from inside the chat call (the first one is the planning
            # request) and still hand back a valid multi-step plan. The
            # former `call_count >= 1` guard was always true, so the token is
            # simply cancelled on every call.
            token.cancel()
            return make_plan_response([
                {"step_id": 1, "tool_name": "tool_a", "arguments": {"x": 1}, "reasoning": "Step 1"},
                {"step_id": 2, "tool_name": "tool_a", "arguments": {"x": 2}, "reasoning": "Step 2"},
            ])

        gateway = MagicMock(spec=LLMGateway)
        gateway.chat = AsyncMock(side_effect=chat_with_cancel)
        engine = ReWOOEngine(llm_gateway=gateway)

        with pytest.raises(TaskCancelledError):
            await engine.execute(
                messages=[{"role": "user", "content": "Task"}],
                tools=[tool],
                cancellation_token=token,
            )

    async def test_uncancelled_token_works_normally(self):
        """Passing a token that is never cancelled leaves a normal run unaffected."""
        from agentkit.core.rewoo import ReWOOEngine
        from agentkit.core.protocol import CancellationToken

        plan_response = make_plan_response([
            {"step_id": 1, "tool_name": "search", "arguments": {"q": "test"}, "reasoning": "Search"},
        ])
        synthesis_response = make_response(content="Answer")

        gateway = make_mock_gateway([plan_response, synthesis_response])
        engine = ReWOOEngine(llm_gateway=gateway)

        tool = FakeTool(name="search", result={"results": ["found"]})
        token = CancellationToken()  # never cancelled

        result = await engine.execute(
            messages=[{"role": "user", "content": "Task"}],
            tools=[tool],
            cancellation_token=token,
        )

        assert result.output == "Answer"
        assert result.status == "success"
|
||
|
||
|
||
# ── Test: Timeout ─────────────────────────────────────────
|
||
|
||
|
||
class TestReWOOTimeout:
    """Timeout behaviour of ``ReWOOEngine.execute``."""

    async def test_timeout_raises_task_timeout_error(self):
        """A chat call slower than ``timeout_seconds`` raises ``TaskTimeoutError``."""
        import asyncio
        from agentkit.core.rewoo import ReWOOEngine
        from agentkit.core.exceptions import TaskTimeoutError

        async def delayed_chat(**kwargs):
            # Sleeps longer (0.5 s) than the 0.3 s budget passed to execute().
            await asyncio.sleep(0.5)
            return make_response(content="slow")

        sluggish_gateway = MagicMock(spec=LLMGateway)
        sluggish_gateway.chat = AsyncMock(side_effect=delayed_chat)
        engine = ReWOOEngine(llm_gateway=sluggish_gateway)

        with pytest.raises(TaskTimeoutError):
            await engine.execute(
                messages=[{"role": "user", "content": "Slow task"}],
                timeout_seconds=0.3,
            )

    async def test_timeout_zero_means_no_timeout(self):
        """With ``timeout_seconds=0`` a run with slightly slow chat calls still completes."""
        import asyncio
        from agentkit.core.rewoo import ReWOOEngine

        async def brief_delay_chat(**kwargs):
            await asyncio.sleep(0.1)
            return make_response(content="done")

        delayed_gateway = MagicMock(spec=LLMGateway)
        delayed_gateway.chat = AsyncMock(side_effect=brief_delay_chat)
        engine = ReWOOEngine(llm_gateway=delayed_gateway)

        outcome = await engine.execute(
            messages=[{"role": "user", "content": "Task"}],
            timeout_seconds=0,
        )
        assert outcome.output == "done"
|
||
|
||
|
||
# ── Test: Interface Compatibility ─────────────────────────
|
||
|
||
|
||
class TestReWOOInterfaceCompatibility:
    """Interface compatibility between ``ReWOOEngine`` and ``ReActEngine``."""

    async def test_same_return_type(self):
        """``execute`` returns a ``ReActResult`` exposing the expected attributes."""
        from agentkit.core.rewoo import ReWOOEngine
        from agentkit.core.react import ReActResult

        planning_reply = make_plan_response([
            {"step_id": 1, "tool_name": "tool_a", "arguments": {}, "reasoning": "Step"},
        ])
        synthesis_reply = make_response(content="Answer")
        engine = ReWOOEngine(
            llm_gateway=make_mock_gateway([planning_reply, synthesis_reply]),
        )

        tool_a = FakeTool(name="tool_a", result={"a": 1})
        outcome = await engine.execute(
            messages=[{"role": "user", "content": "Task"}],
            tools=[tool_a],
        )

        assert isinstance(outcome, ReActResult)
        for attribute in ("output", "trajectory", "total_steps", "total_tokens", "status"):
            assert hasattr(outcome, attribute)

    async def test_same_execute_signature(self):
        """The parameter names of ``execute`` match on both engines, in order."""
        import inspect
        from agentkit.core.rewoo import ReWOOEngine
        from agentkit.core.react import ReActEngine

        rewoo_params = list(inspect.signature(ReWOOEngine.execute).parameters)
        react_params = list(inspect.signature(ReActEngine.execute).parameters)

        assert rewoo_params == react_params, f"Parameter mismatch: ReWOO={rewoo_params}, ReAct={react_params}"

    async def test_trajectory_uses_react_step(self):
        """Every trajectory entry is a ``ReActStep``; tool calls are ``ReWOOStep``."""
        from agentkit.core.rewoo import ReWOOEngine, ReWOOStep
        from agentkit.core.react import ReActStep

        planning_reply = make_plan_response([
            {"step_id": 1, "tool_name": "tool_a", "arguments": {"x": 1}, "reasoning": "Step"},
        ])
        synthesis_reply = make_response(content="Done")
        engine = ReWOOEngine(
            llm_gateway=make_mock_gateway([planning_reply, synthesis_reply]),
        )

        tool_a = FakeTool(name="tool_a", result={"a": 1})
        outcome = await engine.execute(
            messages=[{"role": "user", "content": "Task"}],
            tools=[tool_a],
        )

        # All entries must pass an isinstance check against ReActStep.
        for step in outcome.trajectory:
            assert isinstance(step, ReActStep), f"Step {step} is not a ReActStep"

        # Tool-call entries must additionally be ReWOOStep with a plan_step_id.
        for step in outcome.trajectory:
            if step.action != "tool_call":
                continue
            assert isinstance(step, ReWOOStep)
            assert step.plan_step_id is not None

    async def test_status_field_present(self):
        """A successful run reports ``status == "success"``."""
        from agentkit.core.rewoo import ReWOOEngine

        planning_reply = make_plan_response([
            {"step_id": 1, "tool_name": "tool_a", "arguments": {}, "reasoning": "Step"},
        ])
        synthesis_reply = make_response(content="Answer")
        engine = ReWOOEngine(
            llm_gateway=make_mock_gateway([planning_reply, synthesis_reply]),
        )

        tool_a = FakeTool(name="tool_a", result={"a": 1})
        outcome = await engine.execute(
            messages=[{"role": "user", "content": "Task"}],
            tools=[tool_a],
        )

        assert outcome.status == "success"
|
||
|
||
|
||
# ── Test: Empty Plan (No Tools Needed) ────────────────────
|
||
|
||
|
||
class TestReWOOEmptyPlan:
    """Empty plan: the LLM decides no tools are needed and answers directly."""

    async def test_empty_plan_direct_answer(self):
        """A plan with zero steps produces a single final_answer step."""
        from agentkit.core.rewoo import ReWOOEngine

        # First reply: a plan whose step list is empty. Second reply: the answer.
        empty_plan_reply = make_plan_response([], reasoning="No tools needed")
        answer_reply = make_response(content="Direct answer without tools")
        engine = ReWOOEngine(
            llm_gateway=make_mock_gateway([empty_plan_reply, answer_reply]),
        )

        outcome = await engine.execute(
            messages=[{"role": "user", "content": "Simple question"}],
        )

        assert outcome.output == "Direct answer without tools"
        assert outcome.total_steps == 1
        assert outcome.trajectory[0].action == "final_answer"
|
||
|
||
|
||
# ── Test: Token Accumulation ──────────────────────────────
|
||
|
||
|
||
class TestReWOOTokenAccumulation:
    """Token accounting across the planning and synthesis calls."""

    async def test_total_tokens_accumulated(self):
        """``total_tokens`` is the sum of the usage reported by both LLM calls."""
        from agentkit.core.rewoo import ReWOOEngine

        planning_reply = make_plan_response(
            steps=[{"step_id": 1, "tool_name": "tool_a", "arguments": {}, "reasoning": "Step"}],
            prompt_tokens=100,
            completion_tokens=50,
        )
        synthesis_reply = make_response(
            content="Answer",
            prompt_tokens=200,
            completion_tokens=30,
        )
        engine = ReWOOEngine(
            llm_gateway=make_mock_gateway([planning_reply, synthesis_reply]),
        )

        tool_a = FakeTool(name="tool_a", result={"a": 1})
        outcome = await engine.execute(
            messages=[{"role": "user", "content": "Task"}],
            tools=[tool_a],
        )

        # Planning (100 + 50) plus synthesis (200 + 30) = 380.
        assert outcome.total_tokens == 380
|
||
|
||
|
||
# ── Test: Streaming ───────────────────────────────────────
|
||
|
||
|
||
class TestReWOOStreaming:
    """Streaming execution through ``ReWOOEngine.execute_stream``."""

    async def test_stream_yields_correct_events(self):
        """All expected event types are emitted, and in the expected order."""
        from agentkit.core.rewoo import ReWOOEngine

        plan_response = make_plan_response([
            {"step_id": 1, "tool_name": "tool_a", "arguments": {"x": 1}, "reasoning": "Step 1"},
        ])
        synthesis_response = make_response(content="Final answer")

        gateway = make_mock_gateway([plan_response, synthesis_response])
        engine = ReWOOEngine(llm_gateway=gateway)

        tool = FakeTool(name="tool_a", result={"a": 1})

        events = [
            event
            async for event in engine.execute_stream(
                messages=[{"role": "user", "content": "Task"}],
                tools=[tool],
            )
        ]
        event_types = [e.event_type for e in events]

        expected_order = (
            "planning",
            "plan_generated",
            "tool_call",
            "tool_result",
            "synthesis",
            "final_answer",
        )
        for expected_type in expected_order:
            assert expected_type in event_types

        # The first occurrence of each type must come strictly after the
        # first occurrence of the type listed before it.
        first_seen = [event_types.index(expected_type) for expected_type in expected_order]
        assert all(earlier < later for earlier, later in zip(first_seen, first_seen[1:]))

    async def test_stream_plan_generated_event_data(self):
        """The ``plan_generated`` event carries the planned steps in plan order."""
        from agentkit.core.rewoo import ReWOOEngine

        plan_response = make_plan_response([
            {"step_id": 1, "tool_name": "tool_a", "arguments": {"x": 1}, "reasoning": "Step 1"},
            {"step_id": 2, "tool_name": "tool_b", "arguments": {"y": 2}, "reasoning": "Step 2"},
        ])
        synthesis_response = make_response(content="Done")

        gateway = make_mock_gateway([plan_response, synthesis_response])
        engine = ReWOOEngine(llm_gateway=gateway)

        tool_a = FakeTool(name="tool_a", result={"a": 1})
        tool_b = FakeTool(name="tool_b", result={"b": 2})

        events = [
            event
            async for event in engine.execute_stream(
                messages=[{"role": "user", "content": "Task"}],
                tools=[tool_a, tool_b],
            )
        ]

        plan_event = next(e for e in events if e.event_type == "plan_generated")
        assert "steps" in plan_event.data
        assert len(plan_event.data["steps"]) == 2
        assert plan_event.data["steps"][0]["tool_name"] == "tool_a"
        assert plan_event.data["steps"][1]["tool_name"] == "tool_b"

    async def test_stream_final_answer_event_data(self):
        """The ``final_answer`` event carries the output plus step and token totals."""
        from agentkit.core.rewoo import ReWOOEngine

        plan_response = make_plan_response([
            {"step_id": 1, "tool_name": "tool_a", "arguments": {}, "reasoning": "Step"},
        ])
        synthesis_response = make_response(content="Final answer")

        gateway = make_mock_gateway([plan_response, synthesis_response])
        engine = ReWOOEngine(llm_gateway=gateway)

        tool = FakeTool(name="tool_a", result={"a": 1})

        events = [
            event
            async for event in engine.execute_stream(
                messages=[{"role": "user", "content": "Task"}],
                tools=[tool],
            )
        ]

        final_event = next(e for e in events if e.event_type == "final_answer")
        assert final_event.data["output"] == "Final answer"
        assert "total_steps" in final_event.data
        assert "total_tokens" in final_event.data

    async def test_stream_planning_failure_falls_back(self):
        """When both planning attempts fail, the stream still ends with a final answer."""
        from agentkit.core.rewoo import ReWOOEngine

        # Reply order: invalid plan, failed simplified plan, ReAct fallback answer.
        invalid_plan = make_response(content="Not a plan")
        simplified_fail = make_response(content="Still not a plan")
        react_response = make_response(content="ReAct answer")

        gateway = make_mock_gateway([invalid_plan, simplified_fail, react_response])
        engine = ReWOOEngine(llm_gateway=gateway)

        events = [
            event
            async for event in engine.execute_stream(
                messages=[{"role": "user", "content": "Task"}],
            )
        ]

        event_types = [e.event_type for e in events]
        assert "planning" in event_types  # ReWOO planning was started
        # After the fallback, a final answer must still be emitted.
        assert "final_answer" in event_types
|
||
|
||
|
||
# ── Test: Plan Parsing ────────────────────────────────────
|
||
|
||
|
||
class TestReWOOPlanParsing:
    """Plan parsing through ``ReWOOEngine._parse_plan``."""

    def test_parse_valid_json(self):
        """A plain JSON plan is parsed into its reasoning and steps."""
        from agentkit.core.rewoo import ReWOOEngine

        engine = ReWOOEngine(llm_gateway=MagicMock(spec=LLMGateway))
        raw_plan = json.dumps({
            "reasoning": "Need to search and calculate",
            "steps": [
                {"step_id": 1, "tool_name": "search", "arguments": {"q": "test"}, "reasoning": "Search"},
                {"step_id": 2, "tool_name": "calc", "arguments": {"expr": "1+1"}, "reasoning": "Calculate"},
            ],
        })

        parsed = engine._parse_plan(raw_plan)

        assert parsed is not None
        assert parsed.reasoning == "Need to search and calculate"
        assert len(parsed.steps) == 2
        assert parsed.steps[0].tool_name == "search"
        assert parsed.steps[1].tool_name == "calc"

    def test_parse_json_in_code_block(self):
        """A plan wrapped in a fenced ``json`` code block is still parsed."""
        from agentkit.core.rewoo import ReWOOEngine

        engine = ReWOOEngine(llm_gateway=MagicMock(spec=LLMGateway))
        fenced_plan = '```json\n{"reasoning": "Plan", "steps": [{"step_id": 1, "tool_name": "search", "arguments": {}, "reasoning": "Search"}]}\n```'

        parsed = engine._parse_plan(fenced_plan)

        assert parsed is not None
        assert len(parsed.steps) == 1

    def test_parse_json_with_surrounding_text(self):
        """A plan embedded between lines of free text is still parsed."""
        from agentkit.core.rewoo import ReWOOEngine

        engine = ReWOOEngine(llm_gateway=MagicMock(spec=LLMGateway))
        chatty_plan = 'Here is my plan:\n{"reasoning": "Plan", "steps": [{"step_id": 1, "tool_name": "search", "arguments": {}, "reasoning": "Search"}]}\nThat should work!'

        parsed = engine._parse_plan(chatty_plan)

        assert parsed is not None
        assert len(parsed.steps) == 1

    def test_parse_invalid_json_returns_none(self):
        """Text containing no JSON at all parses to ``None``."""
        from agentkit.core.rewoo import ReWOOEngine

        engine = ReWOOEngine(llm_gateway=MagicMock(spec=LLMGateway))

        assert engine._parse_plan("This is not JSON at all") is None

    def test_parse_missing_steps_returns_none(self):
        """A JSON object without a "steps" key parses to ``None``."""
        from agentkit.core.rewoo import ReWOOEngine

        engine = ReWOOEngine(llm_gateway=MagicMock(spec=LLMGateway))

        assert engine._parse_plan('{"reasoning": "No steps"}') is None

    def test_parse_steps_without_tool_name_skipped(self):
        """Steps lacking "tool_name" are dropped while the other steps are kept."""
        from agentkit.core.rewoo import ReWOOEngine

        engine = ReWOOEngine(llm_gateway=MagicMock(spec=LLMGateway))
        raw_plan = json.dumps({
            "reasoning": "Plan",
            "steps": [
                {"step_id": 1, "arguments": {}, "reasoning": "No tool name"},
                {"step_id": 2, "tool_name": "search", "arguments": {}, "reasoning": "Has tool name"},
            ],
        })

        parsed = engine._parse_plan(raw_plan)

        assert parsed is not None
        assert len(parsed.steps) == 1
        assert parsed.steps[0].tool_name == "search"
|
||
|
||
|
||
# ── Test: Max Plan Steps ──────────────────────────────────
|
||
|
||
|
||
class TestReWOOMaxPlanSteps:
    """Upper bound on the number of plan steps (``max_plan_steps``)."""

    async def test_plan_truncated_to_max_steps(self):
        """A five-step plan run with ``max_plan_steps=2`` executes only two tool calls."""
        from agentkit.core.rewoo import ReWOOEngine

        # Five planned steps, but the engine below is limited to two.
        oversized_plan = []
        for i in range(1, 6):
            oversized_plan.append(
                {"step_id": i, "tool_name": "tool_a", "arguments": {"x": i}, "reasoning": f"Step {i}"}
            )
        planning_reply = make_plan_response(oversized_plan)
        synthesis_reply = make_response(content="Done")
        engine = ReWOOEngine(
            llm_gateway=make_mock_gateway([planning_reply, synthesis_reply]),
            max_plan_steps=2,
        )

        tool_a = FakeTool(name="tool_a", result={"a": 1})
        outcome = await engine.execute(
            messages=[{"role": "user", "content": "Task"}],
            tools=[tool_a],
        )

        # Only two tool_call entries may appear in the trajectory.
        executed_calls = [entry for entry in outcome.trajectory if entry.action == "tool_call"]
        assert len(executed_calls) == 2

    async def test_max_plan_steps_validation(self):
        """Constructing the engine with ``max_plan_steps=0`` raises ``ValueError``."""
        from agentkit.core.rewoo import ReWOOEngine

        placeholder_gateway = MagicMock(spec=LLMGateway)
        with pytest.raises(ValueError, match="max_plan_steps must be >= 1"):
            ReWOOEngine(llm_gateway=placeholder_gateway, max_plan_steps=0)
|
||
|
||
|
||
# Need to import ReActResult for type checking in tests
|
||
from agentkit.core.react import ReActResult
|
||
|
||
|
||
# ── Test: Progressive Fallback Chain ──────────────────────
|
||
|
||
|
||
class TestReWOOProgressiveFallback:
    """Progressive fallback chain: planning -> simplified_rewoo -> react -> direct."""

    async def test_normal_planning_succeeds_no_fallback(self):
        """When normal planning succeeds, ``fallback_strategy`` stays ``None``."""
        from agentkit.core.rewoo import ReWOOEngine

        tool = FakeTool(name="calculator", result={"value": 42})

        plan_response = make_plan_response([
            {"step_id": 1, "tool_name": "calculator", "arguments": {"expr": "6*7"}, "reasoning": "Calculate"},
        ])
        synthesis_response = make_response(content="The result is 42")

        gateway = make_mock_gateway([plan_response, synthesis_response])
        engine = ReWOOEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Calculate 6*7"}],
            tools=[tool],
        )

        assert result.output == "The result is 42"
        assert result.fallback_strategy is None

    async def test_planning_fails_simplified_succeeds(self):
        """Planning fails, simplified planning succeeds: strategy is "simplified_rewoo"."""
        from agentkit.core.rewoo import ReWOOEngine

        tool = FakeTool(name="search", result={"results": ["found"]})

        # The first reply is not a plan; the second (simplified) one is valid.
        invalid_plan_response = make_response(content="I cannot create a plan for this task.")
        simplified_plan_response = make_plan_response([
            {"step_id": 1, "tool_name": "search", "arguments": {"query": "test"}, "reasoning": "Simplified search"},
        ])
        synthesis_response = make_response(content="Simplified result")

        gateway = make_mock_gateway([invalid_plan_response, simplified_plan_response, synthesis_response])
        engine = ReWOOEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Complex task"}],
            tools=[tool],
        )

        assert result.output == "Simplified result"
        assert result.fallback_strategy == "simplified_rewoo"

    async def test_planning_and_simplified_fail_react_succeeds(self):
        """Both planning attempts fail, the ReAct fallback answers: strategy is "react"."""
        from agentkit.core.rewoo import ReWOOEngine

        # Two unparseable planning replies, then the ReAct answer.
        invalid_plan1 = make_response(content="Not a plan")
        invalid_plan2 = make_response(content="Still not a plan")
        react_response = make_response(content="ReAct fallback answer")

        gateway = make_mock_gateway([invalid_plan1, invalid_plan2, react_response])
        engine = ReWOOEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Complex task"}],
        )

        assert result.output == "ReAct fallback answer"
        assert result.fallback_strategy == "react"

    async def test_all_fail_direct_fallback(self):
        """Planning, simplified planning and ReAct all fail: strategy is "direct"."""
        from agentkit.core.rewoo import ReWOOEngine

        call_count = 0

        async def chat_side_effect(**kwargs):
            # Scripted by call number: calls 1-2 return unparseable plans,
            # call 3 (the ReAct attempt) raises, and any later call returns
            # the direct-fallback answer.
            nonlocal call_count
            call_count += 1
            if call_count <= 2:
                return make_response(content="Not a plan")
            if call_count == 3:
                raise RuntimeError("ReAct engine failed")
            return make_response(content="Direct fallback answer")

        gateway = MagicMock(spec=LLMGateway)
        gateway.chat = AsyncMock(side_effect=chat_side_effect)
        engine = ReWOOEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Impossible task"}],
        )

        assert result.output == "Direct fallback answer"
        assert result.fallback_strategy == "direct"

    async def test_fallback_strategies_constant_exists(self):
        """``ReWOOEngine.FALLBACK_STRATEGIES`` exists and lists the chain in order."""
        from agentkit.core.rewoo import ReWOOEngine

        assert hasattr(ReWOOEngine, "FALLBACK_STRATEGIES")
        assert ReWOOEngine.FALLBACK_STRATEGIES == ["simplified_rewoo", "react", "direct"]

    async def test_react_fallback_with_tools(self):
        """After planning fails, the ReAct fallback makes a tool call and then answers."""
        from agentkit.core.rewoo import ReWOOEngine

        tool = FakeTool(name="search", result={"results": ["found"]})

        # Two unparseable planning replies ...
        invalid_plan1 = make_response(content="Cannot plan")
        invalid_plan2 = make_response(content="Still cannot plan")
        # ... then ReAct: a tool call followed by the final answer.
        react_tool_response = make_response(
            content="",
            tool_calls=[ToolCall(id="tc_1", name="search", arguments={"query": "test"})],
        )
        react_final_response = make_response(content="ReAct answer with tool")

        gateway = make_mock_gateway([
            invalid_plan1,
            invalid_plan2,
            react_tool_response,
            react_final_response,
        ])
        engine = ReWOOEngine(llm_gateway=gateway)

        result = await engine.execute(
            messages=[{"role": "user", "content": "Search task"}],
            tools=[tool],
        )

        assert result.output == "ReAct answer with tool"
        assert result.fallback_strategy == "react"
|