604 lines
23 KiB
Python
604 lines
23 KiB
Python
"""集成测试 - CostAwareRouter → Engine → AlignmentGuard 全链路
|
||
|
||
包含合并 LLM 分类路由和并行工具执行的集成测试。"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import json
|
||
import time
|
||
from unittest.mock import AsyncMock, MagicMock
|
||
|
||
import pytest
|
||
|
||
from agentkit.chat.skill_routing import CostAwareRouter, SkillRoutingResult
|
||
from agentkit.core.react import ReActEngine, ReActResult, ReActStep, ReActEvent
|
||
from agentkit.llm.gateway import LLMGateway
|
||
from agentkit.llm.protocol import LLMResponse, TokenUsage, ToolCall
|
||
from agentkit.org.context import AgentProfile, OrganizationContext
|
||
from agentkit.quality.alignment import AlignmentConfig, AlignmentGuard
|
||
from agentkit.tools.base import Tool
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Helpers
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
def _make_llm_response(content: str) -> LLMResponse:
    """Wrap ``content`` in an ``LLMResponse`` carrying fixed test metadata.

    The model name is always ``"test-model"`` and the usage is always
    10 prompt tokens and 20 completion tokens.
    """
    fixed_usage = TokenUsage(prompt_tokens=10, completion_tokens=20)
    return LLMResponse(content=content, model="test-model", usage=fixed_usage)
|
||
|
||
|
||
def _make_mock_gateway(responses: list[LLMResponse]) -> MagicMock:
    """Build a stand-in gateway whose async ``chat`` replays ``responses``.

    ``responses`` is installed as the ``side_effect`` of an ``AsyncMock``, so
    each awaited ``chat`` call yields the next item in order.
    """
    scripted_chat = AsyncMock(side_effect=responses)
    fake_gateway = MagicMock()
    fake_gateway.chat = scripted_chat
    return fake_gateway
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Test 1: Router routes to ReAct engine, output passes alignment check
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
class TestRouterToEnginePassesAlignment:
    """Route to the ReAct engine and confirm the output passes the alignment check."""

    @pytest.mark.asyncio
    async def test_react_output_passes_alignment(self):
        # The mocked gateway replays two replies in order: the low-complexity
        # classification consumed while routing, then the ReAct final answer.
        scripted_replies = [
            _make_llm_response('{"complexity": 0.2}'),
            _make_llm_response("你好!有什么可以帮你的?"),
        ]
        gateway = _make_mock_gateway(scripted_replies)

        guard = AlignmentGuard(
            config=AlignmentConfig(constraints=["password", "secret_key"]),
        )
        router = CostAwareRouter(
            llm_gateway=gateway,
            org_context=OrganizationContext(),
        )

        # Registry exposes no skills.
        skill_registry = MagicMock()
        skill_registry.list_skills.return_value = []
        intent_router = AsyncMock()

        # Step 1: route the user message.
        route_args = {
            "content": "随便聊聊",
            "skill_registry": skill_registry,
            "intent_router": intent_router,
            "default_tools": [],
            "default_system_prompt": "You are helpful",
            "default_model": "default",
            "default_agent_name": "default",
        }
        route_result = await router.route(**route_args)
        assert route_result.complexity < 0.3
        assert route_result.agent_name == "default"

        # Step 2: inject the alignment constraints into the engine input.
        injected = guard.inject_constraints({"content": route_result.clean_content})
        assert "alignment_constraints" in injected

        # Step 3: run a real ReActEngine on top of the mocked gateway.
        engine = ReActEngine(llm_gateway=gateway)
        user_message = {"role": "user", "content": injected["content"]}
        engine_result = await engine.execute(messages=[user_message])

        # Step 4: the harmless answer must pass the alignment check.
        verdict = await guard.check_output({"result": engine_result.output})
        assert verdict.passed is True
        assert verdict.violations == []
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Test 2: Router routes to ReAct engine, output fails alignment check
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
class TestRouterToEngineFailsAlignment:
    """Route to the ReAct engine and confirm a violating output fails the alignment check."""

    @pytest.mark.asyncio
    async def test_react_output_fails_alignment(self):
        # Second scripted reply contains the constrained word "password".
        classify_reply = _make_llm_response('{"complexity": 0.2}')
        leaking_reply = _make_llm_response("Your password is 123456")
        gateway = _make_mock_gateway([classify_reply, leaking_reply])

        guard = AlignmentGuard(
            config=AlignmentConfig(constraints=["password", "secret_key"]),
        )
        router = CostAwareRouter(
            llm_gateway=gateway,
            org_context=OrganizationContext(),
        )

        # Registry exposes no skills.
        skill_registry = MagicMock()
        skill_registry.list_skills.return_value = []
        intent_router = AsyncMock()

        route_result = await router.route(
            content="随便聊聊",
            skill_registry=skill_registry,
            intent_router=intent_router,
            default_tools=[],
            default_system_prompt="You are helpful",
            default_model="default",
            default_agent_name="default",
        )

        injected = guard.inject_constraints({"content": route_result.clean_content})

        engine = ReActEngine(llm_gateway=gateway)
        user_message = {"role": "user", "content": injected["content"]}
        engine_result = await engine.execute(messages=[user_message])

        # The guard must reject the output and report the matched constraint.
        verdict = await guard.check_output({"result": engine_result.output})
        assert verdict.passed is False
        assert len(verdict.violations) > 0
        assert "password" in verdict.violations
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Test 3: Router routes based on complexity (low→default, high→org_context)
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
class TestRouterComplexityBasedRouting:
    """Complexity-based routing: low complexity goes to the default agent,
    high complexity goes through org_context capability matching."""

    @pytest.mark.asyncio
    async def test_low_complexity_routes_to_default(self):
        gateway = _make_mock_gateway([_make_llm_response('{"complexity": 0.15}')])

        # An agent is registered, but the assertions below expect the default
        # agent to be chosen for a low-complexity request.
        org_context = OrganizationContext()
        analyst_profile = AgentProfile(
            name="analyst",
            agent_type="react",
            capabilities=["analysis"],
            skills=["analysis"],
        )
        org_context.register_agent(analyst_profile)

        router = CostAwareRouter(llm_gateway=gateway, org_context=org_context)

        skill_registry = MagicMock()
        skill_registry.list_skills.return_value = []
        intent_router = AsyncMock()

        route_args = {
            "content": "简单问题",
            "skill_registry": skill_registry,
            "intent_router": intent_router,
            "default_tools": [],
            "default_system_prompt": "You are helpful",
            "default_model": "default",
            "default_agent_name": "default",
        }
        outcome = await router.route(**route_args)

        assert outcome.complexity < 0.3
        assert outcome.agent_name == "default"
        assert outcome.match_method == "low_complexity"

    @pytest.mark.asyncio
    async def test_high_complexity_routes_via_org_context(self):
        gateway = _make_mock_gateway([_make_llm_response('{"complexity": 0.85}')])

        org_context = OrganizationContext()
        analyst_profile = AgentProfile(
            name="analyst",
            agent_type="react",
            capabilities=["分析", "市场", "调研"],
            skills=["market_analysis"],
            current_load=0,
        )
        org_context.register_agent(analyst_profile)

        # Stub find_best_agent so it hands back the profile stored in the
        # context rather than running the real matching logic.
        stored_profile = org_context.get_agent_profile("analyst")
        org_context.find_best_agent = MagicMock(return_value=stored_profile)

        router = CostAwareRouter(llm_gateway=gateway, org_context=org_context)

        skill_registry = MagicMock()
        skill_registry.list_skills.return_value = []
        intent_router = AsyncMock()

        route_args = {
            "content": "请对市场趋势进行深度分析并给出投资建议",
            "skill_registry": skill_registry,
            "intent_router": intent_router,
            "default_tools": [],
            "default_system_prompt": "You are helpful",
            "default_model": "default",
            "default_agent_name": "default",
        }
        outcome = await router.route(**route_args)

        assert outcome.complexity >= 0.7
        assert outcome.match_method == "capability"
        assert outcome.agent_name == "analyst"
        assert outcome.matched is True
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Test 4: AlignmentGuard injects constraints into input before engine execution
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
class TestAlignmentGuardConstraintInjection:
    """AlignmentGuard injects its constraints into the input before the engine runs."""

    @pytest.mark.asyncio
    async def test_constraints_injected_before_engine_execution(self):
        classify_reply = _make_llm_response('{"complexity": 0.5}')
        answer_reply = _make_llm_response("Safe answer")
        gateway = _make_mock_gateway([classify_reply, answer_reply])

        guard = AlignmentGuard(
            config=AlignmentConfig(
                constraints=["不得泄露用户隐私", "禁止生成有害内容"],
            ),
        )
        router = CostAwareRouter(
            llm_gateway=gateway,
            org_context=OrganizationContext(),
        )

        skill_registry = MagicMock()
        skill_registry.list_skills.return_value = []
        intent_router = AsyncMock()

        # Step 1: route the user message.
        route_result = await router.route(
            content="请帮我写一篇文章",
            skill_registry=skill_registry,
            intent_router=intent_router,
            default_tools=[],
            default_system_prompt="You are helpful",
            default_model="default",
            default_agent_name="default",
        )

        # Step 2: inject the constraints.
        input_data = {"content": route_result.clean_content}
        injected = guard.inject_constraints(input_data)

        # Both constraints must be present in the injected payload.
        assert "alignment_constraints" in injected
        injected_constraints = injected["alignment_constraints"]
        assert "不得泄露用户隐私" in injected_constraints
        assert "禁止生成有害内容" in injected_constraints
        # The original content is carried over unchanged.
        assert injected["content"] == route_result.clean_content
        # The caller's dict must not have been mutated.
        assert "alignment_constraints" not in input_data

        # Step 3: run the engine with the injected input.
        engine = ReActEngine(llm_gateway=gateway)
        user_message = {"role": "user", "content": injected["content"]}
        engine_result = await engine.execute(messages=[user_message])

        # Step 4: the output passes the alignment check.
        verdict = await guard.check_output({"result": engine_result.output})
        assert verdict.passed is True

    @pytest.mark.asyncio
    async def test_constraint_injection_with_cascade_monitoring(self):
        """Full chain of constraint injection plus cascade-failure monitoring."""
        guard = AlignmentGuard(
            config=AlignmentConfig(
                constraints=["password"],
                cascade_max_interactions=5,
            ),
        )

        # Inject the constraints.
        injected = guard.inject_constraints({"content": "请帮我重置密码"})
        assert "alignment_constraints" in injected

        # Simulate a safe output.
        safe_output = {"result": "密码重置链接已发送到您的邮箱。"}
        verdict = await guard.check_output(safe_output)
        assert verdict.passed is True

        # A single recorded interaction must not raise a cascade alert.
        session_id = "session-chain-1"
        alert = guard.record_interaction(session_id)
        assert alert is None
        assert guard.get_interaction_count(session_id) == 1
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Test 5: Merged Router → Engine chain (HeuristicClassifier → merged LLM → ReAct)
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
class _SlowTool(Tool):
    """Fake tool that sleeps before answering, used to observe parallel execution.

    Args:
        name: Tool name forwarded to ``Tool.__init__``.
        description: Tool description forwarded to ``Tool.__init__``.
        delay: Seconds to sleep inside ``execute`` before returning.
        result: Payload returned by ``execute``. When ``None`` (the default),
            ``{"status": "ok"}`` is used instead.

    Attributes tracked for assertions:
        call_count: Number of times ``execute`` has been entered.
    """

    def __init__(
        self,
        name: str = "slow_tool",
        description: str = "A slow tool for testing",
        delay: float = 0.1,
        result: dict | None = None,
    ):
        super().__init__(name=name, description=description)
        self._delay = delay
        # Fall back to the default only when no result was supplied. The
        # previous `result or {...}` also discarded an explicit empty dict,
        # which is a legitimate payload for a test to request.
        self._result = {"status": "ok"} if result is None else result
        self.call_count = 0

    async def execute(self, **kwargs) -> dict:
        """Count the call, sleep for the configured delay, return the payload.

        Keyword arguments are accepted and ignored.
        """
        self.call_count += 1
        await asyncio.sleep(self._delay)
        return self._result
|
||
|
||
|
||
class TestMergedRouterToEngineChain:
    """Full chain: user message -> HeuristicClassifier -> merged LLM classify -> ReActEngine -> result.

    When the HeuristicClassifier reports medium complexity (0.3-0.7), the
    merged LLM call is used.
    """

    @pytest.mark.asyncio
    async def test_medium_complexity_uses_merged_llm_then_react(self):
        """Medium complexity triggers merged LLM classify, then ReActEngine executes."""
        # Original author's note: the message contains "如何" (medium) and
        # "代码" (high), so the heuristic yields medium complexity; the merged
        # LLM classify then returns medium complexity with no skill_hint,
        # which routes to the default agent (ReAct).
        classification = {
            "complexity": 0.5,
            "intent": "code_optimization",
            "skill_hint": None,
        }
        classify_reply = _make_llm_response(json.dumps(classification))
        # Final answer produced by the ReActEngine.
        answer_reply = _make_llm_response("建议使用缓存和异步IO来优化代码性能。")
        gateway = _make_mock_gateway([classify_reply, answer_reply])

        router = CostAwareRouter(
            llm_gateway=gateway,
            org_context=OrganizationContext(),
            merged_llm_classify=True,
        )

        skill_registry = MagicMock()
        skill_registry.list_skills.return_value = []
        intent_router = AsyncMock()

        # Step 1: route.
        route_args = {
            "content": "如何优化代码性能",
            "skill_registry": skill_registry,
            "intent_router": intent_router,
            "default_tools": [],
            "default_system_prompt": "You are helpful",
            "default_model": "default",
            "default_agent_name": "default",
        }
        route_result = await router.route(**route_args)

        # Medium complexity, reached through the merged_llm method.
        assert 0.3 <= route_result.complexity <= 0.7
        assert route_result.match_method is not None
        assert "merged_llm" in route_result.match_method

        # Step 2: execute with the ReActEngine.
        engine = ReActEngine(llm_gateway=gateway)
        user_message = {"role": "user", "content": route_result.clean_content}
        engine_result = await engine.execute(
            messages=[user_message],
            system_prompt=route_result.system_prompt,
        )

        # Step 3: verify the result.
        assert isinstance(engine_result, ReActResult)
        assert "优化" in engine_result.output or "缓存" in engine_result.output

    @pytest.mark.asyncio
    async def test_medium_complexity_merged_llm_routes_to_skill_then_react(self):
        """Medium complexity plus a skill_hint from merged LLM routes to the skill, then ReAct runs."""
        classification = {
            "complexity": 0.45,
            "intent": "code_review",
            "skill_hint": "code_reviewer",
        }
        classify_reply = _make_llm_response(json.dumps(classification))
        answer_reply = _make_llm_response("代码审查完成,发现3个潜在问题。")
        gateway = _make_mock_gateway([classify_reply, answer_reply])

        # Mock registry containing the code_reviewer skill. `name` is assigned
        # after construction because MagicMock treats a `name=` constructor
        # argument as the mock's own repr name.
        fake_skill = MagicMock()
        fake_skill.name = "code_reviewer"
        fake_skill.config.intent.keywords = ["code", "review"]
        fake_skill.config.llm = None
        fake_skill.config.prompt = None
        fake_skill.tools = []

        skill_registry = MagicMock()
        skill_registry.list_skills.return_value = [fake_skill]
        skill_registry.get.return_value = fake_skill

        router = CostAwareRouter(
            llm_gateway=gateway,
            org_context=OrganizationContext(),
            merged_llm_classify=True,
        )

        route_result = await router.route(
            content="如何优化代码性能",
            skill_registry=skill_registry,
            intent_router=AsyncMock(),
            default_tools=[],
            default_system_prompt="You are helpful",
            default_model="default",
            default_agent_name="default",
        )

        # The request must have been routed to the skill.
        assert route_result.matched is True
        assert route_result.skill_name == "code_reviewer"
        assert route_result.match_method == "merged_llm"

        # Execute with the ReActEngine.
        engine = ReActEngine(llm_gateway=gateway)
        user_message = {"role": "user", "content": route_result.clean_content}
        engine_result = await engine.execute(
            messages=[user_message],
            system_prompt=route_result.system_prompt,
        )

        assert isinstance(engine_result, ReActResult)

    @pytest.mark.asyncio
    async def test_merged_llm_high_complexity_delegates_to_layer2(self):
        """Heuristic medium complexity -> merged LLM returns high complexity -> delegate to Layer 2."""
        classification = {
            "complexity": 0.85,
            "intent": "deep_analysis",
            "skill_hint": None,
        }
        gateway = _make_mock_gateway(
            [_make_llm_response(json.dumps(classification))],
        )

        org_context = OrganizationContext()
        analyst_profile = AgentProfile(
            name="analyst",
            agent_type="react",
            capabilities=["分析", "优化", "代码"],
            skills=["code_analysis"],
            current_load=0,
        )
        org_context.register_agent(analyst_profile)
        # Stub find_best_agent so it returns the profile stored in the context.
        stored_profile = org_context.get_agent_profile("analyst")
        org_context.find_best_agent = MagicMock(return_value=stored_profile)

        router = CostAwareRouter(
            llm_gateway=gateway,
            org_context=org_context,
            merged_llm_classify=True,
        )

        skill_registry = MagicMock()
        skill_registry.list_skills.return_value = []
        intent_router = AsyncMock()

        route_args = {
            "content": "如何优化代码性能",
            "skill_registry": skill_registry,
            "intent_router": intent_router,
            "default_tools": [],
            "default_system_prompt": "You are helpful",
            "default_model": "default",
            "default_agent_name": "default",
        }
        route_result = await router.route(**route_args)

        # High complexity should be delegated to Layer 2.
        assert route_result.complexity >= 0.7
        assert route_result.matched is True
        assert route_result.agent_name == "analyst"
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Test 6: Parallel Tools Integration (ReActEngine with parallel_tools="auto")
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
class TestParallelToolsIntegration:
    """Integration tests for ReActEngine with parallel_tools="auto" in a realistic scenario.

    The LLM returns tool calls flagged with ``_parallelizable=True``; the
    flagged calls are expected to run in parallel.
    """

    @pytest.mark.asyncio
    async def test_auto_parallel_two_tools_realistic(self):
        """Realistic case: the LLM returns two parallel tool calls that run concurrently."""
        # Both tools sleep for the same delay. Run serially they would need
        # about 2 * tool_delay; run in parallel about 1 * tool_delay.
        tool_delay = 0.2
        tool_a = _SlowTool(
            name="search_web",
            delay=tool_delay,
            result={"results": ["Python best practices"]},
        )
        tool_b = _SlowTool(
            name="search_docs",
            delay=tool_delay,
            result={"docs": ["Official Python docs"]},
        )

        # The LLM asks for two tool calls, both marked parallelizable.
        tool_call_response = LLMResponse(
            content="",
            model="test-model",
            usage=TokenUsage(prompt_tokens=50, completion_tokens=20),
            tool_calls=[
                ToolCall(
                    id="tc_1",
                    name="search_web",
                    arguments={"query": "python", "_parallelizable": True},
                ),
                ToolCall(
                    id="tc_2",
                    name="search_docs",
                    arguments={"topic": "python", "_parallelizable": True},
                ),
            ],
        )
        final_response = _make_llm_response(
            "Based on search results, Python best practices include..."
        )

        gateway = MagicMock(spec=LLMGateway)
        gateway.chat = AsyncMock(side_effect=[tool_call_response, final_response])

        engine = ReActEngine(llm_gateway=gateway, parallel_tools="auto")

        start = time.monotonic()
        result = await engine.execute(
            messages=[{"role": "user", "content": "Search for Python best practices"}],
            tools=[tool_a, tool_b],
        )
        elapsed = time.monotonic() - start

        assert isinstance(result, ReActResult)
        assert tool_a.call_count == 1
        assert tool_b.call_count == 1
        # Parallel execution must beat serial execution. The bound sits
        # halfway between the parallel time (1x delay) and the serial time
        # (2x delay), so a serial run fails the assertion. The earlier fixed
        # 0.25 s bound was above the 0.2 s serial total of two 0.1 s tools
        # and therefore could never detect a regression to serial execution.
        parallel_budget = 1.5 * tool_delay
        assert elapsed < parallel_budget, f"Parallel execution too slow: {elapsed:.2f}s"

        # The trajectory must contain both tool calls.
        tool_steps = [s for s in result.trajectory if s.action == "tool_call"]
        assert len(tool_steps) == 2
        names = {s.tool_name for s in tool_steps}
        assert names == {"search_web", "search_docs"}

    @pytest.mark.asyncio
    async def test_auto_parallel_with_serial_tool_mixed(self):
        """Mixed case: one serial tool plus two parallel tools."""
        tool_serial = _SlowTool(
            name="init_context", delay=0.05, result={"context": "ready"}
        )
        tool_para_a = _SlowTool(
            name="search_web", delay=0.1, result={"results": ["web result"]}
        )
        tool_para_b = _SlowTool(
            name="search_docs", delay=0.1, result={"docs": ["doc result"]}
        )

        # Only the two search calls carry the _parallelizable flag.
        tool_call_response = LLMResponse(
            content="",
            model="test-model",
            usage=TokenUsage(prompt_tokens=50, completion_tokens=20),
            tool_calls=[
                ToolCall(
                    id="tc_1",
                    name="init_context",
                    arguments={"project": "test"},
                ),
                ToolCall(
                    id="tc_2",
                    name="search_web",
                    arguments={"query": "test", "_parallelizable": True},
                ),
                ToolCall(
                    id="tc_3",
                    name="search_docs",
                    arguments={"topic": "test", "_parallelizable": True},
                ),
            ],
        )
        final_response = _make_llm_response("Combined result from all tools")

        gateway = MagicMock(spec=LLMGateway)
        gateway.chat = AsyncMock(side_effect=[tool_call_response, final_response])

        engine = ReActEngine(llm_gateway=gateway, parallel_tools="auto")

        result = await engine.execute(
            messages=[{"role": "user", "content": "Initialize and search"}],
            tools=[tool_serial, tool_para_a, tool_para_b],
        )

        assert isinstance(result, ReActResult)
        assert tool_serial.call_count == 1
        assert tool_para_a.call_count == 1
        assert tool_para_b.call_count == 1

        # Every tool result must appear in the trajectory.
        tool_steps = [s for s in result.trajectory if s.action == "tool_call"]
        assert len(tool_steps) == 3
|