fischer-agentkit/tests/unit/experts/test_board_backtest.py

660 lines
28 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""私董会 (Board Meeting) 回测脚本 — 能力评估与回归测试
对 BoardRouter / BoardTeam / BoardOrchestrator 进行系统性回测,
覆盖以下能力维度:
1. 前缀路由准确性 (Prefix Routing Accuracy)
- @board 前缀匹配
- @board:expert1,expert2 显式专家
- @board:private_board 显式模板
- 非 @board 输入不应误匹配
2. 主题提取准确性 (Topic Extraction Accuracy)
- 从 @board topic 提取 topic
- 从 @board:experts topic 提取 topic
- 空主题处理
3. 专家名验证 (Expert Name Validation)
- 有效名称接受
- 无效名称拒绝(含空格、特殊字符)
- MAX_EXPERTS=10 上限截断
4. 模板加载 (Template Loading)
- 默认 private_board 模板成员加载
- 配置目录 YAML 加载
5. 停止命令检测 (Stop Command Detection)
- /stop, 停止讨论, stop, 结束讨论
6. BoardTeam 状态机 (BoardTeam State Machine)
- FORMING → DISCUSSING → CONCLUDING → COMPLETED → DISSOLVED
7. 讨论历史管理 (Discussion History Management)
- 添加发言
- 历史格式化
- 用户干预
8. BoardRoutingResult 数据结构 (Data Structure Integrity)
- 默认值验证
- 匹配结果字段填充
9. 回归测试 (Regression: No Interference)
- @board 路由不干扰 @team / @skill / 普通聊天
运行方式:
pytest tests/unit/experts/test_board_backtest.py -v
pytest tests/unit/experts/test_board_backtest.py -v -k "TestPrefixRouting"
"""
from __future__ import annotations
import pytest
from agentkit.experts.board_orchestrator import BoardOrchestrator
from agentkit.experts.board_router import (
BOARD_PREFIX_PATTERN,
DEFAULT_TEMPLATE,
MAX_EXPERTS,
BoardRouter,
BoardRoutingResult,
)
from agentkit.experts.board import BoardStatus, BoardTeam
from agentkit.experts.config import ExpertConfig, ExpertTemplate
from agentkit.experts.registry import ExpertTemplateRegistry
# ── 辅助函数 ──────────────────────────────────────────────
def _make_expert_template(name: str, persona: str = "测试专家") -> ExpertTemplate:
    """Build an ``ExpertTemplate`` for use in tests.

    The ``persona`` text is reused as the config persona, as the prompt
    ``identity`` entry, and as the template description. The resulting
    template is flagged as built-in.
    """
    expert_config = ExpertConfig(
        name=name,
        agent_type="expert",
        persona=persona,
        thinking_style="analytical",
        bound_skills=[],
        task_mode="llm_generate",
        prompt={"identity": persona},
    )
    template = ExpertTemplate(
        name=name,
        config=expert_config,
        is_builtin=True,
        description=persona,
    )
    return template
def _make_registry_with_experts() -> ExpertTemplateRegistry:
    """Return a registry pre-populated with three expert templates."""
    registry = ExpertTemplateRegistry()
    # (template name, persona) pairs, registered in this order.
    preset_experts = (
        ("elon_musk", "Elon Musk"),
        ("jeff_bezos", "Jeff Bezos"),
        ("allenzhang", "张小龙"),
    )
    for expert_name, expert_persona in preset_experts:
        registry.register(_make_expert_template(expert_name, persona=expert_persona))
    return registry
# ═══════════════════════════════════════════════════════════════════════════
# 1. 前缀路由准确性 (Prefix Routing Accuracy)
# ═══════════════════════════════════════════════════════════════════════════
class TestPrefixRouting:
    """Backtest: accuracy of ``@board`` prefix routing."""

    def setup_method(self) -> None:
        # Every test gets a fresh router backed by an empty registry.
        self.router = BoardRouter(template_registry=ExpertTemplateRegistry())

    @pytest.mark.parametrize(
        "content,expected_matched,expected_board_mode",
        [
            pytest.param("@board 讨论主题", True, True, id="board_default"),
            pytest.param(
                "@board:elon_musk,jeff_bezos 主题", True, True, id="board_explicit_experts"
            ),
            pytest.param("@board:private_board 主题", True, True, id="board_explicit_template"),
            pytest.param("@board", True, True, id="board_no_topic"),
            pytest.param("讨论一下市场策略", False, False, id="plain_text"),
            pytest.param("@team:analyst,writer 任务", False, False, id="team_prefix"),
            pytest.param("@skill:react_agent 查看ip", False, False, id="skill_prefix"),
            pytest.param("普通聊天消息", False, False, id="chitchat"),
        ],
    )
    def test_prefix_matching(
        self, content: str, expected_matched: bool, expected_board_mode: bool
    ) -> None:
        """Check ``matched`` and ``board_mode`` for board and non-board inputs."""
        result = self.router.resolve(content)
        assert result.matched == expected_matched, (
            f"matched mismatch for {content!r}: "
            f"expected {expected_matched}, got {result.matched}"
        )
        assert result.board_mode == expected_board_mode, (
            f"board_mode mismatch for {content!r}: "
            f"expected {expected_board_mode}, got {result.board_mode}"
        )

    def test_regex_pattern_directly(self) -> None:
        """Exercise ``BOARD_PREFIX_PATTERN`` without going through the router."""
        # Bare "@board": group 1 (expert list) is absent, group 2 holds the topic.
        bare = BOARD_PREFIX_PATTERN.match("@board 主题")
        assert bare is not None
        assert bare.group(1) is None
        assert bare.group(2).strip() == "主题"

        # "@board:<list>": group 1 captures the comma-separated list.
        with_experts = BOARD_PREFIX_PATTERN.match("@board:a,b 主题")
        assert with_experts is not None
        assert with_experts.group(1) == "a,b"
        assert with_experts.group(2).strip() == "主题"

        # A different prefix must not match at all.
        other_prefix = BOARD_PREFIX_PATTERN.match("@team task")
        assert other_prefix is None

    def test_default_template_uses_private_board(self) -> None:
        """A bare ``@board`` flags the default template and reports ``explicit_board``."""
        routed = self.router.resolve("@board 讨论主题")
        assert routed.use_default_template is True
        assert routed.match_method == "explicit_board"

    def test_explicit_private_board_template(self) -> None:
        """``@board:private_board`` explicitly selects the default template."""
        routed = self.router.resolve("@board:private_board 讨论主题")
        assert routed.use_default_template is True
        assert routed.board_mode is True
# ═══════════════════════════════════════════════════════════════════════════
# 2. 主题提取准确性 (Topic Extraction Accuracy)
# ═══════════════════════════════════════════════════════════════════════════
class TestTopicExtraction:
    """Backtest: accuracy of discussion-topic extraction."""

    def setup_method(self) -> None:
        # Every test gets a fresh router backed by an empty registry.
        self.router = BoardRouter(template_registry=ExpertTemplateRegistry())

    @pytest.mark.parametrize(
        "content,expected_topic",
        [
            pytest.param(
                "@board 讨论是否应该进入东南亚市场",
                "讨论是否应该进入东南亚市场",
                id="default_chinese",
            ),
            pytest.param(
                "@board AI产品定价策略应该怎么做",
                "AI产品定价策略应该怎么做",
                id="default_chinese_2",
            ),
            pytest.param(
                "@board:elon_musk,jeff_bezos 火星商业化方案",
                "火星商业化方案",
                id="explicit_experts",
            ),
            pytest.param(
                "@board:private_board 创业融资策略",
                "创业融资策略",
                id="explicit_template",
            ),
            pytest.param("@board", "", id="empty_topic"),
            pytest.param("@board ", "", id="whitespace_only"),
        ],
    )
    def test_topic_extraction(self, content: str, expected_topic: str) -> None:
        """Check the topic extracted from an ``@board`` input."""
        result = self.router.resolve(content)
        # Collapse whitespace runs so the comparison ignores spacing differences.
        actual = " ".join(result.topic.split())
        assert actual == expected_topic, (
            f"topic mismatch for {content!r}: "
            f"expected {expected_topic!r}, got {actual!r}"
        )

    def test_colon_no_experts(self) -> None:
        """Edge case: ``@board: topic`` has a colon but no expert names after it."""
        # NOTE(review): per the original author's note, the pattern's expert
        # group needs a non-whitespace character right after the colon, so no
        # expert list is captured and the colon ends up in the topic. The
        # pattern is defined elsewhere - confirm against BOARD_PREFIX_PATTERN.
        routed = self.router.resolve("@board: topic")
        assert routed.matched is True
        assert routed.board_mode is True
        # With no expert list captured, the default template is expected.
        assert routed.use_default_template is True
# ═══════════════════════════════════════════════════════════════════════════
# 3. 专家名验证 (Expert Name Validation)
# ═══════════════════════════════════════════════════════════════════════════
class TestExpertNameValidation:
    """Backtest: expert-name format validation and the expert-count cap."""

    def setup_method(self) -> None:
        # Every test gets a fresh router backed by an empty registry.
        self.router = BoardRouter(template_registry=ExpertTemplateRegistry())

    @pytest.mark.parametrize(
        "content,expected_count",
        [
            pytest.param("@board:elon_musk,jeff_bezos 主题", 2, id="two"),
            pytest.param("@board:elon_musk,jeff_bezos,allenzhang 主题", 3, id="three"),
            pytest.param(
                "@board:charlie_munger,warren_buffett,paul_graham 主题", 3, id="three_alt"
            ),
            pytest.param("@board:a,b,c,d,e,f,g,h,i,j 主题", 10, id="exactly_ten"),
            # Eleven names are supplied; only ten are expected back.
            pytest.param("@board:a,b,c,d,e,f,g,h,i,j,k 主题", 10, id="eleven_capped"),
        ],
    )
    def test_valid_expert_count(self, content: str, expected_count: int) -> None:
        """Check how many expert names survive parsing."""
        result = self.router.resolve(content)
        assert len(result.specified_experts) == expected_count, (
            f"expert count mismatch for {content!r}: "
            f"expected {expected_count}, got {len(result.specified_experts)}"
        )

    def test_invalid_names_rejected(self) -> None:
        """When every name is invalid, the router falls back to the default template."""
        # NOTE(review): the original note says "@#$" fails the router's name
        # regex (^[a-zA-Z0-9_-]{1,64}$); that regex is not visible here - confirm.
        result = self.router.resolve("@board:@#$ 主题")
        assert result.use_default_template is True, (
            "should fall back to default template when all names are invalid"
        )
        # The fallback is expected to supply a non-empty member list.
        assert len(result.specified_experts) > 0, (
            f"default template members should be loaded, got: {result.specified_experts}"
        )

    def test_max_experts_constant(self) -> None:
        """``MAX_EXPERTS`` is expected to be 10."""
        assert MAX_EXPERTS == 10

    def test_resolve_expert_configs_first_is_moderator(self) -> None:
        """``resolve_expert_configs`` marks only the first expert as lead."""
        routed = self.router.resolve("@board:expert_a,expert_b 主题")
        configs = self.router.resolve_expert_configs(routed.specified_experts)
        assert len(configs) == 2
        lead, follower = configs[0], configs[1]
        assert lead.is_lead is True
        assert follower.is_lead is False

    def test_resolve_expert_configs_dynamic_generation(self) -> None:
        """An unregistered expert name still yields a generated config."""
        routed = self.router.resolve("@board:dynamic_expert 主题")
        configs = self.router.resolve_expert_configs(routed.specified_experts)
        assert len(configs) == 1
        only_config = configs[0]
        assert only_config.name == "dynamic_expert"
        assert only_config.is_lead is True

    def test_mixed_valid_invalid_names(self) -> None:
        """Mixed input: invalid names are dropped, valid ones are kept in order."""
        result = self.router.resolve("@board:elon_musk,@#$,jeff_bezos 主题")
        assert result.specified_experts == ["elon_musk", "jeff_bezos"], (
            f"invalid names should be filtered, got: {result.specified_experts}"
        )
        assert result.use_default_template is False

    @pytest.mark.parametrize(
        "name_length,expected_valid",
        [
            pytest.param(64, True, id="boundary_64_valid"),
            pytest.param(65, False, id="boundary_65_invalid"),
        ],
    )
    def test_expert_name_length_boundary(self, name_length: int, expected_valid: bool) -> None:
        """Length boundary: a 64-character name is valid, a 65-character one is not."""
        name = "a" * name_length
        result = self.router.resolve(f"@board:{name} 主题")
        if not expected_valid:
            # The only name is rejected, so the default template is expected.
            assert result.use_default_template is True
            return
        assert len(result.specified_experts) == 1
        assert result.specified_experts[0] == name

    def test_resolve_expert_configs_no_template_mutation(self) -> None:
        """``resolve_expert_configs`` must not mutate configs shared via the registry."""
        router = BoardRouter(template_registry=_make_registry_with_experts())

        # First resolution: elon_musk comes first and is the lead.
        first_pass = router.resolve_expert_configs(["elon_musk", "jeff_bezos"])
        assert first_pass[0].is_lead is True
        assert first_pass[1].is_lead is False

        # Second resolution with the order reversed: jeff_bezos is the lead.
        second_pass = router.resolve_expert_configs(["jeff_bezos", "elon_musk"])
        assert second_pass[0].is_lead is True
        assert second_pass[1].is_lead is False

        # The first result must be unaffected by the second call.
        assert first_pass[0].is_lead is True, (
            "first call's is_lead was mutated by second call (shared template bug)"
        )
        assert first_pass[1].is_lead is False
# ═══════════════════════════════════════════════════════════════════════════
# 4. 模板加载 (Template Loading)
# ═══════════════════════════════════════════════════════════════════════════
class TestTemplateLoading:
    """Backtest: loading of expert templates."""

    def test_default_template_name(self) -> None:
        """``DEFAULT_TEMPLATE`` is expected to be ``'private_board'``."""
        assert DEFAULT_TEMPLATE == "private_board"

    def test_fallback_default_members(self) -> None:
        """With an empty registry, a fallback default member list is returned."""
        router = BoardRouter(template_registry=ExpertTemplateRegistry())
        members = router._load_default_template_members()
        # Non-empty, and never longer than the expert cap.
        assert len(members) > 0
        assert len(members) <= MAX_EXPERTS
        # The fallback list is expected to contain these preset experts.
        for preset in ("elon_musk", "jeff_bezos"):
            assert preset in members

    def test_default_template_members_from_registry(self) -> None:
        """A registered ``private_board`` template supplies the default members."""
        expected_members = ("elon_musk", "jeff_bezos", "allenzhang")
        registry = _make_registry_with_experts()
        # The private_board template lists its members in ``bound_skills``.
        board_config = ExpertConfig(
            name="private_board",
            agent_type="expert",
            persona="私董会模板",
            bound_skills=list(expected_members),
            task_mode="llm_generate",
            prompt={"identity": "Private Board"},
        )
        board_template = ExpertTemplate(
            name="private_board",
            config=board_config,
            is_builtin=True,
            description="默认私董会模板",
        )
        registry.register(board_template)

        router = BoardRouter(template_registry=registry)
        members = router._load_default_template_members()
        assert members == list(expected_members)
        assert len(members) <= MAX_EXPERTS

    def test_load_from_configs_directory(self) -> None:
        """Load YAML templates from the ``configs/experts/`` directory."""
        from pathlib import Path

        # Climb four directory levels from this file, then descend into
        # configs/experts.
        base_dir = Path(__file__)
        for _ in range(4):
            base_dir = base_dir.parent
        experts_dir = base_dir / "configs" / "experts"
        if not experts_dir.is_dir():
            pytest.skip(f"configs/experts/ not found at {experts_dir}")

        registry = ExpertTemplateRegistry()
        loaded = registry.load_from_directory(str(experts_dir))
        # The loader is expected to hand back a list of templates.
        assert isinstance(loaded, list)
        assert len(loaded) >= 5, f"expected ≥5 templates, got {len(loaded)}"

        # Key experts must be among the loaded templates.
        names = {t.config.name for t in loaded}
        expected_names = {"elon_musk", "jeff_bezos", "allenzhang", "charlie_munger"}
        assert expected_names.issubset(names), (
            f"missing expected experts: {expected_names - names}"
        )
# ═══════════════════════════════════════════════════════════════════════════
# 5. 停止命令检测 (Stop Command Detection)
# ═══════════════════════════════════════════════════════════════════════════
class TestStopCommandDetection:
    """Backtest: detection of stop commands."""

    @pytest.mark.parametrize(
        "command,expected_stop",
        [
            pytest.param("/stop", True, id="slash_stop"),
            pytest.param("停止讨论", True, id="chinese_stop"),
            pytest.param("stop", True, id="english_stop"),
            pytest.param("结束讨论", True, id="chinese_end"),
            pytest.param("继续讨论", False, id="continue"),
            pytest.param("", False, id="empty"),
            pytest.param("请继续", False, id="please_continue"),
            # Membership is case-sensitive, so the upper-case form must not match.
            pytest.param("STOP", False, id="uppercase_not_match"),
        ],
    )
    def test_stop_command_detection(self, command: str, expected_stop: bool) -> None:
        """Check membership of the stripped command in ``STOP_COMMANDS``."""
        is_stop = command.strip() in BoardOrchestrator.STOP_COMMANDS
        assert is_stop == expected_stop, (
            f"stop detection mismatch for {command!r}: "
            f"expected {expected_stop}, got {is_stop}"
        )

    def test_stop_commands_count(self) -> None:
        """``STOP_COMMANDS`` is expected to hold exactly four commands."""
        assert len(BoardOrchestrator.STOP_COMMANDS) == 4
# ═══════════════════════════════════════════════════════════════════════════
# 6. BoardTeam 状态机 (BoardTeam State Machine)
# ═══════════════════════════════════════════════════════════════════════════
class TestBoardTeamStateMachine:
    """Backtest: ``BoardTeam`` lifecycle status."""

    def test_initial_status_is_forming(self) -> None:
        """A newly created ``BoardTeam`` starts in ``FORMING``."""
        assert BoardTeam().status == BoardStatus.FORMING

    def test_status_transitions(self) -> None:
        """Walk FORMING -> DISCUSSING -> CONCLUDING -> COMPLETED -> DISSOLVED."""
        team = BoardTeam()
        assert team.status == BoardStatus.FORMING
        # Apply each transition in order and check it took effect.
        later_statuses = (
            BoardStatus.DISCUSSING,
            BoardStatus.CONCLUDING,
            BoardStatus.COMPLETED,
            BoardStatus.DISSOLVED,
        )
        for next_status in later_statuses:
            team.set_status(next_status)
            assert team.status == next_status

    def test_team_id_is_unique(self) -> None:
        """Two separately created teams get different ``team_id`` values."""
        first, second = BoardTeam(), BoardTeam()
        assert first.team_id != second.team_id

    def test_team_channel_format(self) -> None:
        """``team_channel`` has the form ``'board:{team_id}'``."""
        team = BoardTeam()
        expected_channel = f"board:{team.team_id}"
        assert team.team_channel == expected_channel

    def test_max_rounds_configurable(self) -> None:
        """``max_rounds`` passed to the constructor is stored as given."""
        for rounds in (3, 10):
            team = BoardTeam(max_rounds=rounds)
            assert team.max_rounds == rounds

    def test_default_max_rounds(self) -> None:
        """The default ``max_rounds`` is expected to be 5."""
        assert BoardTeam().max_rounds == 5
# ═══════════════════════════════════════════════════════════════════════════
# 7. 讨论历史管理 (Discussion History Management)
# ═══════════════════════════════════════════════════════════════════════════
class TestDiscussionHistory:
    """Backtest: discussion-history management.

    NOTE(review): the async tests below carry no explicit asyncio marker -
    presumably the project runs an async pytest plugin in auto mode; confirm
    against the pytest configuration.
    """

    @pytest.fixture
    def team(self) -> BoardTeam:
        """Provide a fresh ``BoardTeam`` to each test."""
        return BoardTeam()

    async def test_add_to_history(self, team: BoardTeam) -> None:
        """A speech added to the history is stored with the default expert role."""
        await team.add_to_history(round=1, expert_name="elon_musk", content="First speech")
        history = team.history
        assert len(history) == 1
        entry = history[0]
        assert entry["round"] == 1
        assert entry["expert_name"] == "elon_musk"
        assert entry["content"] == "First speech"
        assert entry["role"] == "expert"

    async def test_add_moderator_speech(self, team: BoardTeam) -> None:
        """A moderator speech keeps the explicitly supplied role."""
        await team.add_to_history(
            round=1,
            expert_name="moderator",
            content="Summary",
            role="moderator",
        )
        first_entry = team.history[0]
        assert first_entry["role"] == "moderator"

    async def test_add_user_intervention(self, team: BoardTeam) -> None:
        """A user intervention is recorded in the history under the ``user`` role."""
        await team.add_user_intervention("Please focus on cost")
        history = team.history
        assert len(history) == 1
        entry = history[0]
        assert entry["role"] == "user"
        assert entry["expert_name"] == "user"
        assert entry["content"] == "Please focus on cost"

    async def test_history_text_format(self, team: BoardTeam) -> None:
        """The formatted history text mentions rounds, speakers and content."""
        await team.add_to_history(round=1, expert_name="elon_musk", content="Speech 1")
        await team.add_to_history(
            round=1,
            expert_name="moderator",
            content="Round 1 summary",
            role="moderator",
        )
        await team.add_to_history(round=2, expert_name="jeff_bezos", content="Speech 2")

        text = team.get_history_text()
        # Each fragment must appear somewhere in the rendered text.
        expected_fragments = (
            "第1轮",
            "elon_musk",
            "Speech 1",
            "主持人小结",
            "第2轮",
            "jeff_bezos",
        )
        for fragment in expected_fragments:
            assert fragment in text

    async def test_history_text_up_to_round(self, team: BoardTeam) -> None:
        """``up_to_round`` limits the rendered history to the earlier rounds."""
        # Rounds 1..3 spoken by "a", "b", "c" with content "R1".."R3".
        for round_no, speaker in enumerate("abc", start=1):
            await team.add_to_history(
                round=round_no, expert_name=speaker, content=f"R{round_no}"
            )

        text_r2 = team.get_history_text(up_to_round=2)
        assert "R1" in text_r2
        assert "R2" in text_r2
        assert "R3" not in text_r2

    async def test_consume_user_interventions(self, team: BoardTeam) -> None:
        """Consuming interventions returns them once and then an empty list."""
        for message in ("Intervention 1", "Intervention 2"):
            await team.add_user_intervention(message)

        interventions = team.consume_user_interventions()
        assert len(interventions) == 2
        assert "Intervention 1" in interventions
        assert "Intervention 2" in interventions
        # A second consumption must find nothing left.
        assert team.consume_user_interventions() == []

    def test_empty_history_text(self, team: BoardTeam) -> None:
        """An empty history renders as the empty string."""
        rendered = team.get_history_text()
        assert rendered == ""

    def test_increment_round(self, team: BoardTeam) -> None:
        """``increment_round`` returns the new round and updates ``current_round``."""
        assert team.current_round == 0
        for expected_round in (1, 2):
            returned = team.increment_round()
            assert returned == expected_round
            assert team.current_round == expected_round
# ═══════════════════════════════════════════════════════════════════════════
# 8. BoardRoutingResult 数据结构 (Data Structure Integrity)
# ═══════════════════════════════════════════════════════════════════════════
class TestBoardRoutingResult:
    """Backtest: integrity of the ``BoardRoutingResult`` data structure."""

    def test_default_values(self) -> None:
        """A default-constructed result has empty or ``False`` fields."""
        blank = BoardRoutingResult()
        # Boolean flags default to False.
        assert blank.matched is False
        assert blank.board_mode is False
        # Collection and text fields default to empty values.
        assert blank.specified_experts == []
        assert blank.topic == ""
        assert blank.use_default_template is False
        assert blank.match_method == ""

    def test_matched_result_fields(self) -> None:
        """A matched input populates every field of the result."""
        router = BoardRouter(template_registry=ExpertTemplateRegistry())
        routed = router.resolve("@board:elon_musk,jeff_bezos 讨论主题")
        assert routed.matched is True
        assert routed.board_mode is True
        assert routed.specified_experts == ["elon_musk", "jeff_bezos"]
        assert routed.topic == "讨论主题"
        # Explicit experts were given, so the default template is not used.
        assert routed.use_default_template is False
        assert routed.match_method == "explicit_board"
# ═══════════════════════════════════════════════════════════════════════════
# 9. 回归测试 — 确保不破坏现有路由 (Regression: No Interference)
# ═══════════════════════════════════════════════════════════════════════════
class TestNoInterferenceRegression:
    """Regression: ``@board`` routing must not interfere with other prefixes."""

    def setup_method(self) -> None:
        # Every test gets a fresh router backed by an empty registry.
        self.router = BoardRouter(template_registry=ExpertTemplateRegistry())

    @pytest.mark.parametrize(
        "content",
        [
            pytest.param("@team:analyst,writer 协作完成任务", id="team_prefix"),
            pytest.param("@skill:react_agent 查看ip", id="skill_react_prefix"),
            pytest.param("@skill:chat_only 你好", id="skill_chat_prefix"),
            pytest.param("你好", id="greeting"),
            pytest.param("搜索golang教程", id="search"),
            pytest.param("执行ls命令", id="shell"),
        ],
    )
    def test_non_board_inputs_not_matched(self, content: str) -> None:
        """Inputs without the ``@board`` prefix must not be matched by ``BoardRouter``."""
        routed = self.router.resolve(content)
        assert routed.matched is False
        assert routed.board_mode is False
        assert routed.use_default_template is False