"""私董会 (Board Meeting) 回测脚本 — 能力评估与回归测试 对 BoardRouter / BoardTeam / BoardOrchestrator 进行系统性回测, 覆盖以下能力维度: 1. 前缀路由准确性 (Prefix Routing Accuracy) - @board 前缀匹配 - @board:expert1,expert2 显式专家 - @board:private_board 显式模板 - 非 @board 输入不应误匹配 2. 主题提取准确性 (Topic Extraction Accuracy) - 从 @board topic 提取 topic - 从 @board:experts topic 提取 topic - 空主题处理 3. 专家名验证 (Expert Name Validation) - 有效名称接受 - 无效名称拒绝(含空格、特殊字符) - MAX_EXPERTS=10 上限截断 4. 模板加载 (Template Loading) - 默认 private_board 模板成员加载 - 配置目录 YAML 加载 5. 停止命令检测 (Stop Command Detection) - /stop, 停止讨论, stop, 结束讨论 6. BoardTeam 状态机 (BoardTeam State Machine) - FORMING → DISCUSSING → CONCLUDING → COMPLETED → DISSOLVED 7. 讨论历史管理 (Discussion History Management) - 添加发言 - 历史格式化 - 用户干预 8. BoardRoutingResult 数据结构 (Data Structure Integrity) - 默认值验证 - 匹配结果字段填充 9. 回归测试 (Regression: No Interference) - @board 路由不干扰 @team / @skill / 普通聊天 运行方式: pytest tests/unit/experts/test_board_backtest.py -v pytest tests/unit/experts/test_board_backtest.py -v -k "TestPrefixRouting" """ from __future__ import annotations import pytest from agentkit.experts.board_orchestrator import BoardOrchestrator from agentkit.experts.board_router import ( BOARD_PREFIX_PATTERN, DEFAULT_TEMPLATE, MAX_EXPERTS, BoardRouter, BoardRoutingResult, ) from agentkit.experts.board import BoardStatus, BoardTeam from agentkit.experts.config import ExpertConfig, ExpertTemplate from agentkit.experts.registry import ExpertTemplateRegistry # ── 辅助函数 ────────────────────────────────────────────── def _make_expert_template(name: str, persona: str = "测试专家") -> ExpertTemplate: """创建测试用 ExpertTemplate""" config = ExpertConfig( name=name, agent_type="expert", persona=persona, thinking_style="analytical", bound_skills=[], task_mode="llm_generate", prompt={"identity": persona}, ) return ExpertTemplate(name=name, config=config, is_builtin=True, description=persona) def _make_registry_with_experts() -> ExpertTemplateRegistry: """创建包含预注册专家模板的注册中心""" registry = ExpertTemplateRegistry() 
registry.register(_make_expert_template("elon_musk", persona="Elon Musk")) registry.register(_make_expert_template("jeff_bezos", persona="Jeff Bezos")) registry.register(_make_expert_template("allenzhang", persona="张小龙")) return registry # ═══════════════════════════════════════════════════════════════════════════ # 1. 前缀路由准确性 (Prefix Routing Accuracy) # ═══════════════════════════════════════════════════════════════════════════ class TestPrefixRouting: """回测:@board 前缀路由准确性""" def setup_method(self) -> None: self.router = BoardRouter(template_registry=ExpertTemplateRegistry()) @pytest.mark.parametrize( "content,expected_matched,expected_board_mode", [ ("@board 讨论主题", True, True), ("@board:elon_musk,jeff_bezos 主题", True, True), ("@board:private_board 主题", True, True), ("@board", True, True), ("讨论一下市场策略", False, False), ("@team:analyst,writer 任务", False, False), ("@skill:react_agent 查看ip", False, False), ("普通聊天消息", False, False), ], ids=[ "board_default", "board_explicit_experts", "board_explicit_template", "board_no_topic", "plain_text", "team_prefix", "skill_prefix", "chitchat", ], ) def test_prefix_matching( self, content: str, expected_matched: bool, expected_board_mode: bool ) -> None: """验证 @board 前缀匹配准确性""" result = self.router.resolve(content) assert result.matched == expected_matched, ( f"matched mismatch for {content!r}: " f"expected {expected_matched}, got {result.matched}" ) assert result.board_mode == expected_board_mode, ( f"board_mode mismatch for {content!r}: " f"expected {expected_board_mode}, got {result.board_mode}" ) def test_regex_pattern_directly(self) -> None: """直接测试正则表达式 BOARD_PREFIX_PATTERN""" # 匹配 @board m = BOARD_PREFIX_PATTERN.match("@board 主题") assert m is not None assert m.group(1) is None # no expert list assert m.group(2).strip() == "主题" # 匹配 @board:experts m = BOARD_PREFIX_PATTERN.match("@board:a,b 主题") assert m is not None assert m.group(1) == "a,b" assert m.group(2).strip() == "主题" # 不匹配 @team m = BOARD_PREFIX_PATTERN.match("@team 
task") assert m is None def test_default_template_uses_private_board(self) -> None: """@board 无指定专家时应使用 private_board 默认模板""" result = self.router.resolve("@board 讨论主题") assert result.use_default_template is True assert result.match_method == "explicit_board" def test_explicit_private_board_template(self) -> None: """@board:private_board 应显式使用默认模板""" result = self.router.resolve("@board:private_board 讨论主题") assert result.use_default_template is True assert result.board_mode is True # ═══════════════════════════════════════════════════════════════════════════ # 2. 主题提取准确性 (Topic Extraction Accuracy) # ═══════════════════════════════════════════════════════════════════════════ class TestTopicExtraction: """回测:讨论主题提取准确性""" def setup_method(self) -> None: self.router = BoardRouter(template_registry=ExpertTemplateRegistry()) @pytest.mark.parametrize( "content,expected_topic", [ ("@board 讨论是否应该进入东南亚市场", "讨论是否应该进入东南亚市场"), ("@board AI产品定价策略应该怎么做", "AI产品定价策略应该怎么做"), ("@board:elon_musk,jeff_bezos 火星商业化方案", "火星商业化方案"), ("@board:private_board 创业融资策略", "创业融资策略"), ("@board", ""), ("@board ", ""), ], ids=[ "default_chinese", "default_chinese_2", "explicit_experts", "explicit_template", "empty_topic", "whitespace_only", ], ) def test_topic_extraction(self, content: str, expected_topic: str) -> None: """验证从 @board 输入中提取讨论主题""" result = self.router.resolve(content) actual = " ".join(result.topic.split()) assert actual == expected_topic, ( f"topic mismatch for {content!r}: " f"expected {expected_topic!r}, got {actual!r}" ) def test_colon_no_experts(self) -> None: """@board: topic(冒号后无专家名)的边界行为""" # \S+ requires non-whitespace after colon, so ": topic" → group(1)=None # The colon becomes part of the topic result = self.router.resolve("@board: topic") assert result.matched is True assert result.board_mode is True # group(1) is None because \S+ doesn't match " " (space after colon) assert result.use_default_template is True # 
# ═══════════════════════════════════════════════════════════════════════════
# 3. Expert Name Validation
# ═══════════════════════════════════════════════════════════════════════════


class TestExpertNameValidation:
    """Backtest: expert-name format validation and the expert-count cap."""

    def setup_method(self) -> None:
        # Every test gets a router backed by an empty template registry.
        self.router = BoardRouter(template_registry=ExpertTemplateRegistry())

    @pytest.mark.parametrize(
        "content,expected_count",
        [
            pytest.param("@board:elon_musk,jeff_bezos 主题", 2, id="two"),
            pytest.param("@board:elon_musk,jeff_bezos,allenzhang 主题", 3, id="three"),
            pytest.param(
                "@board:charlie_munger,warren_buffett,paul_graham 主题",
                3,
                id="three_alt",
            ),
            pytest.param("@board:a,b,c,d,e,f,g,h,i,j 主题", 10, id="exactly_ten"),
            # Eleven names requested; ten are expected to be kept.
            pytest.param("@board:a,b,c,d,e,f,g,h,i,j,k 主题", 10, id="eleven_capped"),
        ],
    )
    def test_valid_expert_count(self, content: str, expected_count: int) -> None:
        """The number of resolved experts equals the expected count."""
        result = self.router.resolve(content)
        assert len(result.specified_experts) == expected_count, (
            f"expert count mismatch for {content!r}: "
            f"expected {expected_count}, got {len(result.specified_experts)}"
        )

    def test_invalid_names_rejected(self) -> None:
        """When every requested name is invalid, the default template is used."""
        # NOTE(review): "@#$" is presumably rejected by the router's name
        # pattern (the original note cites ^[a-zA-Z0-9_-]{1,64}$) — confirm.
        result = self.router.resolve("@board:@#$ 主题")

        assert result.use_default_template is True, (
            "should fall back to default template when all names are invalid"
        )
        assert len(result.specified_experts) > 0, (
            f"default template members should be loaded, got: {result.specified_experts}"
        )

    def test_max_experts_constant(self) -> None:
        """MAX_EXPERTS is 10."""
        assert MAX_EXPERTS == 10

    def test_resolve_expert_configs_first_is_moderator(self) -> None:
        """``resolve_expert_configs`` flags only the first expert as lead."""
        routed = self.router.resolve("@board:expert_a,expert_b 主题")
        configs = self.router.resolve_expert_configs(routed.specified_experts)

        assert len(configs) == 2
        lead_config, member_config = configs
        assert lead_config.is_lead is True
        assert member_config.is_lead is False

    def test_resolve_expert_configs_dynamic_generation(self) -> None:
        """An unregistered expert name still yields a config carrying that name."""
        routed = self.router.resolve("@board:dynamic_expert 主题")
        configs = self.router.resolve_expert_configs(routed.specified_experts)

        assert len(configs) == 1
        only_config = configs[0]
        assert only_config.name == "dynamic_expert"
        assert only_config.is_lead is True

    def test_mixed_valid_invalid_names(self) -> None:
        """With mixed input, invalid names are dropped and valid ones kept in order."""
        result = self.router.resolve("@board:elon_musk,@#$,jeff_bezos 主题")
        assert result.specified_experts == ["elon_musk", "jeff_bezos"], (
            f"invalid names should be filtered, got: {result.specified_experts}"
        )
        assert result.use_default_template is False

    @pytest.mark.parametrize(
        "name_length,expected_valid",
        [
            # Exactly 64 characters: expected to be accepted.
            pytest.param(64, True, id="boundary_64_valid"),
            # 65 characters: expected to be rejected.
            pytest.param(65, False, id="boundary_65_invalid"),
        ],
    )
    def test_expert_name_length_boundary(self, name_length: int, expected_valid: bool) -> None:
        """Length boundary: a 64-character name is valid, a 65-character one is not."""
        name = "a" * name_length
        routed = self.router.resolve(f"@board:{name} 主题")

        if not expected_valid:
            # The only requested name is invalid, so the default template applies.
            assert routed.use_default_template is True
            return

        assert len(routed.specified_experts) == 1
        assert routed.specified_experts[0] == name

    def test_resolve_expert_configs_no_template_mutation(self) -> None:
        """``resolve_expert_configs`` must not mutate configs returned by an earlier call."""
        router = BoardRouter(template_registry=_make_registry_with_experts())

        # First call: elon_musk comes first and is therefore the lead.
        first_pass = router.resolve_expert_configs(["elon_musk", "jeff_bezos"])
        assert first_pass[0].is_lead is True
        assert first_pass[1].is_lead is False

        # Second call with the order reversed: jeff_bezos is now the lead.
        second_pass = router.resolve_expert_configs(["jeff_bezos", "elon_musk"])
        assert second_pass[0].is_lead is True
        assert second_pass[1].is_lead is False

        # The configs from the first call must be unaffected by the second.
        assert first_pass[0].is_lead is True, (
            "first call's is_lead was mutated by second call (shared template bug)"
        )
        assert first_pass[1].is_lead is False
# ═══════════════════════════════════════════════════════════════════════════
# 4. Template Loading
# ═══════════════════════════════════════════════════════════════════════════


class TestTemplateLoading:
    """Backtest: loading of expert templates."""

    def test_default_template_name(self) -> None:
        """DEFAULT_TEMPLATE is 'private_board'."""
        assert DEFAULT_TEMPLATE == "private_board"

    def test_fallback_default_members(self) -> None:
        """With an empty registry, a non-empty fallback member list is returned."""
        router = BoardRouter(template_registry=ExpertTemplateRegistry())
        members = router._load_default_template_members()

        # Non-empty, and never larger than the expert cap.
        assert len(members) > 0
        assert len(members) <= MAX_EXPERTS
        # The fallback list is expected to contain these preset experts.
        for preset_name in ("elon_musk", "jeff_bezos"):
            assert preset_name in members

    def test_default_template_members_from_registry(self) -> None:
        """A registered ``private_board`` template supplies the default members."""
        registry = _make_registry_with_experts()

        # The template's bound_skills list doubles as its member list here.
        private_board_template = ExpertTemplate(
            name="private_board",
            config=ExpertConfig(
                name="private_board",
                agent_type="expert",
                persona="私董会模板",
                bound_skills=["elon_musk", "jeff_bezos", "allenzhang"],
                task_mode="llm_generate",
                prompt={"identity": "Private Board"},
            ),
            is_builtin=True,
            description="默认私董会模板",
        )
        registry.register(private_board_template)

        router = BoardRouter(template_registry=registry)
        members = router._load_default_template_members()

        assert members == ["elon_musk", "jeff_bezos", "allenzhang"]
        assert len(members) <= MAX_EXPERTS

    def test_load_from_configs_directory(self) -> None:
        """Load YAML templates from ``configs/experts/``; skip when it is absent."""
        from pathlib import Path

        # configs/experts is looked up four directory levels above this file.
        base_dir = Path(__file__).parent.parent.parent.parent
        experts_dir = base_dir / "configs" / "experts"
        if not experts_dir.is_dir():
            pytest.skip(f"configs/experts/ not found at {experts_dir}")

        registry = ExpertTemplateRegistry()
        loaded = registry.load_from_directory(str(experts_dir))

        # The loader is expected to return a list of templates.
        assert isinstance(loaded, list)
        assert len(loaded) >= 5, f"expected ≥5 templates, got {len(loaded)}"

        # The key experts must be among the loaded templates.
        names = {template.config.name for template in loaded}
        expected_names = {"elon_musk", "jeff_bezos", "allenzhang", "charlie_munger"}
        assert expected_names.issubset(names), (
            f"missing expected experts: {expected_names - names}"
        )


# ═══════════════════════════════════════════════════════════════════════════
# 5. Stop Command Detection
# ═══════════════════════════════════════════════════════════════════════════


class TestStopCommandDetection:
    """Backtest: detection of stop commands."""

    @pytest.mark.parametrize(
        "command,expected_stop",
        [
            pytest.param("/stop", True, id="slash_stop"),
            pytest.param("停止讨论", True, id="chinese_stop"),
            pytest.param("stop", True, id="english_stop"),
            pytest.param("结束讨论", True, id="chinese_end"),
            pytest.param("继续讨论", False, id="continue"),
            pytest.param("", False, id="empty"),
            pytest.param("请继续", False, id="please_continue"),
            # Membership is case-sensitive, so upper-case must not match.
            pytest.param("STOP", False, id="uppercase_not_match"),
        ],
    )
    def test_stop_command_detection(self, command: str, expected_stop: bool) -> None:
        """A stripped command is a stop command iff it is in STOP_COMMANDS."""
        stripped_command = command.strip()
        is_stop = stripped_command in BoardOrchestrator.STOP_COMMANDS
        assert is_stop == expected_stop, (
            f"stop detection mismatch for {command!r}: "
            f"expected {expected_stop}, got {is_stop}"
        )

    def test_stop_commands_count(self) -> None:
        """STOP_COMMANDS contains exactly four commands."""
        assert len(BoardOrchestrator.STOP_COMMANDS) == 4
# ═══════════════════════════════════════════════════════════════════════════
# 6. BoardTeam State Machine
# ═══════════════════════════════════════════════════════════════════════════


class TestBoardTeamStateMachine:
    """Backtest: BoardTeam lifecycle status."""

    def test_initial_status_is_forming(self) -> None:
        """A freshly created BoardTeam starts in FORMING."""
        assert BoardTeam().status == BoardStatus.FORMING

    def test_status_transitions(self) -> None:
        """Walk FORMING → DISCUSSING → CONCLUDING → COMPLETED → DISSOLVED."""
        team = BoardTeam()
        assert team.status == BoardStatus.FORMING

        later_statuses = (
            BoardStatus.DISCUSSING,
            BoardStatus.CONCLUDING,
            BoardStatus.COMPLETED,
            BoardStatus.DISSOLVED,
        )
        # After each set_status call the status must read back as the new value.
        for next_status in later_statuses:
            team.set_status(next_status)
            assert team.status == next_status

    def test_team_id_is_unique(self) -> None:
        """Two BoardTeam instances get different team_id values."""
        first_team = BoardTeam()
        second_team = BoardTeam()
        assert first_team.team_id != second_team.team_id

    def test_team_channel_format(self) -> None:
        """team_channel has the form 'board:{team_id}'."""
        team = BoardTeam()
        expected_channel = f"board:{team.team_id}"
        assert team.team_channel == expected_channel

    def test_max_rounds_configurable(self) -> None:
        """The max_rounds constructor argument is reflected on the instance."""
        for configured_rounds in (3, 10):
            team = BoardTeam(max_rounds=configured_rounds)
            assert team.max_rounds == configured_rounds

    def test_default_max_rounds(self) -> None:
        """max_rounds defaults to 5."""
        assert BoardTeam().max_rounds == 5
# ═══════════════════════════════════════════════════════════════════════════
# 7. Discussion History Management
# ═══════════════════════════════════════════════════════════════════════════


class TestDiscussionHistory:
    """Backtest: discussion history management.

    NOTE(review): the coroutine tests carry no explicit asyncio marker;
    presumably the project runs pytest-asyncio in auto mode (or an equivalent
    plugin) — confirm against the pytest configuration.
    """

    @pytest.fixture
    def team(self) -> BoardTeam:
        # A fresh BoardTeam for every test.
        return BoardTeam()

    async def test_add_to_history(self, team: BoardTeam) -> None:
        """A speech added to the history is stored with the default expert role."""
        await team.add_to_history(round=1, expert_name="elon_musk", content="First speech")

        history = team.history
        assert len(history) == 1
        entry = history[0]
        assert entry["round"] == 1
        assert entry["expert_name"] == "elon_musk"
        assert entry["content"] == "First speech"
        assert entry["role"] == "expert"

    async def test_add_moderator_speech(self, team: BoardTeam) -> None:
        """A speech added with role='moderator' keeps that role."""
        await team.add_to_history(
            round=1,
            expert_name="moderator",
            content="Summary",
            role="moderator",
        )
        first_entry = team.history[0]
        assert first_entry["role"] == "moderator"

    async def test_add_user_intervention(self, team: BoardTeam) -> None:
        """A user intervention is recorded in the history under the 'user' role."""
        await team.add_user_intervention("Please focus on cost")

        history = team.history
        assert len(history) == 1
        entry = history[0]
        assert entry["role"] == "user"
        assert entry["expert_name"] == "user"
        assert entry["content"] == "Please focus on cost"

    async def test_history_text_format(self, team: BoardTeam) -> None:
        """The formatted history text contains round labels, speakers and content."""
        await team.add_to_history(round=1, expert_name="elon_musk", content="Speech 1")
        await team.add_to_history(
            round=1,
            expert_name="moderator",
            content="Round 1 summary",
            role="moderator",
        )
        await team.add_to_history(round=2, expert_name="jeff_bezos", content="Speech 2")

        text = team.get_history_text()
        expected_fragments = (
            "第1轮",
            "elon_musk",
            "Speech 1",
            "主持人小结",
            "第2轮",
            "jeff_bezos",
        )
        for fragment in expected_fragments:
            assert fragment in text

    async def test_history_text_up_to_round(self, team: BoardTeam) -> None:
        """``up_to_round`` limits the formatted history to the given round."""
        speeches = (
            (1, "a", "R1"),
            (2, "b", "R2"),
            (3, "c", "R3"),
        )
        for round_no, speaker, body in speeches:
            await team.add_to_history(round=round_no, expert_name=speaker, content=body)

        text_r2 = team.get_history_text(up_to_round=2)
        assert "R1" in text_r2
        assert "R2" in text_r2
        assert "R3" not in text_r2

    async def test_consume_user_interventions(self, team: BoardTeam) -> None:
        """Consuming interventions returns them once, then an empty list."""
        for message in ("Intervention 1", "Intervention 2"):
            await team.add_user_intervention(message)

        interventions = team.consume_user_interventions()
        assert len(interventions) == 2
        assert "Intervention 1" in interventions
        assert "Intervention 2" in interventions

        # A second consume must come back empty.
        assert team.consume_user_interventions() == []

    def test_empty_history_text(self, team: BoardTeam) -> None:
        """An empty history formats to the empty string."""
        assert team.get_history_text() == ""

    def test_increment_round(self, team: BoardTeam) -> None:
        """``increment_round`` returns the new round and updates ``current_round``."""
        assert team.current_round == 0
        for expected_round in (1, 2):
            returned_round = team.increment_round()
            assert returned_round == expected_round
            assert team.current_round == expected_round


# ═══════════════════════════════════════════════════════════════════════════
# 8. BoardRoutingResult Data Structure Integrity
# ═══════════════════════════════════════════════════════════════════════════


class TestBoardRoutingResult:
    """Backtest: integrity of the BoardRoutingResult data structure."""

    def test_default_values(self) -> None:
        """A default-constructed result has empty / False fields."""
        blank = BoardRoutingResult()
        assert blank.matched is False
        assert blank.board_mode is False
        assert blank.specified_experts == []
        assert blank.topic == ""
        assert blank.use_default_template is False
        assert blank.match_method == ""

    def test_matched_result_fields(self) -> None:
        """A matched result has every field populated as expected."""
        router = BoardRouter(template_registry=ExpertTemplateRegistry())
        routed = router.resolve("@board:elon_musk,jeff_bezos 讨论主题")

        assert routed.matched is True
        assert routed.board_mode is True
        assert routed.specified_experts == ["elon_musk", "jeff_bezos"]
        assert routed.topic == "讨论主题"
        assert routed.use_default_template is False
        assert routed.match_method == "explicit_board"
# ═══════════════════════════════════════════════════════════════════════════
# 9. Regression: No Interference (do not break existing routing)
# ═══════════════════════════════════════════════════════════════════════════


class TestNoInterferenceRegression:
    """Regression: ``@board`` routing must not interfere with other prefixes."""

    def setup_method(self) -> None:
        # Every test gets a router backed by an empty template registry.
        self.router = BoardRouter(template_registry=ExpertTemplateRegistry())

    @pytest.mark.parametrize(
        "content",
        [
            pytest.param("@team:analyst,writer 协作完成任务", id="team_prefix"),
            pytest.param("@skill:react_agent 查看ip", id="skill_react_prefix"),
            pytest.param("@skill:chat_only 你好", id="skill_chat_prefix"),
            pytest.param("你好", id="greeting"),
            pytest.param("搜索golang教程", id="search"),
            pytest.param("执行ls命令", id="shell"),
        ],
    )
    def test_non_board_inputs_not_matched(self, content: str) -> None:
        """Input without the ``@board`` prefix is not matched by BoardRouter."""
        routed = self.router.resolve(content)
        assert routed.matched is False
        assert routed.board_mode is False
        assert routed.use_default_template is False