feat: 优化劣势项 — 拍卖开关/审计采样/线程安全/评分锚定

1. 拍卖机制: 已有配置开关 (marketplace.auction_enabled), 默认关闭
2. LLM 审计采样: 新增 audit_sample_rate (0.0-1.0), 默认 1.0, 可降低审计频率
3. AlignmentConfig.from_dict: 忽略未知键, 防止 YAML 额外字段导致崩溃
4. 配置热重载线程安全: 用 threading.Event 替代布尔标志, 消除数据竞态
5. Reflexion 评分锚定: 添加评分维度 (Completeness/Correctness/Clarity) 和锚定点
2026-06-11 13:04:36 +08:00 · 2026-06-11 13:04:36 +08:00 · ec51dbb259
parent cc2cd414c9
commit ec51dbb259
3 changed files with 37 additions and 7 deletions
--- a/src/agentkit/core/reflexion.py
+++ b/src/agentkit/core/reflexion.py
@@ -564,10 +564,21 @@ class ReflexionEngine:
            f"## Task\n{task_description[:500]}\n\n"
            f"## Result\n{react_result.output[:1000]}\n\n"
            f"## Status\n{react_result.status}\n\n"
+            "## Scoring Rubric\n"
+            "Score based on these dimensions (equal weight):\n"
+            "- **Completeness**: Does the result address all aspects of the task?\n"
+            "- **Correctness**: Is the information accurate and logically sound?\n"
+            "- **Clarity**: Is the result well-structured and easy to understand?\n\n"
+            "## Anchor Points\n"
+            "- 0.9-1.0: Fully addresses task, accurate, clear, no significant issues\n"
+            "- 0.7-0.89: Addresses most of task, minor gaps or imprecisions\n"
+            "- 0.5-0.69: Partially addresses task, notable gaps or errors\n"
+            "- 0.3-0.49: Significant gaps or errors, incomplete\n"
+            "- 0.0-0.29: Fails to address task, major errors or irrelevant\n\n"
            "## Required Output Format\n"
            "Provide your evaluation in the following JSON format:\n"
            "```json\n"
-            '{"score": 0.0-1.0, "reasoning": "brief explanation"}\n'
+            '{"score": 0.0-1.0, "reasoning": "brief explanation referencing rubric dimensions"}\n'
            "```"
        )

--- a/src/agentkit/quality/alignment.py
+++ b/src/agentkit/quality/alignment.py
@@ -18,6 +18,14 @@ class AlignmentConfig:
    cascade_max_depth: int = 3
    audit_enabled: bool = False
    audit_model: str = "default"
+    audit_sample_rate: float = 1.0  # 审计采样率 0.0-1.0，1.0=每次都审计
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "AlignmentConfig":
+        """从字典创建，忽略未知键"""
+        known_fields = {f.name for f in cls.__dataclass_fields__.values()}
+        filtered = {k: v for k, v in data.items() if k in known_fields}
+        return cls(**filtered)


@dataclass
@@ -93,9 +101,13 @@ class AlignmentGuard:
                checked_by="rule",
            )

-        # 2. LLM 语义检查（仅当 audit_enabled=True 且有 llm_gateway）
+        # 2. LLM 语义检查（仅当 audit_enabled=True 且有 llm_gateway，按采样率执行）
        if self._config.audit_enabled and self._llm_gateway is not None:
-            return await self._llm_check(output, effective_constraints)
+            import random
+            if random.random() < self._config.audit_sample_rate:
+                return await self._llm_check(output, effective_constraints)
+            # 采样未命中，信任规则检查结果
+            logger.debug("LLM audit skipped (sample rate=%.2f)", self._config.audit_sample_rate)

        return AlignmentCheckResult(passed=True, checked_by="rule")

--- a/src/agentkit/server/app.py
+++ b/src/agentkit/server/app.py
@@ -229,17 +229,24 @@ def _on_config_change(app: FastAPI, config: ServerConfig) -> None:
    - Config version is incremented for audit tracking

    Uses a lock to prevent concurrent config reloads from racing.
+    Thread-safe: uses threading.Event for cross-thread signaling.
    """
+    import threading
+
    lock: asyncio.Lock = app.state._config_reload_lock

-    app.state._config_reload_pending = True
+    # Thread-safe: set pending flag via threading.Event or call_soon_threadsafe
+    if not hasattr(app.state, "_config_reload_event"):
+        app.state._config_reload_event = threading.Event()
+
+    app.state._config_reload_event.set()

    async def _reload():
        if lock.locked():
            return  # Another reload running; it will check pending flag
        async with lock:
-            while getattr(app.state, "_config_reload_pending", False):
-                app.state._config_reload_pending = False
+            while app.state._config_reload_event.is_set():
+                app.state._config_reload_event.clear()
                # Increment config version for audit
                current_version = getattr(app.state, "config_version", 0) + 1
                app.state.config_version = current_version
@@ -452,7 +459,7 @@ def create_app(
    alignment_config_data = {}
    if server_config and hasattr(server_config, "alignment") and server_config.alignment:
        alignment_config_data = server_config.alignment
-    alignment_config = AlignmentConfig(**alignment_config_data)
+    alignment_config = AlignmentConfig.from_dict(alignment_config_data)
    alignment_guard = AlignmentGuard(config=alignment_config, llm_gateway=app.state.llm_gateway)
    app.state.alignment_guard = alignment_guard