From ec51dbb259c0410283d004d1610c4acd61ccc537 Mon Sep 17 00:00:00 2001
From: chiguyong <chiguyong@beyondsoft.com>
Date: Thu, 11 Jun 2026 13:04:36 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20optimize=E5=8A=A3=E5=8A=BF=E9=A1=B9=20?=
 =?UTF-8?q?=E2=80=94=20=E6=8B=8D=E5=8D=96=E5=BC=80=E5=85=B3/=E5=AE=A1?=
 =?UTF-8?q?=E8=AE=A1=E9=87=87=E6=A0=B7/=E7=BA=BF=E7=A8=8B=E5=AE=89?=
 =?UTF-8?q?=E5=85=A8/=E8=AF=84=E5=88=86=E9=94=9A=E5=AE=9A?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. 拍卖机制: 已有配置开关(marketplace.auction_enabled), 默认关闭 (本补丁未改动相关代码)
2. LLM审计采样: 新增 audit_sample_rate (0.0-1.0), 默认1.0, 可降低审计频率
3. AlignmentConfig.from_dict: 忽略未知键, 防止YAML额外字段崩溃
4. 配置热重载线程安全: 用 threading.Event 替代布尔标志, 消除数据竞态
5. Reflexion评分锚定: 添加评分维度(Completeness/Correctness/Clarity)和锚定点
---
 src/agentkit/core/reflexion.py    | 13 ++++++++++++-
 src/agentkit/quality/alignment.py | 16 ++++++++++++++--
 src/agentkit/server/app.py        | 15 +++++++++++----
 3 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/src/agentkit/core/reflexion.py b/src/agentkit/core/reflexion.py
index 100ecd2..ac5152d 100644
--- a/src/agentkit/core/reflexion.py
+++ b/src/agentkit/core/reflexion.py
@@ -564,10 +564,21 @@ class ReflexionEngine:
             f"## Task\n{task_description[:500]}\n\n"
             f"## Result\n{react_result.output[:1000]}\n\n"
             f"## Status\n{react_result.status}\n\n"
+            "## Scoring Rubric\n"
+            "Score based on these dimensions (equal weight):\n"
+            "- **Completeness**: Does the result address all aspects of the task?\n"
+            "- **Correctness**: Is the information accurate and logically sound?\n"
+            "- **Clarity**: Is the result well-structured and easy to understand?\n\n"
+            "## Anchor Points\n"
+            "- 0.9-1.0: Fully addresses task, accurate, clear, no significant issues\n"
+            "- 0.7-0.89: Addresses most of task, minor gaps or imprecisions\n"
+            "- 0.5-0.69: Partially addresses task, notable gaps or errors\n"
+            "- 0.3-0.49: Significant gaps or errors, incomplete\n"
+            "- 0.0-0.29: Fails to address task, major errors or irrelevant\n\n"
             "## Required Output Format\n"
             "Provide your evaluation in the following JSON format:\n"
             "```json\n"
-            '{"score": 0.0-1.0, "reasoning": "brief explanation"}\n'
+            '{"score": 0.0-1.0, "reasoning": "brief explanation referencing rubric dimensions"}\n'
             "```"
         )
 
diff --git a/src/agentkit/quality/alignment.py b/src/agentkit/quality/alignment.py
index 56fa347..fb5f1cf 100644
--- a/src/agentkit/quality/alignment.py
+++ b/src/agentkit/quality/alignment.py
@@ -18,6 +18,14 @@ class AlignmentConfig:
     cascade_max_depth: int = 3
     audit_enabled: bool = False
     audit_model: str = "default"
+    audit_sample_rate: float = 1.0  # 审计采样率 0.0-1.0，1.0=每次都审计
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "AlignmentConfig":
+        """从字典创建，忽略未知键"""
+        known_fields = {f.name for f in cls.__dataclass_fields__.values()}
+        filtered = {k: v for k, v in data.items() if k in known_fields}
+        return cls(**filtered)
 
 
 @dataclass
@@ -93,9 +101,13 @@ class AlignmentGuard:
                 checked_by="rule",
             )
 
-        # 2. LLM 语义检查（仅当 audit_enabled=True 且有 llm_gateway）
+        # 2. LLM 语义检查（仅当 audit_enabled=True 且有 llm_gateway，按采样率执行）
         if self._config.audit_enabled and self._llm_gateway is not None:
-            return await self._llm_check(output, effective_constraints)
+            import random
+            if random.random() < self._config.audit_sample_rate:
+                return await self._llm_check(output, effective_constraints)
+            # 采样未命中，信任规则检查结果
+            logger.debug("LLM audit skipped (sample rate=%.2f)", self._config.audit_sample_rate)
 
         return AlignmentCheckResult(passed=True, checked_by="rule")
 
diff --git a/src/agentkit/server/app.py b/src/agentkit/server/app.py
index 9f47bc4..297d4b9 100644
--- a/src/agentkit/server/app.py
+++ b/src/agentkit/server/app.py
@@ -229,17 +229,24 @@ def _on_config_change(app: FastAPI, config: ServerConfig) -> None:
     - Config version is incremented for audit tracking
 
     Uses a lock to prevent concurrent config reloads from racing.
+    The pending flag is a threading.Event so that set()/clear() are safe across threads.
     """
+    import threading
+
     lock: asyncio.Lock = app.state._config_reload_lock
 
-    app.state._config_reload_pending = True
+    # Create the pending-reload Event on first use, then signal it (Event.set() is thread-safe)
+    if not hasattr(app.state, "_config_reload_event"):
+        app.state._config_reload_event = threading.Event()
+
+    app.state._config_reload_event.set()
 
     async def _reload():
         if lock.locked():
             return  # Another reload running; it will check pending flag
         async with lock:
-            while getattr(app.state, "_config_reload_pending", False):
-                app.state._config_reload_pending = False
+            while app.state._config_reload_event.is_set():
+                app.state._config_reload_event.clear()
                 # Increment config version for audit
                 current_version = getattr(app.state, "config_version", 0) + 1
                 app.state.config_version = current_version
@@ -452,7 +459,7 @@ def create_app(
     alignment_config_data = {}
     if server_config and hasattr(server_config, "alignment") and server_config.alignment:
         alignment_config_data = server_config.alignment
-    alignment_config = AlignmentConfig(**alignment_config_data)
+    alignment_config = AlignmentConfig.from_dict(alignment_config_data)
     alignment_guard = AlignmentGuard(config=alignment_config, llm_gateway=app.state.llm_gateway)
     app.state.alignment_guard = alignment_guard