feat: optimize劣势项 — 拍卖开关/审计采样/线程安全/评分锚定
1. 拍卖机制: 已有配置开关 (marketplace.auction_enabled), 默认关闭
2. LLM 审计采样: 新增 audit_sample_rate (0.0-1.0), 默认 1.0, 可降低审计频率
3. AlignmentConfig.from_dict: 忽略未知键, 防止 YAML 额外字段导致崩溃
4. 配置热重载线程安全: 用 threading.Event 替代布尔标志, 消除数据竞态
5. Reflexion 评分锚定: 添加评分维度 (Completeness/Correctness/Clarity) 和锚定点
This commit is contained in:
parent
cc2cd414c9
commit
ec51dbb259
|
|
@ -564,10 +564,21 @@ class ReflexionEngine:
|
|||
f"## Task\n{task_description[:500]}\n\n"
|
||||
f"## Result\n{react_result.output[:1000]}\n\n"
|
||||
f"## Status\n{react_result.status}\n\n"
|
||||
"## Scoring Rubric\n"
|
||||
"Score based on these dimensions (equal weight):\n"
|
||||
"- **Completeness**: Does the result address all aspects of the task?\n"
|
||||
"- **Correctness**: Is the information accurate and logically sound?\n"
|
||||
"- **Clarity**: Is the result well-structured and easy to understand?\n\n"
|
||||
"## Anchor Points\n"
|
||||
"- 0.9-1.0: Fully addresses task, accurate, clear, no significant issues\n"
|
||||
"- 0.7-0.89: Addresses most of task, minor gaps or imprecisions\n"
|
||||
"- 0.5-0.69: Partially addresses task, notable gaps or errors\n"
|
||||
"- 0.3-0.49: Significant gaps or errors, incomplete\n"
|
||||
"- 0.0-0.29: Fails to address task, major errors or irrelevant\n\n"
|
||||
"## Required Output Format\n"
|
||||
"Provide your evaluation in the following JSON format:\n"
|
||||
"```json\n"
|
||||
'{"score": 0.0-1.0, "reasoning": "brief explanation"}\n'
|
||||
'{"score": 0.0-1.0, "reasoning": "brief explanation referencing rubric dimensions"}\n'
|
||||
"```"
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,14 @@ class AlignmentConfig:
|
|||
cascade_max_depth: int = 3
|
||||
audit_enabled: bool = False
|
||||
audit_model: str = "default"
|
||||
audit_sample_rate: float = 1.0 # 审计采样率 0.0-1.0,1.0=每次都审计
|
||||
|
||||
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "AlignmentConfig":
    """Build a config from a mapping, ignoring keys the constructor rejects.

    Args:
        data: Raw configuration mapping (for example parsed from YAML).
            ``None`` or an empty mapping yields an instance with all defaults.

    Returns:
        A new instance populated from the recognised keys in ``data``.
    """
    import inspect

    if not data:
        return cls()
    # Filter against the constructor signature rather than
    # ``__dataclass_fields__``: the latter also lists ClassVar pseudo-fields
    # and ``init=False`` fields, which ``cls(...)`` rejects with TypeError.
    accepted = inspect.signature(cls).parameters
    return cls(**{key: value for key, value in data.items() if key in accepted})
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -93,9 +101,13 @@ class AlignmentGuard:
|
|||
checked_by="rule",
|
||||
)
|
||||
|
||||
# 2. LLM 语义检查(仅当 audit_enabled=True 且有 llm_gateway)
|
||||
# 2. LLM 语义检查(仅当 audit_enabled=True 且有 llm_gateway,按采样率执行)
|
||||
if self._config.audit_enabled and self._llm_gateway is not None:
|
||||
return await self._llm_check(output, effective_constraints)
|
||||
import random
|
||||
if random.random() < self._config.audit_sample_rate:
|
||||
return await self._llm_check(output, effective_constraints)
|
||||
# 采样未命中,信任规则检查结果
|
||||
logger.debug("LLM audit skipped (sample rate=%.2f)", self._config.audit_sample_rate)
|
||||
|
||||
return AlignmentCheckResult(passed=True, checked_by="rule")
|
||||
|
||||
|
|
|
|||
|
|
@ -229,17 +229,24 @@ def _on_config_change(app: FastAPI, config: ServerConfig) -> None:
|
|||
- Config version is incremented for audit tracking
|
||||
|
||||
Uses a lock to prevent concurrent config reloads from racing.
|
||||
Thread-safe: uses threading.Event for cross-thread signaling.
|
||||
"""
|
||||
import threading
|
||||
|
||||
lock: asyncio.Lock = app.state._config_reload_lock
|
||||
|
||||
app.state._config_reload_pending = True
|
||||
# Thread-safe: set pending flag via threading.Event or call_soon_threadsafe
|
||||
if not hasattr(app.state, "_config_reload_event"):
|
||||
app.state._config_reload_event = threading.Event()
|
||||
|
||||
app.state._config_reload_event.set()
|
||||
|
||||
async def _reload():
|
||||
if lock.locked():
|
||||
return # Another reload running; it will check pending flag
|
||||
async with lock:
|
||||
while getattr(app.state, "_config_reload_pending", False):
|
||||
app.state._config_reload_pending = False
|
||||
while app.state._config_reload_event.is_set():
|
||||
app.state._config_reload_event.clear()
|
||||
# Increment config version for audit
|
||||
current_version = getattr(app.state, "config_version", 0) + 1
|
||||
app.state.config_version = current_version
|
||||
|
|
@ -452,7 +459,7 @@ def create_app(
|
|||
alignment_config_data = {}
|
||||
if server_config and hasattr(server_config, "alignment") and server_config.alignment:
|
||||
alignment_config_data = server_config.alignment
|
||||
alignment_config = AlignmentConfig(**alignment_config_data)
|
||||
alignment_config = AlignmentConfig.from_dict(alignment_config_data)
|
||||
alignment_guard = AlignmentGuard(config=alignment_config, llm_gateway=app.state.llm_gateway)
|
||||
app.state.alignment_guard = alignment_guard
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue