fix: resolve all code review issues from cross-validation
1. Critical: Add the missing TaskResult import in plan_exec_engine.py.
2. Critical: Fix the ReWOOEngine keyword argument name (max_steps → max_plan_steps).
3. Major: Remove the duplicate token counting in reflexion.py.
4. Major: When the LLM audit call fails, the alignment check now passes and defers to the rule check instead of failing.
5. Major: Fix deleting from a dict while iterating over it in lifecycle.py by iterating over a list() copy of its items.
6. Major: Fix tokenization of Chinese content by splitting on whitespace and punctuation with a regex instead of splitting on spaces only.
7. Minor: _is_positive_mention now checks every occurrence of the keyword, not just the first.
This commit is contained in:
parent
79eb8469f9
commit
cc2cd414c9
|
|
@ -281,7 +281,10 @@ class CostAwareRouter:
|
|||
try:
|
||||
# Extract capability-like keywords from content for matching
|
||||
# find_best_agent expects list[str] of required capabilities
|
||||
content_words = [w for w in content.split() if len(w) > 2][:5]
|
||||
# Support both space-separated (English) and punctuation-separated (Chinese) content
|
||||
import re
|
||||
tokens = re.split(r'[\s,,。!?、;:\n]+', content)
|
||||
content_words = [t for t in tokens if len(t) > 1][:5]
|
||||
best_agent = self._org_context.find_best_agent(required_capabilities=content_words)
|
||||
if best_agent is not None:
|
||||
agent_name = best_agent if isinstance(best_agent, str) else getattr(best_agent, "name", str(best_agent))
|
||||
|
|
|
|||
|
|
@ -700,7 +700,7 @@ class ConfigDrivenAgent(BaseAgent, EvolutionMixin):
|
|||
|
||||
rewoo_engine = ReWOOEngine(
|
||||
llm_gateway=self._llm_gateway,
|
||||
max_steps=self._skill_config.max_steps if self._skill_config else 5,
|
||||
max_plan_steps=self._skill_config.max_steps if self._skill_config else 5,
|
||||
default_timeout=300.0,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ from agentkit.core.exceptions import TaskCancelledError, TaskTimeoutError
|
|||
from agentkit.core.goal_planner import GoalPlanner
|
||||
from agentkit.core.plan_executor import PlanExecutor, PlanExecutionResult, StepExecutionResult
|
||||
from agentkit.core.plan_schema import ExecutionPlan, PlanStep, PlanStepStatus
|
||||
from agentkit.core.protocol import CancellationToken, TaskMessage, TaskStatus
|
||||
from agentkit.core.protocol import CancellationToken, TaskMessage, TaskResult, TaskStatus
|
||||
from agentkit.core.react import ReActEvent, ReActResult, ReActStep
|
||||
from agentkit.orchestrator.reflection import PipelineReflector, PipelineReplanner
|
||||
from agentkit.orchestrator.pipeline_schema import Pipeline, PipelineResult, ReflectionReport, StageResult, StageStatus
|
||||
|
|
|
|||
|
|
@ -237,7 +237,6 @@ class ReflexionEngine:
|
|||
agent_name=agent_name,
|
||||
task_type=task_type,
|
||||
)
|
||||
total_tokens += self._extract_usage_tokens(react_result)
|
||||
|
||||
# Track best result
|
||||
if score > best_score:
|
||||
|
|
@ -269,7 +268,6 @@ class ReflexionEngine:
|
|||
agent_name=agent_name,
|
||||
task_type=task_type,
|
||||
)
|
||||
total_tokens += self._extract_usage_tokens(react_result)
|
||||
|
||||
if reflection_text is None:
|
||||
# 反思失败,返回当前最佳结果
|
||||
|
|
@ -672,27 +670,6 @@ class ReflexionEngine:
|
|||
logger.warning(f"Reflection LLM call failed, skipping reflection: {e}")
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _extract_usage_tokens(result: ReActResult) -> int:
|
||||
"""从 LLM 响应中提取实际 token 用量,降级时估算
|
||||
|
||||
尝试从 ReActResult 的 trajectory 中获取最后一步的 usage 信息。
|
||||
如果不可用,基于输出长度估算。
|
||||
"""
|
||||
# 尝试从 trajectory 中获取 usage
|
||||
if result.trajectory:
|
||||
last_step = result.trajectory[-1]
|
||||
# ReActStep 可能携带 usage 信息
|
||||
usage = getattr(last_step, "usage", None) or getattr(last_step, "token_usage", None)
|
||||
if usage and isinstance(usage, dict):
|
||||
total = usage.get("total_tokens", 0)
|
||||
if total > 0:
|
||||
return total
|
||||
|
||||
# 降级:基于输出长度估算(约 4 字符 = 1 token)
|
||||
estimated = max(1, len(result.output) // 4)
|
||||
return estimated
|
||||
|
||||
def _build_reflection_prompt(
|
||||
self,
|
||||
original_prompt: str | None,
|
||||
|
|
|
|||
|
|
@ -409,7 +409,7 @@ class EvolutionMixin:
|
|||
self.pending_soul_updates[pattern].append(reflection)
|
||||
|
||||
# 检查是否有同一类别累积 >= 3 次反思
|
||||
for category, reflections in self.pending_soul_updates.items():
|
||||
for category, reflections in list(self.pending_soul_updates.items()):
|
||||
if len(reflections) >= 3:
|
||||
# 触发 soul 更新
|
||||
from agentkit.tools.memory_tool import MemoryTool
|
||||
|
|
|
|||
|
|
@ -156,23 +156,27 @@ class AlignmentGuard:
|
|||
"""判断 keyword 在 content 中是否为肯定性提及(实际执行/输出)
|
||||
|
||||
如果 keyword 出现在否定语境中(如"我们不会存储X"),不算违规。
|
||||
遍历所有出现位置,只要有一次肯定性提及即返回 True。
|
||||
"""
|
||||
# 找到 keyword 在 content 中的位置
|
||||
idx = content.find(keyword)
|
||||
if idx == -1:
|
||||
return False
|
||||
|
||||
# 检查 keyword 前面是否有否定词
|
||||
prefix = content[max(0, idx - 20) : idx]
|
||||
neg_prefixes = [
|
||||
"不会", "不能", "不要", "没有", "并未", "并未", "无法",
|
||||
"won't", "don't", "not ", "never ", "no ",
|
||||
]
|
||||
for neg in neg_prefixes:
|
||||
if neg in prefix:
|
||||
start = 0
|
||||
while True:
|
||||
idx = content.find(keyword, start)
|
||||
if idx == -1:
|
||||
return False
|
||||
|
||||
return True
|
||||
# 检查 keyword 前面是否有否定词
|
||||
prefix = content[max(0, idx - 20) : idx]
|
||||
neg_prefixes = [
|
||||
"不会", "不能", "不要", "没有", "并未", "并未", "无法",
|
||||
"won't", "don't", "not ", "never ", "no ",
|
||||
]
|
||||
is_negated = any(neg in prefix for neg in neg_prefixes)
|
||||
|
||||
if not is_negated:
|
||||
return True
|
||||
|
||||
# 继续搜索下一个出现位置
|
||||
start = idx + len(keyword)
|
||||
|
||||
@staticmethod
|
||||
def _extract_text(output: dict[str, Any]) -> str:
|
||||
|
|
@ -223,8 +227,8 @@ class AlignmentGuard:
|
|||
except Exception as e:
|
||||
logger.warning(f"LLM audit failed: {e}")
|
||||
return AlignmentCheckResult(
|
||||
passed=False,
|
||||
violations=[f"LLM audit unavailable: {e}"],
|
||||
passed=True,
|
||||
violations=[f"LLM audit unavailable (delegated to rule check): {e}"],
|
||||
checked_by="rule",
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue