fix: resolve all code review issues from cross-validation

1. Critical: Add the missing `TaskResult` import in plan_exec_engine.py.
2. Critical: Fix the ReWOOEngine parameter name (`max_steps` → `max_plan_steps`).
3. Major: Remove duplicate token counting in reflexion.py.
4. Major: An LLM audit failure now passes (deferring to the rule check) instead of failing.
5. Major: Fix dict iteration with `del` by iterating over a `list()` copy in lifecycle.py.
6. Major: Fix Chinese content tokenization by splitting with a regex instead of splitting on spaces.
7. Minor: `_is_positive_mention` now checks all occurrences, not just the first.
2026-06-11 06:22:35 +08:00 · 2026-06-11 06:22:35 +08:00 · cc2cd414c9
parent 79eb8469f9
commit cc2cd414c9
6 changed files with 27 additions and 43 deletions
--- a/src/agentkit/chat/skill_routing.py
+++ b/src/agentkit/chat/skill_routing.py
@ -281,7 +281,10 @@ class CostAwareRouter:
            try:
                # Extract capability-like keywords from content for matching
                # find_best_agent expects list[str] of required capabilities
-                content_words = [w for w in content.split() if len(w) > 2][:5]
+                # Support both space-separated (English) and punctuation-separated (Chinese) content
+                import re
+                tokens = re.split(r'[\s,，。！？、；：\n]+', content)
+                content_words = [t for t in tokens if len(t) > 1][:5]
                best_agent = self._org_context.find_best_agent(required_capabilities=content_words)
                if best_agent is not None:
                    agent_name = best_agent if isinstance(best_agent, str) else getattr(best_agent, "name", str(best_agent))
--- a/src/agentkit/core/config_driven.py
+++ b/src/agentkit/core/config_driven.py
@ -700,7 +700,7 @@ class ConfigDrivenAgent(BaseAgent, EvolutionMixin):

        rewoo_engine = ReWOOEngine(
            llm_gateway=self._llm_gateway,
-            max_steps=self._skill_config.max_steps if self._skill_config else 5,
+            max_plan_steps=self._skill_config.max_steps if self._skill_config else 5,
            default_timeout=300.0,
        )

--- a/src/agentkit/core/plan_exec_engine.py
+++ b/src/agentkit/core/plan_exec_engine.py
@ -23,7 +23,7 @@ from agentkit.core.exceptions import TaskCancelledError, TaskTimeoutError
 from agentkit.core.goal_planner import GoalPlanner
 from agentkit.core.plan_executor import PlanExecutor, PlanExecutionResult, StepExecutionResult
 from agentkit.core.plan_schema import ExecutionPlan, PlanStep, PlanStepStatus
-from agentkit.core.protocol import CancellationToken, TaskMessage, TaskStatus
+from agentkit.core.protocol import CancellationToken, TaskMessage, TaskResult, TaskStatus
 from agentkit.core.react import ReActEvent, ReActResult, ReActStep
 from agentkit.orchestrator.reflection import PipelineReflector, PipelineReplanner
 from agentkit.orchestrator.pipeline_schema import Pipeline, PipelineResult, ReflectionReport, StageResult, StageStatus
--- a/src/agentkit/core/reflexion.py
+++ b/src/agentkit/core/reflexion.py
@ -237,7 +237,6 @@ class ReflexionEngine:
                    agent_name=agent_name,
                    task_type=task_type,
                )
-                total_tokens += self._extract_usage_tokens(react_result)

                # Track best result
                if score > best_score:
@ -269,7 +268,6 @@ class ReflexionEngine:
                    agent_name=agent_name,
                    task_type=task_type,
                )
-                total_tokens += self._extract_usage_tokens(react_result)

                if reflection_text is None:
                    # 反思失败，返回当前最佳结果
@ -672,27 +670,6 @@ class ReflexionEngine:
            logger.warning(f"Reflection LLM call failed, skipping reflection: {e}")
            return None

-    @staticmethod
-    def _extract_usage_tokens(result: ReActResult) -> int:
-        """从 LLM 响应中提取实际 token 用量，降级时估算
-
-        尝试从 ReActResult 的 trajectory 中获取最后一步的 usage 信息。
-        如果不可用，基于输出长度估算。
-        """
-        # 尝试从 trajectory 中获取 usage
-        if result.trajectory:
-            last_step = result.trajectory[-1]
-            # ReActStep 可能携带 usage 信息
-            usage = getattr(last_step, "usage", None) or getattr(last_step, "token_usage", None)
-            if usage and isinstance(usage, dict):
-                total = usage.get("total_tokens", 0)
-                if total > 0:
-                    return total
-
-        # 降级：基于输出长度估算（约 4 字符 = 1 token）
-        estimated = max(1, len(result.output) // 4)
-        return estimated
-
    def _build_reflection_prompt(
        self,
        original_prompt: str | None,
--- a/src/agentkit/evolution/lifecycle.py
+++ b/src/agentkit/evolution/lifecycle.py
@ -409,7 +409,7 @@ class EvolutionMixin:
            self.pending_soul_updates[pattern].append(reflection)

        # 检查是否有同一类别累积 >= 3 次反思
-        for category, reflections in self.pending_soul_updates.items():
+        for category, reflections in list(self.pending_soul_updates.items()):
            if len(reflections) >= 3:
                # 触发 soul 更新
                from agentkit.tools.memory_tool import MemoryTool
--- a/src/agentkit/quality/alignment.py
+++ b/src/agentkit/quality/alignment.py
@ -156,23 +156,27 @@ class AlignmentGuard:
        """判断 keyword 在 content 中是否为肯定性提及（实际执行/输出）

        如果 keyword 出现在否定语境中（如"我们不会存储X"），不算违规。
+        遍历所有出现位置，只要有一次肯定性提及即返回 True。
        """
-        # 找到 keyword 在 content 中的位置
-        idx = content.find(keyword)
-        if idx == -1:
-            return False
-
-        # 检查 keyword 前面是否有否定词
-        prefix = content[max(0, idx - 20) : idx]
-        neg_prefixes = [
-            "不会", "不能", "不要", "没有", "并未", "并未", "无法",
-            "won't", "don't", "not ", "never ", "no ",
-        ]
-        for neg in neg_prefixes:
-            if neg in prefix:
+        start = 0
+        while True:
+            idx = content.find(keyword, start)
+            if idx == -1:
                return False

-        return True
+            # 检查 keyword 前面是否有否定词
+            prefix = content[max(0, idx - 20) : idx]
+            neg_prefixes = [
+                "不会", "不能", "不要", "没有", "并未", "并未", "无法",
+                "won't", "don't", "not ", "never ", "no ",
+            ]
+            is_negated = any(neg in prefix for neg in neg_prefixes)
+
+            if not is_negated:
+                return True
+
+            # 继续搜索下一个出现位置
+            start = idx + len(keyword)

    @staticmethod
    def _extract_text(output: dict[str, Any]) -> str:
@ -223,8 +227,8 @@ class AlignmentGuard:
        except Exception as e:
            logger.warning(f"LLM audit failed: {e}")
            return AlignmentCheckResult(
-                passed=False,
-                violations=[f"LLM audit unavailable: {e}"],
+                passed=True,
+                violations=[f"LLM audit unavailable (delegated to rule check): {e}"],
                checked_by="rule",
            )