feat(skills): SkillHarness 前置条件 + 风险守卫学习增强
- cli/skill.py: skill learn 子命令增强 - evolution/risk_guard_learner.py: 风险守卫学习改进 - memory/models.py: 记忆模型扩展 - skills/base.py + loader.py: SkillHarness 前置条件支持 - 对应测试更新
This commit is contained in:
parent
574db8458f
commit
20a4c55d5b
|
|
@ -2,12 +2,16 @@
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
from typing import Optional
|
from typing import TYPE_CHECKING, Optional
|
||||||
|
|
||||||
import typer
|
import typer
|
||||||
from rich import print as rprint
|
from rich import print as rprint
|
||||||
from rich.table import Table
|
from rich.table import Table
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from agentkit.evolution.experience_store import ExperienceStore
|
||||||
|
from agentkit.evolution.risk_guard_learner import RiskGuardLearner
|
||||||
|
|
||||||
skill_app = typer.Typer(name="skill", help="Skill management commands", no_args_is_help=True)
|
skill_app = typer.Typer(name="skill", help="Skill management commands", no_args_is_help=True)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -19,6 +23,7 @@ def list_skills(
|
||||||
if server_url:
|
if server_url:
|
||||||
# Remote mode: call API
|
# Remote mode: call API
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with httpx.Client(timeout=10.0) as client:
|
with httpx.Client(timeout=10.0) as client:
|
||||||
response = client.get(f"{server_url}/api/v1/skills")
|
response = client.get(f"{server_url}/api/v1/skills")
|
||||||
|
|
@ -35,7 +40,9 @@ def list_skills(
|
||||||
|
|
||||||
registry = SkillRegistry()
|
registry = SkillRegistry()
|
||||||
# Load skills from the default configs/skills/ directory if it exists
|
# Load skills from the default configs/skills/ directory if it exists
|
||||||
default_skills_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "configs", "skills")
|
default_skills_dir = os.path.join(
|
||||||
|
os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "configs", "skills"
|
||||||
|
)
|
||||||
if os.path.isdir(default_skills_dir):
|
if os.path.isdir(default_skills_dir):
|
||||||
loader = SkillLoader(registry, ToolRegistry())
|
loader = SkillLoader(registry, ToolRegistry())
|
||||||
loader.load_from_directory(default_skills_dir)
|
loader.load_from_directory(default_skills_dir)
|
||||||
|
|
@ -139,6 +146,7 @@ def skill_info(
|
||||||
"""Show skill details"""
|
"""Show skill details"""
|
||||||
if server_url:
|
if server_url:
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with httpx.Client(timeout=10.0) as client:
|
with httpx.Client(timeout=10.0) as client:
|
||||||
response = client.get(f"{server_url}/api/v1/skills/{name}")
|
response = client.get(f"{server_url}/api/v1/skills/{name}")
|
||||||
|
|
@ -149,6 +157,7 @@ def skill_info(
|
||||||
raise typer.Exit(code=1)
|
raise typer.Exit(code=1)
|
||||||
else:
|
else:
|
||||||
from agentkit.skills.registry import SkillRegistry
|
from agentkit.skills.registry import SkillRegistry
|
||||||
|
|
||||||
registry = SkillRegistry()
|
registry = SkillRegistry()
|
||||||
try:
|
try:
|
||||||
skill = registry.get(name)
|
skill = registry.get(name)
|
||||||
|
|
@ -189,63 +198,104 @@ def learn_risk_guards(
|
||||||
|
|
||||||
learner = _build_risk_guard_learner()
|
learner = _build_risk_guard_learner()
|
||||||
if learner is None:
|
if learner is None:
|
||||||
rprint("[red]Error: 无法构建 RiskGuardLearner——需要 PostgreSQL 与 LLM 配置。[/red]")
|
|
||||||
rprint("[dim]请确保 agentkit.yaml 中已配置数据库与 LLM provider。[/dim]")
|
|
||||||
raise typer.Exit(code=1)
|
raise typer.Exit(code=1)
|
||||||
|
|
||||||
suggestions = asyncio.run(learner.learn(skill_name=skill, top_k=top_k))
|
suggestions = asyncio.run(learner.learn(skill_name=skill, top_k=top_k))
|
||||||
_render_risk_guard_suggestions(suggestions)
|
_render_risk_guard_suggestions(suggestions)
|
||||||
|
|
||||||
|
|
||||||
def _build_risk_guard_learner():
|
def _build_risk_guard_learner() -> "RiskGuardLearner | None":
|
||||||
"""从本地配置构建 RiskGuardLearner,失败返回 None"""
|
"""从本地配置构建 RiskGuardLearner,失败返回 None 并打印真实错误"""
|
||||||
|
from agentkit.cli.chat import _build_gateway
|
||||||
|
from agentkit.evolution.risk_guard_learner import RiskGuardLearner
|
||||||
|
from agentkit.server.config import find_config_path, load_config_with_dotenv
|
||||||
|
|
||||||
|
config_path = find_config_path()
|
||||||
|
if config_path is None:
|
||||||
|
rprint("[red]Error: 未找到 agentkit.yaml 配置文件。[/red]")
|
||||||
|
rprint("[dim]请运行 `agentkit init` 生成配置,或使用 --config 指定路径。[/dim]")
|
||||||
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from agentkit.cli.chat import _build_gateway
|
|
||||||
from agentkit.evolution.risk_guard_learner import RiskGuardLearner
|
|
||||||
from agentkit.server.config import find_config_path, load_config_with_dotenv
|
|
||||||
|
|
||||||
config_path = find_config_path()
|
|
||||||
server_config = load_config_with_dotenv(config_path)
|
server_config = load_config_with_dotenv(config_path)
|
||||||
gateway = _build_gateway(server_config)
|
except Exception as e:
|
||||||
|
rprint(f"[red]Error: 加载配置失败: {e}[/red]")
|
||||||
|
return None
|
||||||
|
|
||||||
# ExperienceStore 需要 PostgreSQL + ORM model;尝试从 server app 获取
|
try:
|
||||||
experience_store = _try_get_experience_store(server_config)
|
gateway = _build_gateway(server_config)
|
||||||
if experience_store is None:
|
except Exception as e:
|
||||||
return None
|
rprint(f"[red]Error: 构建 LLM Gateway 失败: {e}[/red]")
|
||||||
return RiskGuardLearner(experience_store, gateway)
|
rprint("[dim]请检查 agentkit.yaml 中的 llm 配置(providers + api_key)。[/dim]")
|
||||||
|
return None
|
||||||
|
|
||||||
|
experience_store = _try_get_experience_store(server_config)
|
||||||
|
if experience_store is None:
|
||||||
|
rprint("[red]Error: 无法连接 PostgreSQL ExperienceStore。[/red]")
|
||||||
|
rprint(
|
||||||
|
"[dim]请在 agentkit.yaml 的 evolution.database_url 或 "
|
||||||
|
"memory.episodic.database_url 中配置 PostgreSQL 连接串,"
|
||||||
|
"或设置 DATABASE_URL 环境变量。[/dim]"
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
return RiskGuardLearner(experience_store, gateway)
|
||||||
|
|
||||||
|
|
||||||
|
def _try_get_experience_store(server_config) -> "ExperienceStore | None":
|
||||||
|
"""尝试从 server_config 构建 PostgreSQL ExperienceStore,不可用时返回 None
|
||||||
|
|
||||||
|
查找 database_url 的优先级:
|
||||||
|
1. server_config.evolution.database_url
|
||||||
|
2. server_config.memory.episodic.database_url
|
||||||
|
3. DATABASE_URL 环境变量
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
|
||||||
|
database_url: str | None = None
|
||||||
|
|
||||||
|
# 1. evolution config
|
||||||
|
evo_conf = getattr(server_config, "evolution", None) or {}
|
||||||
|
database_url = evo_conf.get("database_url") if isinstance(evo_conf, dict) else None
|
||||||
|
|
||||||
|
# 2. episodic memory config
|
||||||
|
if not database_url:
|
||||||
|
epi_conf = (getattr(server_config, "memory", None) or {}).get("episodic", {})
|
||||||
|
database_url = epi_conf.get("database_url") if isinstance(epi_conf, dict) else None
|
||||||
|
|
||||||
|
# 3. env var
|
||||||
|
if not database_url:
|
||||||
|
database_url = os.environ.get("DATABASE_URL")
|
||||||
|
|
||||||
|
if not database_url:
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
from agentkit.evolution.experience_store import ExperienceStore
|
||||||
|
from agentkit.memory.models import ExperienceModel, create_experience_session_factory
|
||||||
|
|
||||||
|
session_factory = create_experience_session_factory(database_url)
|
||||||
|
return ExperienceStore(
|
||||||
|
session_factory=session_factory,
|
||||||
|
experience_model=ExperienceModel,
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
logging.getLogger(__name__).warning(f"Failed to build RiskGuardLearner: {e}")
|
logging.getLogger(__name__).warning(f"Failed to create PostgreSQL ExperienceStore: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _try_get_experience_store(_server_config):
|
def _render_risk_guard_suggestions(suggestions: list) -> None:
|
||||||
"""尝试构建 ExperienceStore,PostgreSQL 不可用时返回 None
|
|
||||||
|
|
||||||
ponytail: 当前 codebase 未提供 PostgreSQL ExperienceStore 的 CLI 构建路径
|
|
||||||
(无 ORM model + session factory 的 CLI helper)。回退到 InMemoryExperienceStore,
|
|
||||||
它在无数据时返回空列表——命令会提示"未学习到建议"。
|
|
||||||
升级路径:未来接入 PostgreSQL 后替换为真实 store。
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
from agentkit.evolution.experience_store import InMemoryExperienceStore
|
|
||||||
|
|
||||||
return InMemoryExperienceStore()
|
|
||||||
except Exception:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _render_risk_guard_suggestions(suggestions) -> None:
|
|
||||||
"""渲染 RiskGuardSuggestion 列表到终端"""
|
"""渲染 RiskGuardSuggestion 列表到终端"""
|
||||||
rprint(
|
|
||||||
"[bold yellow]⚠ 以下为自动生成的风险守卫建议,"
|
|
||||||
"必须人工审查后手动编辑 YAML 应用,不会自动生效。[/bold yellow]\n"
|
|
||||||
)
|
|
||||||
if not suggestions:
|
if not suggestions:
|
||||||
rprint("[dim]未从失败轨迹中学习到风险守卫建议[/dim]")
|
rprint("[dim]未从失败轨迹中学习到风险守卫建议[/dim]")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
rprint(
|
||||||
|
"[bold yellow]⚠ 以下为自动生成的风险守卫建议,"
|
||||||
|
"必须人工审查后手动编辑 YAML 应用,不会自动生效。[/bold yellow]\n"
|
||||||
|
)
|
||||||
table = Table(title="Risk Guard Suggestions (待人工审查)")
|
table = Table(title="Risk Guard Suggestions (待人工审查)")
|
||||||
table.add_column("Skill", style="cyan")
|
table.add_column("Skill", style="cyan")
|
||||||
table.add_column("Precondition")
|
table.add_column("Precondition")
|
||||||
|
|
|
||||||
|
|
@ -93,7 +93,11 @@ class RiskGuardLearner:
|
||||||
source_ids = [e.experience_id for e in failures if e.experience_id]
|
source_ids = [e.experience_id for e in failures if e.experience_id]
|
||||||
|
|
||||||
# 2. 构建 LLM prompt
|
# 2. 构建 LLM prompt
|
||||||
prompt = self._build_prompt(failures)
|
try:
|
||||||
|
prompt = self._build_prompt(failures)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"RiskGuardLearner: failed to build prompt: {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
# 3. 调用 LLM
|
# 3. 调用 LLM
|
||||||
system_message = (
|
system_message = (
|
||||||
|
|
@ -118,7 +122,11 @@ class RiskGuardLearner:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# 4. 解析响应
|
# 4. 解析响应
|
||||||
return self._parse_response(response.content, failures, source_ids)
|
try:
|
||||||
|
return self._parse_response(response.content, source_ids)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"RiskGuardLearner: failed to parse response: {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
def _build_prompt(self, failures: list[TaskExperience]) -> str:
|
def _build_prompt(self, failures: list[TaskExperience]) -> str:
|
||||||
"""构建 LLM 提示词"""
|
"""构建 LLM 提示词"""
|
||||||
|
|
@ -132,9 +140,15 @@ class RiskGuardLearner:
|
||||||
lines.append(f"- skill (task_type): {self._sanitize(exp.task_type)}")
|
lines.append(f"- skill (task_type): {self._sanitize(exp.task_type)}")
|
||||||
lines.append(f"- goal: {self._sanitize(exp.goal)}")
|
lines.append(f"- goal: {self._sanitize(exp.goal)}")
|
||||||
lines.append(f"- steps_summary: {self._sanitize(exp.steps_summary)}")
|
lines.append(f"- steps_summary: {self._sanitize(exp.steps_summary)}")
|
||||||
reasons = "; ".join(exp.failure_reasons) if exp.failure_reasons else "(none)"
|
reasons = (
|
||||||
|
"; ".join(str(r) for r in exp.failure_reasons) if exp.failure_reasons else "(none)"
|
||||||
|
)
|
||||||
lines.append(f"- failure_reasons: {self._sanitize(reasons)}")
|
lines.append(f"- failure_reasons: {self._sanitize(reasons)}")
|
||||||
tips = "; ".join(exp.optimization_tips) if exp.optimization_tips else "(none)"
|
tips = (
|
||||||
|
"; ".join(str(t) for t in exp.optimization_tips)
|
||||||
|
if exp.optimization_tips
|
||||||
|
else "(none)"
|
||||||
|
)
|
||||||
lines.append(f"- optimization_tips: {self._sanitize(tips)}")
|
lines.append(f"- optimization_tips: {self._sanitize(tips)}")
|
||||||
lines.append("")
|
lines.append("")
|
||||||
|
|
||||||
|
|
@ -149,7 +163,6 @@ class RiskGuardLearner:
|
||||||
def _parse_response(
|
def _parse_response(
|
||||||
self,
|
self,
|
||||||
content: str,
|
content: str,
|
||||||
failures: list[TaskExperience],
|
|
||||||
source_ids: list[str],
|
source_ids: list[str],
|
||||||
) -> list[RiskGuardSuggestion]:
|
) -> list[RiskGuardSuggestion]:
|
||||||
"""解析 LLM 响应为 RiskGuardSuggestion 列表"""
|
"""解析 LLM 响应为 RiskGuardSuggestion 列表"""
|
||||||
|
|
@ -214,7 +227,7 @@ class RiskGuardLearner:
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _sanitize(cls, value: Any, max_length: int = _MAX_FIELD_LENGTH) -> str:
|
def _sanitize(cls, value: Any, max_length: int = _MAX_FIELD_LENGTH) -> str:
|
||||||
""" sanitize a value for safe interpolation into LLM prompts."""
|
"""sanitize a value for safe interpolation into LLM prompts."""
|
||||||
text = str(value)
|
text = str(value)
|
||||||
text = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]", "", text)
|
text = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]", "", text)
|
||||||
if len(text) > max_length:
|
if len(text) > max_length:
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
from sqlalchemy import Column, DateTime, Float, String, Text, create_engine
|
from sqlalchemy import Column, DateTime, Float, String, Text
|
||||||
from sqlalchemy.dialects.postgresql import JSONB
|
from sqlalchemy.dialects.postgresql import JSONB
|
||||||
from sqlalchemy.orm import declarative_base, sessionmaker
|
from sqlalchemy.orm import declarative_base, sessionmaker
|
||||||
|
|
||||||
|
|
@ -27,11 +27,11 @@ class EpisodeModel(Base):
|
||||||
outcome = Column(String, default="success") # "success", "failure", "partial"
|
outcome = Column(String, default="success") # "success", "failure", "partial"
|
||||||
quality_score = Column(Float, default=0.5)
|
quality_score = Column(Float, default=0.5)
|
||||||
reflection = Column(Text, default="")
|
reflection = Column(Text, default="")
|
||||||
embedding = Column(Text, nullable=True) # JSON-encoded float list; pgvector if extension available
|
embedding = Column(
|
||||||
|
Text, nullable=True
|
||||||
|
) # JSON-encoded float list; pgvector if extension available
|
||||||
metadata_ = Column("metadata", JSONB, nullable=True) # Additional metadata
|
metadata_ = Column("metadata", JSONB, nullable=True) # Additional metadata
|
||||||
created_at = Column(
|
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), index=True)
|
||||||
DateTime, default=lambda: datetime.now(timezone.utc), index=True
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def create_episodic_session_factory(database_url: str):
|
def create_episodic_session_factory(database_url: str):
|
||||||
|
|
@ -51,6 +51,45 @@ def create_episodic_session_factory(database_url: str):
|
||||||
return async_session
|
return async_session
|
||||||
|
|
||||||
|
|
||||||
|
class ExperienceModel(Base):
|
||||||
|
"""Task experience ORM model for RiskGuardLearner / ExperienceStore.
|
||||||
|
|
||||||
|
Stores task execution outcomes (success/failure/partial) with optional
|
||||||
|
pgvector embeddings for semantic similarity search.
|
||||||
|
"""
|
||||||
|
|
||||||
|
__tablename__ = "task_experiences"
|
||||||
|
|
||||||
|
id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||||
|
task_type = Column(String, index=True)
|
||||||
|
goal = Column(Text, default="")
|
||||||
|
steps_summary = Column(Text, default="")
|
||||||
|
outcome = Column(String, default="success") # "success", "failure", "partial"
|
||||||
|
duration_seconds = Column(Float, default=0.0)
|
||||||
|
success_rate = Column(Float, default=1.0)
|
||||||
|
failure_reasons = Column(JSONB, default=list) # list[str]
|
||||||
|
optimization_tips = Column(JSONB, default=list) # list[str]
|
||||||
|
embedding = Column(Text, nullable=True) # JSON-encoded float list
|
||||||
|
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), index=True)
|
||||||
|
|
||||||
|
|
||||||
|
def create_experience_session_factory(database_url: str):
|
||||||
|
"""Create an async session factory for task experiences.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
database_url: PostgreSQL connection string,
|
||||||
|
e.g. "postgresql+asyncpg://user:pass@localhost/dbname"
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
async_sessionmaker bound to the engine.
|
||||||
|
"""
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
|
||||||
|
|
||||||
|
engine = create_async_engine(database_url, echo=False)
|
||||||
|
async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
|
||||||
|
return async_session
|
||||||
|
|
||||||
|
|
||||||
async def ensure_episodic_table(database_url: str) -> None:
|
async def ensure_episodic_table(database_url: str) -> None:
|
||||||
"""Create the episodic_memories table if it does not exist.
|
"""Create the episodic_memories table if it does not exist.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -126,6 +126,12 @@ class SkillConfig(AgentConfig):
|
||||||
# v6: ReWOO fallback 策略(None 时 ReWOOEngine 用默认值)
|
# v6: ReWOO fallback 策略(None 时 ReWOOEngine 用默认值)
|
||||||
self.fallback_strategies = fallback_strategies
|
self.fallback_strategies = fallback_strategies
|
||||||
# v7: 激活前置条件(软检查,由 build_skill_system_prompt 注入)+ 来源标记
|
# v7: 激活前置条件(软检查,由 build_skill_system_prompt 注入)+ 来源标记
|
||||||
|
if preconditions is not None and not isinstance(preconditions, list):
|
||||||
|
raise ConfigValidationError(
|
||||||
|
agent_name=name,
|
||||||
|
key="preconditions",
|
||||||
|
reason=f"preconditions must be list[str] or None, got {type(preconditions).__name__}",
|
||||||
|
)
|
||||||
self.preconditions = preconditions
|
self.preconditions = preconditions
|
||||||
self.provenance = provenance
|
self.provenance = provenance
|
||||||
self._validate_v2()
|
self._validate_v2()
|
||||||
|
|
@ -152,10 +158,7 @@ class SkillConfig(AgentConfig):
|
||||||
raise ConfigValidationError(
|
raise ConfigValidationError(
|
||||||
agent_name=self.name,
|
agent_name=self.name,
|
||||||
key="fallback_strategies",
|
key="fallback_strategies",
|
||||||
reason=(
|
reason=(f"Invalid fallback_strategies {invalid}, must be subset of {valid}"),
|
||||||
f"Invalid fallback_strategies {invalid}, "
|
|
||||||
f"must be subset of {valid}"
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
|
||||||
|
|
@ -17,9 +17,14 @@ logger = logging.getLogger(__name__)
|
||||||
SKILL_ENTRY_POINT_GROUP = "agentkit.skills"
|
SKILL_ENTRY_POINT_GROUP = "agentkit.skills"
|
||||||
|
|
||||||
# v7: 危险能力标签——entry_points 加载第三方 Skill 时命中则 logger.warning
|
# v7: 危险能力标签——entry_points 加载第三方 Skill 时命中则 logger.warning
|
||||||
|
# 同时检查 capabilities 声明和 tools 绑定,防止恶意 skill 隐瞒能力声明
|
||||||
_DANGEROUS_CAPABILITIES = frozenset(
|
_DANGEROUS_CAPABILITIES = frozenset(
|
||||||
{"terminal", "code_execution", "file_write", "shell", "system_admin"}
|
{"terminal", "code_execution", "file_write", "shell", "system_admin"}
|
||||||
)
|
)
|
||||||
|
# tools 列表中可能出现的危险工具名(与 _DANGEROUS_CAPABILITIES 部分重叠)
|
||||||
|
_DANGEROUS_TOOL_NAMES = frozenset(
|
||||||
|
{"shell", "terminal", "code_execution", "file_write", "file_system", "subprocess"}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class SkillLoader:
|
class SkillLoader:
|
||||||
|
|
@ -95,13 +100,18 @@ class SkillLoader:
|
||||||
|
|
||||||
frontmatter, sections, body = SkillMdParser.parse(path)
|
frontmatter, sections, body = SkillMdParser.parse(path)
|
||||||
config = SkillMdParser.to_skill_config(
|
config = SkillMdParser.to_skill_config(
|
||||||
frontmatter, sections, path, disclosure_level=disclosure_level,
|
frontmatter,
|
||||||
|
sections,
|
||||||
|
path,
|
||||||
|
disclosure_level=disclosure_level,
|
||||||
)
|
)
|
||||||
config.provenance = f"skill_md:{path}"
|
config.provenance = f"skill_md:{path}"
|
||||||
tools = self._bind_tools(config)
|
tools = self._bind_tools(config)
|
||||||
skill = Skill(config, tools=tools)
|
skill = Skill(config, tools=tools)
|
||||||
self._skill_registry.register(skill)
|
self._skill_registry.register(skill)
|
||||||
logger.info(f"Loaded skill '{skill.name}' from SKILL.md '{path}' (level={disclosure_level})")
|
logger.info(
|
||||||
|
f"Loaded skill '{skill.name}' from SKILL.md '{path}' (level={disclosure_level})"
|
||||||
|
)
|
||||||
return skill
|
return skill
|
||||||
|
|
||||||
def load_from_entry_points(self, group: str | None = None) -> list[Skill]:
|
def load_from_entry_points(self, group: str | None = None) -> list[Skill]:
|
||||||
|
|
@ -128,9 +138,11 @@ class SkillLoader:
|
||||||
# Python 3.12+ 使用 importlib.metadata
|
# Python 3.12+ 使用 importlib.metadata
|
||||||
if sys.version_info >= (3, 12):
|
if sys.version_info >= (3, 12):
|
||||||
from importlib.metadata import entry_points as _entry_points
|
from importlib.metadata import entry_points as _entry_points
|
||||||
|
|
||||||
eps = _entry_points(group=group_name)
|
eps = _entry_points(group=group_name)
|
||||||
else:
|
else:
|
||||||
from importlib.metadata import entry_points as _entry_points
|
from importlib.metadata import entry_points as _entry_points
|
||||||
|
|
||||||
eps = _entry_points().get(group_name, [])
|
eps = _entry_points().get(group_name, [])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Failed to discover entry_points for group '{group_name}': {e}")
|
logger.warning(f"Failed to discover entry_points for group '{group_name}': {e}")
|
||||||
|
|
@ -159,28 +171,29 @@ class SkillLoader:
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# v7: 记录 provenance + 危险能力告警
|
# v7: 记录 provenance + 危险能力告警(同时检查 capabilities 和 tools)
|
||||||
skill.config.provenance = f"entry_point:{ep.name}"
|
skill.config.provenance = f"entry_point:{ep.name}"
|
||||||
dangerous = [
|
dangerous_caps = [
|
||||||
cap.tag
|
cap.tag
|
||||||
for cap in (skill.config.capabilities or [])
|
for cap in (skill.config.capabilities or [])
|
||||||
if cap.tag in _DANGEROUS_CAPABILITIES
|
if cap.tag in _DANGEROUS_CAPABILITIES
|
||||||
]
|
]
|
||||||
|
dangerous_tools = [
|
||||||
|
t for t in (skill.config.tools or []) if t in _DANGEROUS_TOOL_NAMES
|
||||||
|
]
|
||||||
|
dangerous = dangerous_caps + dangerous_tools
|
||||||
if dangerous:
|
if dangerous:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Skill '{skill.name}' from entry_point '{ep.name}' "
|
f"Skill '{skill.name}' from entry_point '{ep.name}' "
|
||||||
f"declares dangerous capabilities: {dangerous}"
|
f"declares dangerous capabilities/tools: {dangerous}"
|
||||||
)
|
)
|
||||||
self._skill_registry.register(skill)
|
self._skill_registry.register(skill)
|
||||||
skills.append(skill)
|
skills.append(skill)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Loaded skill '{skill.name}' v{skill.version} "
|
f"Loaded skill '{skill.name}' v{skill.version} from entry_point '{ep.name}'"
|
||||||
f"from entry_point '{ep.name}'"
|
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(
|
logger.warning(f"Failed to load skill from entry_point '{ep.name}': {e}")
|
||||||
f"Failed to load skill from entry_point '{ep.name}': {e}"
|
|
||||||
)
|
|
||||||
|
|
||||||
return skills
|
return skills
|
||||||
|
|
||||||
|
|
@ -196,7 +209,5 @@ class SkillLoader:
|
||||||
tools.append(tool)
|
tools.append(tool)
|
||||||
logger.info(f"Bound tool '{tool_name}' to skill '{config.name}'")
|
logger.info(f"Bound tool '{tool_name}' to skill '{config.name}'")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(
|
logger.warning(f"Failed to bind tool '{tool_name}' to skill '{config.name}': {e}")
|
||||||
f"Failed to bind tool '{tool_name}' to skill '{config.name}': {e}"
|
|
||||||
)
|
|
||||||
return tools
|
return tools
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@
|
||||||
|
|
||||||
from unittest.mock import AsyncMock, MagicMock, patch
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
import pytest
|
|
||||||
from typer.testing import CliRunner
|
from typer.testing import CliRunner
|
||||||
|
|
||||||
from agentkit.evolution.risk_guard_learner import RiskGuardSuggestion
|
from agentkit.evolution.risk_guard_learner import RiskGuardSuggestion
|
||||||
|
|
@ -10,7 +9,9 @@ from agentkit.evolution.risk_guard_learner import RiskGuardSuggestion
|
||||||
runner = CliRunner()
|
runner = CliRunner()
|
||||||
|
|
||||||
|
|
||||||
def _make_suggestion(skill_name="code_reviewer", precondition="需要代码输入", confidence=0.8, reason="避免空输入"):
|
def _make_suggestion(
|
||||||
|
skill_name="code_reviewer", precondition="需要代码输入", confidence=0.8, reason="避免空输入"
|
||||||
|
):
|
||||||
return RiskGuardSuggestion(
|
return RiskGuardSuggestion(
|
||||||
skill_name=skill_name,
|
skill_name=skill_name,
|
||||||
precondition=precondition,
|
precondition=precondition,
|
||||||
|
|
@ -26,7 +27,9 @@ class TestLearnRiskGuardsCommand:
|
||||||
from agentkit.cli.main import app
|
from agentkit.cli.main import app
|
||||||
|
|
||||||
mock_learner = MagicMock()
|
mock_learner = MagicMock()
|
||||||
mock_learner.learn = AsyncMock(return_value=[_make_suggestion(), _make_suggestion("monitor", "需要网络", 0.6)])
|
mock_learner.learn = AsyncMock(
|
||||||
|
return_value=[_make_suggestion(), _make_suggestion("monitor", "需要网络", 0.6)]
|
||||||
|
)
|
||||||
with patch("agentkit.cli.skill._build_risk_guard_learner", return_value=mock_learner):
|
with patch("agentkit.cli.skill._build_risk_guard_learner", return_value=mock_learner):
|
||||||
result = runner.invoke(app, ["skill", "learn-risk-guards"])
|
result = runner.invoke(app, ["skill", "learn-risk-guards"])
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
|
|
@ -47,13 +50,12 @@ class TestLearnRiskGuardsCommand:
|
||||||
assert "未从失败轨迹中学习到风险守卫建议" in result.stdout
|
assert "未从失败轨迹中学习到风险守卫建议" in result.stdout
|
||||||
|
|
||||||
def test_learner_build_failure_exits_nonzero(self):
|
def test_learner_build_failure_exits_nonzero(self):
|
||||||
"""_build_risk_guard_learner 返回 None → 错误信息 + 非零退出"""
|
"""_build_risk_guard_learner 返回 None → 非零退出码"""
|
||||||
from agentkit.cli.main import app
|
from agentkit.cli.main import app
|
||||||
|
|
||||||
with patch("agentkit.cli.skill._build_risk_guard_learner", return_value=None):
|
with patch("agentkit.cli.skill._build_risk_guard_learner", return_value=None):
|
||||||
result = runner.invoke(app, ["skill", "learn-risk-guards"])
|
result = runner.invoke(app, ["skill", "learn-risk-guards"])
|
||||||
assert result.exit_code == 1
|
assert result.exit_code == 1
|
||||||
assert "无法构建" in result.stdout or "Error" in result.stdout
|
|
||||||
|
|
||||||
def test_skill_option_passed_to_learn(self):
|
def test_skill_option_passed_to_learn(self):
|
||||||
"""--skill 参数透传给 learn(skill_name=...)"""
|
"""--skill 参数透传给 learn(skill_name=...)"""
|
||||||
|
|
@ -80,5 +82,75 @@ class TestLearnRiskGuardsCommand:
|
||||||
"""--server-url 远程模式暂不支持"""
|
"""--server-url 远程模式暂不支持"""
|
||||||
from agentkit.cli.main import app
|
from agentkit.cli.main import app
|
||||||
|
|
||||||
result = runner.invoke(app, ["skill", "learn-risk-guards", "--server-url", "http://localhost:8001"])
|
result = runner.invoke(
|
||||||
|
app, ["skill", "learn-risk-guards", "--server-url", "http://localhost:8001"]
|
||||||
|
)
|
||||||
assert result.exit_code == 1
|
assert result.exit_code == 1
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildRiskGuardLearnerErrorPaths:
|
||||||
|
"""测试 _build_risk_guard_learner 的真实错误路径(不 mock 函数本身)"""
|
||||||
|
|
||||||
|
def test_no_config_file_returns_none(self):
|
||||||
|
"""find_config_path 返回 None → 打印错误 + 返回 None"""
|
||||||
|
from agentkit.cli import skill as skill_module
|
||||||
|
|
||||||
|
with patch("agentkit.server.config.find_config_path", return_value=None):
|
||||||
|
result = skill_module._build_risk_guard_learner()
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_no_database_url_returns_none(self):
|
||||||
|
"""server_config 无 database_url → 返回 None"""
|
||||||
|
from agentkit.cli import skill as skill_module
|
||||||
|
|
||||||
|
mock_config = MagicMock()
|
||||||
|
mock_config.evolution = {}
|
||||||
|
mock_config.memory = {}
|
||||||
|
with (
|
||||||
|
patch("agentkit.server.config.find_config_path", return_value="/fake/path.yaml"),
|
||||||
|
patch("agentkit.server.config.load_config_with_dotenv", return_value=mock_config),
|
||||||
|
patch("agentkit.cli.chat._build_gateway", return_value=MagicMock()),
|
||||||
|
patch.dict("os.environ", {}, clear=False),
|
||||||
|
):
|
||||||
|
# Ensure DATABASE_URL is not set
|
||||||
|
import os
|
||||||
|
|
||||||
|
old = os.environ.pop("DATABASE_URL", None)
|
||||||
|
try:
|
||||||
|
result = skill_module._build_risk_guard_learner()
|
||||||
|
finally:
|
||||||
|
if old is not None:
|
||||||
|
os.environ["DATABASE_URL"] = old
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_try_get_experience_store_no_database_url(self):
|
||||||
|
"""_try_get_experience_store 无 database_url → 返回 None"""
|
||||||
|
from agentkit.cli import skill as skill_module
|
||||||
|
|
||||||
|
mock_config = MagicMock()
|
||||||
|
mock_config.evolution = {}
|
||||||
|
mock_config.memory = {"episodic": {}}
|
||||||
|
with patch.dict("os.environ", {}, clear=False):
|
||||||
|
import os
|
||||||
|
|
||||||
|
old = os.environ.pop("DATABASE_URL", None)
|
||||||
|
try:
|
||||||
|
result = skill_module._try_get_experience_store(mock_config)
|
||||||
|
finally:
|
||||||
|
if old is not None:
|
||||||
|
os.environ["DATABASE_URL"] = old
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_try_get_experience_store_with_database_url(self):
|
||||||
|
"""_try_get_experience_store 有 database_url → 构建 ExperienceStore"""
|
||||||
|
from agentkit.cli import skill as skill_module
|
||||||
|
|
||||||
|
mock_config = MagicMock()
|
||||||
|
mock_config.evolution = {"database_url": "postgresql+asyncpg://localhost/test"}
|
||||||
|
mock_config.memory = {}
|
||||||
|
with patch(
|
||||||
|
"agentkit.memory.models.create_experience_session_factory",
|
||||||
|
return_value=MagicMock(),
|
||||||
|
):
|
||||||
|
result = skill_module._try_get_experience_store(mock_config)
|
||||||
|
assert result is not None
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ from unittest.mock import AsyncMock
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from agentkit.evolution.experience_schema import TaskExperience
|
from agentkit.evolution.experience_schema import TaskExperience
|
||||||
from agentkit.evolution.risk_guard_learner import RiskGuardLearner, RiskGuardSuggestion
|
from agentkit.evolution.risk_guard_learner import RiskGuardLearner
|
||||||
|
|
||||||
|
|
||||||
def _make_experience(
|
def _make_experience(
|
||||||
|
|
@ -45,20 +45,22 @@ class TestRiskGuardLearner:
|
||||||
]
|
]
|
||||||
llm = AsyncMock()
|
llm = AsyncMock()
|
||||||
llm.chat.return_value = _make_llm_response(
|
llm.chat.return_value = _make_llm_response(
|
||||||
json.dumps([
|
json.dumps(
|
||||||
{
|
[
|
||||||
"skill_name": "code_reviewer",
|
{
|
||||||
"precondition": "输入必须包含待审查的代码片段",
|
"skill_name": "code_reviewer",
|
||||||
"reason": "多次因输入为空导致审查失败",
|
"precondition": "输入必须包含待审查的代码片段",
|
||||||
"confidence": 0.85,
|
"reason": "多次因输入为空导致审查失败",
|
||||||
},
|
"confidence": 0.85,
|
||||||
{
|
},
|
||||||
"skill_name": "code_reviewer",
|
{
|
||||||
"precondition": "代码片段长度 >= 10 字符",
|
"skill_name": "code_reviewer",
|
||||||
"reason": "过短输入无法有效审查",
|
"precondition": "代码片段长度 >= 10 字符",
|
||||||
"confidence": 0.6,
|
"reason": "过短输入无法有效审查",
|
||||||
},
|
"confidence": 0.6,
|
||||||
])
|
},
|
||||||
|
]
|
||||||
|
)
|
||||||
)
|
)
|
||||||
learner = RiskGuardLearner(store, llm)
|
learner = RiskGuardLearner(store, llm)
|
||||||
suggestions = await learner.learn()
|
suggestions = await learner.learn()
|
||||||
|
|
@ -77,9 +79,7 @@ class TestRiskGuardLearner:
|
||||||
llm.chat.return_value = _make_llm_response("[]")
|
llm.chat.return_value = _make_llm_response("[]")
|
||||||
learner = RiskGuardLearner(store, llm)
|
learner = RiskGuardLearner(store, llm)
|
||||||
await learner.learn(skill_name="code_reviewer")
|
await learner.learn(skill_name="code_reviewer")
|
||||||
store.search.assert_called_once_with(
|
store.search.assert_called_once_with(query="failure", top_k=20, task_type="code_reviewer")
|
||||||
query="failure", top_k=20, task_type="code_reviewer"
|
|
||||||
)
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_learn_llm_exception_returns_empty(self):
|
async def test_learn_llm_exception_returns_empty(self):
|
||||||
|
|
@ -119,21 +119,20 @@ class TestRiskGuardLearner:
|
||||||
"""只保留 outcome == 'failure' 的轨迹"""
|
"""只保留 outcome == 'failure' 的轨迹"""
|
||||||
store = AsyncMock()
|
store = AsyncMock()
|
||||||
store.search.return_value = [
|
store.search.return_value = [
|
||||||
_make_experience("e1", outcome="failure"),
|
_make_experience("e1", goal="failure-goal", outcome="failure"),
|
||||||
_make_experience("e2", outcome="success"),
|
_make_experience("e2", goal="success-goal", outcome="success"),
|
||||||
_make_experience("e3", outcome="partial"),
|
_make_experience("e3", goal="partial-goal", outcome="partial"),
|
||||||
]
|
]
|
||||||
llm = AsyncMock()
|
llm = AsyncMock()
|
||||||
llm.chat.return_value = _make_llm_response("[]")
|
llm.chat.return_value = _make_llm_response("[]")
|
||||||
learner = RiskGuardLearner(store, llm)
|
learner = RiskGuardLearner(store, llm)
|
||||||
await learner.learn()
|
await learner.learn()
|
||||||
# 只有 e1 是 failure,source_experience_ids 应只含 e1
|
# 只有 e1 是 failure,prompt 中应含 failure-goal,不含 success/partial 的 goal
|
||||||
# 通过检查 prompt 中是否只含 e1 来验证
|
|
||||||
call_args = llm.chat.call_args
|
call_args = llm.chat.call_args
|
||||||
prompt = call_args.kwargs["messages"][1]["content"]
|
prompt = call_args.kwargs["messages"][1]["content"]
|
||||||
assert "e1" in prompt or "review code" in prompt
|
assert "failure-goal" in prompt
|
||||||
# success/partial 的 goal 不应出现(它们 goal 都是 "review code",改用 task_type 区分)
|
assert "success-goal" not in prompt
|
||||||
# 更精确:检查 prompt 中 failure 轨迹数
|
assert "partial-goal" not in prompt
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_confidence_clamped(self):
|
async def test_confidence_clamped(self):
|
||||||
|
|
@ -142,11 +141,13 @@ class TestRiskGuardLearner:
|
||||||
store.search.return_value = [_make_experience("e1")]
|
store.search.return_value = [_make_experience("e1")]
|
||||||
llm = AsyncMock()
|
llm = AsyncMock()
|
||||||
llm.chat.return_value = _make_llm_response(
|
llm.chat.return_value = _make_llm_response(
|
||||||
json.dumps([
|
json.dumps(
|
||||||
{"skill_name": "s", "precondition": "p1", "reason": "r", "confidence": 1.5},
|
[
|
||||||
{"skill_name": "s", "precondition": "p2", "reason": "r", "confidence": -0.3},
|
{"skill_name": "s", "precondition": "p1", "reason": "r", "confidence": 1.5},
|
||||||
{"skill_name": "s", "precondition": "p3", "reason": "r", "confidence": 0.5},
|
{"skill_name": "s", "precondition": "p2", "reason": "r", "confidence": -0.3},
|
||||||
])
|
{"skill_name": "s", "precondition": "p3", "reason": "r", "confidence": 0.5},
|
||||||
|
]
|
||||||
|
)
|
||||||
)
|
)
|
||||||
learner = RiskGuardLearner(store, llm)
|
learner = RiskGuardLearner(store, llm)
|
||||||
suggestions = await learner.learn()
|
suggestions = await learner.learn()
|
||||||
|
|
@ -176,11 +177,13 @@ class TestRiskGuardLearner:
|
||||||
store.search.return_value = [_make_experience("e1")]
|
store.search.return_value = [_make_experience("e1")]
|
||||||
llm = AsyncMock()
|
llm = AsyncMock()
|
||||||
llm.chat.return_value = _make_llm_response(
|
llm.chat.return_value = _make_llm_response(
|
||||||
json.dumps([
|
json.dumps(
|
||||||
{"skill_name": "s", "precondition": "", "reason": "r", "confidence": 0.5},
|
[
|
||||||
{"skill_name": "", "precondition": "p", "reason": "r", "confidence": 0.5},
|
{"skill_name": "s", "precondition": "", "reason": "r", "confidence": 0.5},
|
||||||
{"skill_name": "s", "precondition": "valid", "reason": "r", "confidence": 0.5},
|
{"skill_name": "", "precondition": "p", "reason": "r", "confidence": 0.5},
|
||||||
])
|
{"skill_name": "s", "precondition": "valid", "reason": "r", "confidence": 0.5},
|
||||||
|
]
|
||||||
|
)
|
||||||
)
|
)
|
||||||
learner = RiskGuardLearner(store, llm)
|
learner = RiskGuardLearner(store, llm)
|
||||||
suggestions = await learner.learn()
|
suggestions = await learner.learn()
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,8 @@
|
||||||
"""SkillConfig v7 preconditions + provenance 字段单元测试"""
|
"""SkillConfig v7 preconditions + provenance 字段单元测试"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from agentkit.core.exceptions import ConfigValidationError
|
||||||
from agentkit.skills.base import SkillConfig
|
from agentkit.skills.base import SkillConfig
|
||||||
|
|
||||||
# llm_generate 模式要求 prompt,所有构造提供最小 prompt
|
# llm_generate 模式要求 prompt,所有构造提供最小 prompt
|
||||||
|
|
@ -72,3 +75,25 @@ class TestSkillConfigPreconditions:
|
||||||
out = config.to_dict()
|
out = config.to_dict()
|
||||||
assert out["preconditions"] == ["条件1", "条件2"]
|
assert out["preconditions"] == ["条件1", "条件2"]
|
||||||
assert out["provenance"] == "skill_md:foo.md"
|
assert out["provenance"] == "skill_md:foo.md"
|
||||||
|
|
||||||
|
def test_preconditions_string_type_rejected(self):
|
||||||
|
"""preconditions 传字符串应抛 ConfigValidationError(防止逐字符迭代)"""
|
||||||
|
with pytest.raises(ConfigValidationError, match="preconditions"):
|
||||||
|
SkillConfig(
|
||||||
|
name="x",
|
||||||
|
agent_type="y",
|
||||||
|
task_mode="llm_generate",
|
||||||
|
prompt=_PROMPT,
|
||||||
|
preconditions="必须提供代码", # type: ignore[arg-type]
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_preconditions_dict_type_rejected(self):
|
||||||
|
"""preconditions 传 dict 应抛 ConfigValidationError"""
|
||||||
|
with pytest.raises(ConfigValidationError, match="preconditions"):
|
||||||
|
SkillConfig(
|
||||||
|
name="x",
|
||||||
|
agent_type="y",
|
||||||
|
task_mode="llm_generate",
|
||||||
|
prompt=_PROMPT,
|
||||||
|
preconditions={"key": "val"}, # type: ignore[arg-type]
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,6 @@ import os
|
||||||
import tempfile
|
import tempfile
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
import pytest
|
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from agentkit.skills.base import Skill, SkillConfig
|
from agentkit.skills.base import Skill, SkillConfig
|
||||||
|
|
@ -30,13 +29,14 @@ class _FakeEntryPoint:
|
||||||
return self._skill
|
return self._skill
|
||||||
|
|
||||||
|
|
||||||
def _make_skill(name: str = "ep_skill", capabilities=None) -> Skill:
|
def _make_skill(name: str = "ep_skill", capabilities=None, tools=None) -> Skill:
|
||||||
config = SkillConfig(
|
config = SkillConfig(
|
||||||
name=name,
|
name=name,
|
||||||
agent_type="test",
|
agent_type="test",
|
||||||
task_mode="llm_generate",
|
task_mode="llm_generate",
|
||||||
prompt={"identity": "test"},
|
prompt={"identity": "test"},
|
||||||
capabilities=capabilities,
|
capabilities=capabilities,
|
||||||
|
tools=tools,
|
||||||
)
|
)
|
||||||
return Skill(config)
|
return Skill(config)
|
||||||
|
|
||||||
|
|
@ -46,19 +46,23 @@ class TestSkillLoaderProvenance:
|
||||||
registry = SkillRegistry()
|
registry = SkillRegistry()
|
||||||
loader = SkillLoader(skill_registry=registry)
|
loader = SkillLoader(skill_registry=registry)
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
path = _write_yaml(tmpdir, "s.yaml", {
|
path = _write_yaml(
|
||||||
"name": "s",
|
tmpdir,
|
||||||
"agent_type": "t",
|
"s.yaml",
|
||||||
"task_mode": "llm_generate",
|
{
|
||||||
"prompt": {"identity": "x"},
|
"name": "s",
|
||||||
})
|
"agent_type": "t",
|
||||||
|
"task_mode": "llm_generate",
|
||||||
|
"prompt": {"identity": "x"},
|
||||||
|
},
|
||||||
|
)
|
||||||
skill = loader.load_from_file(path)
|
skill = loader.load_from_file(path)
|
||||||
assert skill.config.provenance == f"yaml:{path}"
|
assert skill.config.provenance == f"yaml:{path}"
|
||||||
|
|
||||||
def test_load_from_skill_md_sets_provenance(self):
|
def test_load_from_skill_md_sets_provenance(self):
|
||||||
registry = SkillRegistry()
|
registry = SkillRegistry()
|
||||||
loader = SkillLoader(skill_registry=registry)
|
loader = SkillLoader(skill_registry=registry)
|
||||||
skill_md = '''\
|
skill_md = """\
|
||||||
---
|
---
|
||||||
name: md-skill
|
name: md-skill
|
||||||
description: "test"
|
description: "test"
|
||||||
|
|
@ -77,7 +81,7 @@ execution_mode: react
|
||||||
|
|
||||||
# Verification
|
# Verification
|
||||||
- ok
|
- ok
|
||||||
'''
|
"""
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
path = os.path.join(tmpdir, "SKILL.md")
|
path = os.path.join(tmpdir, "SKILL.md")
|
||||||
with open(path, "w", encoding="utf-8") as f:
|
with open(path, "w", encoding="utf-8") as f:
|
||||||
|
|
@ -113,7 +117,28 @@ execution_mode: react
|
||||||
assert skills[0].config.provenance == "entry_point:dangerous_ep"
|
assert skills[0].config.provenance == "entry_point:dangerous_ep"
|
||||||
# warning 包含 skill 名与危险能力
|
# warning 包含 skill 名与危险能力
|
||||||
warnings = [r for r in caplog.records if r.levelno == logging.WARNING]
|
warnings = [r for r in caplog.records if r.levelno == logging.WARNING]
|
||||||
assert any("dangerous_skill" in r.getMessage() and "shell" in r.getMessage() for r in warnings)
|
assert any(
|
||||||
|
"dangerous_skill" in r.getMessage() and "shell" in r.getMessage() for r in warnings
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_entry_points_dangerous_tools_warning(self, caplog):
|
||||||
|
"""entry_points 加载绑定 shell 工具但未声明 capabilities 的 Skill 时触发 warning"""
|
||||||
|
import logging
|
||||||
|
|
||||||
|
registry = SkillRegistry()
|
||||||
|
loader = SkillLoader(skill_registry=registry)
|
||||||
|
# 有危险 tools 但无 capabilities 声明——旧逻辑会漏检
|
||||||
|
dangerous_skill = _make_skill("stealthy_skill", capabilities=None, tools=["shell"])
|
||||||
|
fake_ep = _FakeEntryPoint("stealthy_ep", dangerous_skill)
|
||||||
|
with patch("agentkit.skills.loader.sys.version_info", (3, 12, 0)):
|
||||||
|
with patch("importlib.metadata.entry_points", return_value=[fake_ep]):
|
||||||
|
with caplog.at_level(logging.WARNING):
|
||||||
|
skills = loader.load_from_entry_points()
|
||||||
|
assert len(skills) == 1
|
||||||
|
warnings = [r for r in caplog.records if r.levelno == logging.WARNING]
|
||||||
|
assert any(
|
||||||
|
"stealthy_skill" in r.getMessage() and "shell" in r.getMessage() for r in warnings
|
||||||
|
)
|
||||||
|
|
||||||
def test_entry_points_no_capabilities_no_warning(self, caplog):
|
def test_entry_points_no_capabilities_no_warning(self, caplog):
|
||||||
import logging
|
import logging
|
||||||
|
|
@ -129,7 +154,8 @@ execution_mode: react
|
||||||
assert len(skills) == 1
|
assert len(skills) == 1
|
||||||
# 不应有危险能力 warning(只可能有其他 warning)
|
# 不应有危险能力 warning(只可能有其他 warning)
|
||||||
dangerous_warnings = [
|
dangerous_warnings = [
|
||||||
r for r in caplog.records
|
r
|
||||||
|
for r in caplog.records
|
||||||
if r.levelno == logging.WARNING and "dangerous capabilities" in r.getMessage()
|
if r.levelno == logging.WARNING and "dangerous capabilities" in r.getMessage()
|
||||||
]
|
]
|
||||||
assert dangerous_warnings == []
|
assert dangerous_warnings == []
|
||||||
|
|
@ -139,13 +165,17 @@ execution_mode: react
|
||||||
registry = SkillRegistry()
|
registry = SkillRegistry()
|
||||||
loader = SkillLoader(skill_registry=registry)
|
loader = SkillLoader(skill_registry=registry)
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
path = _write_yaml(tmpdir, "s.yaml", {
|
path = _write_yaml(
|
||||||
"name": "s",
|
tmpdir,
|
||||||
"agent_type": "t",
|
"s.yaml",
|
||||||
"task_mode": "llm_generate",
|
{
|
||||||
"prompt": {"identity": "x"},
|
"name": "s",
|
||||||
"provenance": "user_supplied:should_be_overridden",
|
"agent_type": "t",
|
||||||
})
|
"task_mode": "llm_generate",
|
||||||
|
"prompt": {"identity": "x"},
|
||||||
|
"provenance": "user_supplied:should_be_overridden",
|
||||||
|
},
|
||||||
|
)
|
||||||
skill = loader.load_from_file(path)
|
skill = loader.load_from_file(path)
|
||||||
assert skill.config.provenance == f"yaml:{path}"
|
assert skill.config.provenance == f"yaml:{path}"
|
||||||
assert "user_supplied" not in skill.config.provenance
|
assert "user_supplied" not in skill.config.provenance
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue