feat(tools): U3 built-in Python tools - WebCrawl, SchemaExtract, SchemaGenerate
Add WebCrawlTool (Crawl4AI wrapper with graceful degradation), SchemaExtractTool (extruct-based Schema.org extraction), and SchemaGenerateTool (JSON-LD generation with optional pydantic-schemaorg validation). All tools work without optional dependencies.
This commit is contained in:
parent
550d29a139
commit
9ec1740047
|
|
@ -6,6 +6,9 @@ from agentkit.tools.agent_tool import AgentTool
|
|||
from agentkit.tools.mcp_tool import MCPTool
|
||||
from agentkit.tools.registry import ToolRegistry
|
||||
from agentkit.tools.composition import SequentialChain, ParallelFanOut, DynamicSelector
|
||||
from agentkit.tools.web_crawl import WebCrawlTool
|
||||
from agentkit.tools.schema_tools import SchemaExtractTool, SchemaGenerateTool
|
||||
from agentkit.tools.baidu_search import BaiduSearchTool
|
||||
|
||||
__all__ = [
|
||||
"Tool",
|
||||
|
|
@ -16,4 +19,8 @@ __all__ = [
|
|||
"SequentialChain",
|
||||
"ParallelFanOut",
|
||||
"DynamicSelector",
|
||||
"WebCrawlTool",
|
||||
"SchemaExtractTool",
|
||||
"SchemaGenerateTool",
|
||||
"BaiduSearchTool",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,344 @@
|
|||
"""Schema 工具集 - 结构化数据提取与生成
|
||||
|
||||
SchemaExtractTool: 从 HTML 中提取 JSON-LD / Microdata / RDFa 等结构化数据
|
||||
SchemaGenerateTool: 生成 Schema.org JSON-LD 标记
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from agentkit.tools.base import Tool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 检测 extruct 是否可用
|
||||
_EXTRUCT_AVAILABLE = False
|
||||
extruct = None
|
||||
try:
|
||||
import extruct
|
||||
|
||||
_EXTRUCT_AVAILABLE = True
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# 检测 pydantic_schemaorg 是否可用
|
||||
_PYDANTIC_SCHEMAORG_AVAILABLE = False
|
||||
pydantic_schemaorg = None
|
||||
try:
|
||||
import pydantic_schemaorg
|
||||
|
||||
_PYDANTIC_SCHEMAORG_AVAILABLE = True
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
class SchemaExtractTool(Tool):
    """Structured-data extraction tool for HTML documents.

    Extracts JSON-LD, Microdata, RDFa and Dublin Core metadata through the
    optional ``extruct`` library.  When ``extruct`` is not installed, calls
    return an error dictionary with an install hint instead of raising.
    """

    SUPPORTED_FORMATS = {"json-ld", "microdata", "rdfa", "dublincore"}

    def __init__(
        self,
        name: str = "schema_extract",
        description: str = "从网页 HTML 中提取结构化数据(JSON-LD、Microdata、RDFa 等)",
        input_schema: dict[str, Any] | None = None,
        output_schema: dict[str, Any] | None = None,
        version: str = "1.0.0",
        tags: list[str] | None = None,
    ):
        """Initialise the tool, falling back to the built-in schemas and tags."""
        super().__init__(
            name=name,
            description=description,
            input_schema=input_schema or self._default_input_schema(),
            output_schema=output_schema or self._default_output_schema(),
            version=version,
            tags=tags or ["schema", "extraction"],
        )

    @staticmethod
    def _default_input_schema() -> dict[str, Any]:
        """Return the JSON Schema describing the accepted keyword arguments."""
        return {
            "type": "object",
            "properties": {
                "url_or_html": {
                    "type": "string",
                    "description": "要提取的 URL 或原始 HTML 字符串",
                },
                "formats": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "要提取的格式列表",
                    "default": ["json-ld"],
                },
            },
            "required": ["url_or_html"],
        }

    @staticmethod
    def _default_output_schema() -> dict[str, Any]:
        """Return the JSON Schema describing the result dictionary."""
        return {
            "type": "object",
            "properties": {
                "schemas": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "format": {"type": "string"},
                            "data": {"type": "object"},
                        },
                    },
                    "description": "提取到的结构化数据列表",
                },
                "success": {"type": "boolean", "description": "是否成功"},
                "error": {"type": "string", "description": "错误信息(仅失败时)"},
            },
        }

    @staticmethod
    def _failure(message: str) -> dict[str, Any]:
        """Build the uniform failure payload used by every error path."""
        return {"error": message, "schemas": [], "success": False}

    @staticmethod
    def _fetch_html(url: str, timeout: float = 30) -> str:
        """Download *url* synchronously and return its body decoded as UTF-8.

        Meant to run in a worker thread so the blocking network call does not
        stall the event loop.  Undecodable bytes are replaced, not raised.
        """
        import urllib.request

        req = urllib.request.Request(url, headers={"User-Agent": "AgentKit/1.0"})
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return resp.read().decode("utf-8", errors="replace")

    def _is_url(self, text: str) -> bool:
        """Return True when *text* looks like an http(s) URL rather than HTML."""
        return text.strip().startswith(("http://", "https://"))

    async def execute(self, **kwargs) -> dict:
        """Extract structured data from a URL or a raw HTML string.

        Args:
            url_or_html: URL or raw HTML string (required).
            formats: Formats to extract (defaults to ``["json-ld"]``).
                Allowed values: "json-ld", "microdata", "rdfa", "dublincore".
                A single string is treated as a one-element list.

        Returns:
            A dictionary with a ``schemas`` list and a ``success`` flag; on
            failure it also carries an ``error`` message.
        """
        import asyncio

        url_or_html = kwargs.get("url_or_html")
        if not url_or_html:
            return self._failure("url_or_html 参数是必需的")

        # An explicit ``formats=None`` means "use the default"; a bare string
        # must not be split into characters by ``set()``.
        formats = kwargs.get("formats")
        if formats is None:
            formats = ["json-ld"]
        try:
            formats = [formats] if isinstance(formats, str) else list(formats)
            invalid_formats = set(formats) - self.SUPPORTED_FORMATS
        except TypeError:
            # Non-iterable value or unhashable items.
            return self._failure("formats 必须是字符串列表")

        if invalid_formats:
            return self._failure(
                f"不支持的格式: {invalid_formats},支持的格式: {self.SUPPORTED_FORMATS}"
            )

        # Graceful degradation: extruct is an optional dependency.
        if not _EXTRUCT_AVAILABLE:
            return self._failure("extruct not installed. Run: pip install extruct")

        try:
            html = url_or_html
            url = None

            # A URL input has to be downloaded first.
            if self._is_url(url_or_html):
                url = url_or_html.strip()
                try:
                    # urlopen blocks, so run it off the event loop.
                    html = await asyncio.to_thread(self._fetch_html, url)
                except Exception as e:
                    return self._failure(f"获取 URL 内容失败: {e}")

            # extruct selects what to parse through ``syntaxes``.
            data = extruct.extract(
                html,
                base_url=url or "",
                syntaxes=formats,
            )

            # Flatten to one entry per extracted item, tagged with its format.
            schemas: list[dict[str, Any]] = [
                {"format": fmt, "data": item}
                for fmt in formats
                for item in (data.get(fmt) or [])
            ]
            return {"schemas": schemas, "success": True}

        except Exception as e:
            logger.error("SchemaExtractTool 提取失败: %s", e)
            return self._failure(str(e))
|
||||
|
||||
|
||||
class SchemaGenerateTool(Tool):
    """JSON-LD generation tool for common Schema.org types.

    Tries ``pydantic-schemaorg`` first when it is available and otherwise
    builds the JSON-LD document by hand.  Manual generation needs no
    external dependency, so the tool always produces output for a
    supported type.
    """

    SUPPORTED_TYPES = {
        "Organization",
        "WebPage",
        "Article",
        "Product",
        "FAQPage",
        "HowTo",
        "LocalBusiness",
        "Person",
        "BreadcrumbList",
        "SiteNavigationElement",
    }

    def __init__(
        self,
        name: str = "schema_generate",
        description: str = "生成 Schema.org JSON-LD 结构化数据标记",
        input_schema: dict[str, Any] | None = None,
        output_schema: dict[str, Any] | None = None,
        version: str = "1.0.0",
        tags: list[str] | None = None,
    ):
        """Initialise the tool, falling back to the built-in schemas and tags."""
        super().__init__(
            name=name,
            description=description,
            input_schema=input_schema or self._default_input_schema(),
            output_schema=output_schema or self._default_output_schema(),
            version=version,
            tags=tags or ["schema", "generation"],
        )

    @staticmethod
    def _default_input_schema() -> dict[str, Any]:
        """Return the JSON Schema describing the accepted keyword arguments."""
        return {
            "type": "object",
            "properties": {
                "schema_type": {
                    "type": "string",
                    "description": "Schema.org 类型名称,如 Organization、FAQPage 等",
                },
                "properties": {
                    "type": "object",
                    "description": "Schema 属性字典",
                },
            },
            "required": ["schema_type", "properties"],
        }

    @staticmethod
    def _default_output_schema() -> dict[str, Any]:
        """Return the JSON Schema describing the result dictionary."""
        return {
            "type": "object",
            "properties": {
                "jsonld": {"type": "string", "description": "生成的 JSON-LD 字符串"},
                "schema_type": {"type": "string", "description": "Schema 类型"},
                "success": {"type": "boolean", "description": "是否成功"},
                "error": {"type": "string", "description": "错误信息(仅失败时)"},
            },
        }

    def _generate_manual(self, schema_type: str, properties: dict[str, Any]) -> str:
        """Build the JSON-LD string by hand (no external dependency).

        A caller-supplied ``@type`` key is ignored so the emitted type always
        equals the validated *schema_type* reported in the result.
        """
        jsonld_obj: dict[str, Any] = {
            "@context": "https://schema.org",
            "@type": schema_type,
        }
        jsonld_obj.update(
            (key, value) for key, value in properties.items() if key != "@type"
        )
        return json.dumps(jsonld_obj, ensure_ascii=False, indent=2)

    def _generate_with_schemaorg(self, schema_type: str, properties: dict[str, Any]) -> str | None:
        """Build the JSON-LD string via pydantic-schemaorg.

        Returns:
            The JSON-LD string, or ``None`` when the library is unavailable,
            the type cannot be resolved, or model construction fails; the
            caller then falls back to manual generation.
        """
        if not _PYDANTIC_SCHEMAORG_AVAILABLE:
            return None

        try:
            # NOTE(review): pydantic-schemaorg appears to ship one submodule
            # per type, so this package-level lookup may yield None or a
            # module rather than the model class - confirm against the
            # installed version.
            schema_cls = getattr(pydantic_schemaorg, schema_type, None)
            if schema_cls is None:
                return None

            instance = schema_cls(**properties)
            # Support both the pydantic v2 and v1 serialisation APIs.
            if hasattr(instance, "model_dump"):
                data = instance.model_dump(exclude_none=True)
            elif hasattr(instance, "dict"):
                data = instance.dict(exclude_none=True)
            else:
                return None

            jsonld_obj: dict[str, Any] = {
                "@context": "https://schema.org",
                "@type": schema_type,
            }
            jsonld_obj.update(data)
            return json.dumps(jsonld_obj, ensure_ascii=False, indent=2)
        except Exception as e:
            # Best-effort path: record why it failed instead of hiding it.
            logger.warning(
                "pydantic-schemaorg 生成失败,降级到手动生成 (%s): %s", schema_type, e
            )
            return None

    async def execute(self, **kwargs) -> dict:
        """Generate a JSON-LD document.

        Args:
            schema_type: Schema.org type name (required, e.g. "Organization").
            properties: Dictionary of schema properties (required).

        Returns:
            A dictionary with the ``jsonld`` string, the ``schema_type`` and a
            ``success`` flag; on failure it carries ``error`` instead of
            ``jsonld``.
        """
        schema_type = kwargs.get("schema_type")
        properties = kwargs.get("properties")

        if not schema_type:
            return {"error": "schema_type 参数是必需的", "schema_type": "", "success": False}

        if properties is None:
            return {"error": "properties 参数是必需的", "schema_type": schema_type, "success": False}

        if not isinstance(properties, dict):
            return {
                "error": "properties 必须是字典类型",
                "schema_type": schema_type,
                "success": False,
            }

        # The isinstance guard keeps unhashable values (e.g. a list) from
        # raising TypeError in the set membership test.
        if not isinstance(schema_type, str) or schema_type not in self.SUPPORTED_TYPES:
            return {
                "error": f"不支持的 schema_type: {schema_type},支持的类型: {sorted(self.SUPPORTED_TYPES)}",
                "schema_type": schema_type,
                "success": False,
            }

        try:
            # Try pydantic-schemaorg first, then fall back to manual generation.
            jsonld = self._generate_with_schemaorg(schema_type, properties)
            if jsonld is None:
                jsonld = self._generate_manual(schema_type, properties)

            return {
                "jsonld": jsonld,
                "schema_type": schema_type,
                "success": True,
            }

        except Exception as e:
            logger.error("SchemaGenerateTool 生成失败: %s", e)
            return {
                "error": str(e),
                "schema_type": schema_type,
                "success": False,
            }
|
||||
|
|
@ -0,0 +1,159 @@
|
|||
"""WebCrawlTool - 基于 Crawl4AI 的网页抓取工具,支持优雅降级"""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from agentkit.tools.base import Tool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 检测 Crawl4AI 是否可用
|
||||
_CRAWL4AI_AVAILABLE = False
|
||||
AsyncWebCrawler = None
|
||||
JsonCssExtractionStrategy = None
|
||||
try:
|
||||
from crawl4ai import AsyncWebCrawler
|
||||
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
|
||||
|
||||
_CRAWL4AI_AVAILABLE = True
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
class WebCrawlTool(Tool):
    """Web page crawling tool backed by the optional Crawl4AI library.

    Supports Markdown or HTML output, CSS-selector based extraction and a
    wait for JavaScript rendering.  When Crawl4AI is not installed, calls
    return an error dictionary containing an install hint.
    """

    def __init__(
        self,
        name: str = "web_crawl",
        description: str = "抓取网页内容,支持 Markdown/HTML 输出和 CSS 选择器提取",
        input_schema: dict[str, Any] | None = None,
        output_schema: dict[str, Any] | None = None,
        version: str = "1.0.0",
        tags: list[str] | None = None,
    ):
        """Initialise the tool, falling back to the built-in schemas and tags."""
        super().__init__(
            name=name,
            description=description,
            input_schema=input_schema or self._default_input_schema(),
            output_schema=output_schema or self._default_output_schema(),
            version=version,
            tags=tags or ["web", "crawl"],
        )

    @staticmethod
    def _default_input_schema() -> dict[str, Any]:
        """Return the JSON Schema describing the accepted keyword arguments."""
        return {
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "要抓取的 URL",
                },
                "format": {
                    "type": "string",
                    "description": "输出格式:markdown 或 html",
                    "default": "markdown",
                    "enum": ["markdown", "html"],
                },
                "css_selector": {
                    "type": "string",
                    "description": "可选的 CSS 选择器,用于结构化提取",
                },
                "js_wait": {
                    "type": "number",
                    "description": "等待 JS 渲染的秒数",
                    "default": 0,
                },
            },
            "required": ["url"],
        }

    @staticmethod
    def _default_output_schema() -> dict[str, Any]:
        """Return the JSON Schema describing the result dictionary."""
        return {
            "type": "object",
            "properties": {
                "content": {"type": "string", "description": "抓取到的内容"},
                "status_code": {"type": "integer", "description": "HTTP 状态码"},
                "links": {"type": "array", "items": {"type": "string"}, "description": "页面中的链接"},
                "success": {"type": "boolean", "description": "是否成功"},
                "error": {"type": "string", "description": "错误信息(仅失败时)"},
            },
        }

    @staticmethod
    def _normalize_links(raw_links: Any) -> list[str]:
        """Return the crawl result's links as a flat list.

        A list is passed through unchanged.  A mapping of groups (such as
        internal/external) is flattened, taking each entry's ``href`` when the
        entry is a dict.  Anything else yields an empty list.
        """
        if isinstance(raw_links, list):
            return raw_links
        if not isinstance(raw_links, dict):
            return []

        hrefs: list[str] = []
        for group in raw_links.values():
            if not isinstance(group, (list, tuple)):
                continue
            for entry in group:
                href = entry.get("href") if isinstance(entry, dict) else entry
                if isinstance(href, str) and href:
                    hrefs.append(href)
        return hrefs

    async def execute(self, **kwargs) -> dict:
        """Crawl a web page.

        Args:
            url: URL to crawl (required).
            format: Output format, "markdown" or "html" (default "markdown").
            css_selector: Optional CSS selector for structured extraction.
            js_wait: Seconds to wait for JavaScript rendering (default 0).

        Returns:
            A dictionary with ``content``, ``status_code``, ``links`` and
            ``success``, plus ``extracted`` when selector extraction produced
            content.  On failure it holds only ``error`` and ``success``.
        """
        url = kwargs.get("url")
        if not url:
            return {"error": "url 参数是必需的", "success": False}

        output_format = kwargs.get("format", "markdown")
        css_selector = kwargs.get("css_selector")
        js_wait = kwargs.get("js_wait", 0)

        # Graceful degradation: Crawl4AI is an optional dependency.
        if not _CRAWL4AI_AVAILABLE:
            return {
                "error": "Crawl4AI not installed. Run: pip install crawl4ai",
                "success": False,
            }

        try:
            extraction_strategy = None
            if css_selector:
                # NOTE(review): JsonCssExtractionStrategy is documented as
                # taking a schema dict, not a bare selector - confirm this
                # call against the installed Crawl4AI version.
                extraction_strategy = JsonCssExtractionStrategy(css_selector)

            async with AsyncWebCrawler() as crawler:
                # NOTE(review): verify that ``js_wait`` is a keyword arun()
                # honours in the installed Crawl4AI version.
                result = await crawler.arun(
                    url=url,
                    extraction_strategy=extraction_strategy,
                    js_wait=js_wait if js_wait else None,
                )

            # A result explicitly flagged as failed must not be reported as
            # a success with empty content.
            if getattr(result, "success", True) is False:
                reason = getattr(result, "error_message", None) or "crawl failed"
                logger.error("WebCrawlTool 抓取失败: %s - %s", url, reason)
                return {"error": str(reason), "success": False}

            if output_format == "html":
                content = result.html or ""
            else:
                content = result.markdown or ""

            links: list[str] = self._normalize_links(getattr(result, "links", None))

            status_code = result.status_code if hasattr(result, "status_code") else 200

            response: dict[str, Any] = {
                "content": content,
                "status_code": status_code,
                "links": links,
                "success": True,
            }

            # Attach selector-based extraction output when there is any.
            if extraction_strategy and hasattr(result, "extracted_content") and result.extracted_content:
                response["extracted"] = result.extracted_content

            return response

        except Exception as e:
            logger.error("WebCrawlTool 抓取失败: %s - %s", url, e)
            return {
                "error": str(e),
                "success": False,
            }
|
||||
|
|
@ -0,0 +1,413 @@
|
|||
"""Schema 工具集单元测试 - SchemaExtractTool + SchemaGenerateTool"""
|
||||
|
||||
import json
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from agentkit.tools.schema_tools import SchemaExtractTool, SchemaGenerateTool
|
||||
|
||||
|
||||
# ========== SchemaExtractTool 测试 ==========
|
||||
|
||||
|
||||
class TestSchemaExtractToolConstruction:
    """Construction behaviour of SchemaExtractTool."""

    def test_default_construction(self):
        """Default arguments yield the expected name and schemas."""
        extractor = SchemaExtractTool()

        assert extractor.name == "schema_extract"
        assert extractor.input_schema is not None
        assert extractor.output_schema is not None

        declared_input = extractor.input_schema
        assert "url_or_html" in declared_input["properties"]
        assert declared_input["required"] == ["url_or_html"]

    def test_custom_construction(self):
        """Explicit constructor arguments override the defaults."""
        overrides = {
            "name": "my_extractor",
            "description": "自定义提取器",
            "version": "2.0.0",
        }
        extractor = SchemaExtractTool(**overrides)
        assert extractor.name == "my_extractor"

    def test_supported_formats(self):
        """All four documented formats are advertised."""
        advertised = SchemaExtractTool().SUPPORTED_FORMATS
        for expected in ("json-ld", "microdata", "rdfa", "dublincore"):
            assert expected in advertised

    def test_to_dict(self):
        """The serialised form carries the tool name."""
        serialised = SchemaExtractTool().to_dict()
        assert serialised["name"] == "schema_extract"
|
||||
|
||||
|
||||
class TestSchemaExtractToolGracefulDegradation:
    """Graceful degradation when extruct is unavailable."""

    @pytest.mark.asyncio
    async def test_execute_without_extruct(self):
        """Without extruct the tool reports an install hint and no schemas."""
        with patch("agentkit.tools.schema_tools._EXTRUCT_AVAILABLE", False):
            outcome = await SchemaExtractTool().execute(url_or_html="<html></html>")

        assert outcome["success"] is False
        message = outcome["error"]
        assert "extruct not installed" in message
        assert "pip install extruct" in message
        assert outcome["schemas"] == []
|
||||
|
||||
|
||||
class TestSchemaExtractToolValidation:
    """Input validation of SchemaExtractTool.execute."""

    @pytest.mark.asyncio
    async def test_execute_missing_url_or_html(self):
        """Omitting url_or_html fails and names the missing argument."""
        outcome = await SchemaExtractTool().execute()
        assert outcome["success"] is False
        assert "url_or_html" in outcome["error"]

    @pytest.mark.asyncio
    async def test_execute_empty_url_or_html(self):
        """An empty url_or_html string is rejected."""
        outcome = await SchemaExtractTool().execute(url_or_html="")
        assert outcome["success"] is False

    @pytest.mark.asyncio
    async def test_execute_invalid_format(self):
        """An unknown format name is rejected with a descriptive error."""
        with patch("agentkit.tools.schema_tools._EXTRUCT_AVAILABLE", True):
            extractor = SchemaExtractTool()
            outcome = await extractor.execute(
                url_or_html="<html></html>",
                formats=["invalid-format"],
            )

        assert outcome["success"] is False
        message = outcome["error"]
        assert "不支持" in message or "invalid" in message.lower()
|
||||
|
||||
|
||||
class TestSchemaExtractToolWithMockedExtruct:
    """Extraction logic exercised against a mocked extruct module."""

    # Only ever handed to a mocked extruct, so the markup is never parsed.
    SAMPLE_HTML_WITH_JSONLD = """
<html>
<head>
<script type="application/ld+json">
{
"@context": "https://schema.org",
"@type": "Organization",
"name": "Test Corp"
}
</script>
</head>
<body></body>
</html>
"""

    @staticmethod
    def _fake_extruct(**extract_config):
        """Create an extruct stand-in whose ``extract`` is configured as given."""
        fake = MagicMock()
        fake.extract.configure_mock(**extract_config)
        return fake

    @staticmethod
    async def _extract(fake_extruct, **call_kwargs):
        """Run the tool with extruct marked available and replaced by the fake."""
        with (
            patch("agentkit.tools.schema_tools._EXTRUCT_AVAILABLE", True),
            patch("agentkit.tools.schema_tools.extruct", fake_extruct),
        ):
            return await SchemaExtractTool().execute(**call_kwargs)

    @pytest.mark.asyncio
    async def test_extract_jsonld_from_html(self):
        """JSON-LD found in the HTML is returned as one tagged schema entry."""
        organization = {
            "@context": "https://schema.org",
            "@type": "Organization",
            "name": "Test Corp",
        }
        fake = self._fake_extruct(return_value={"json-ld": [organization]})

        outcome = await self._extract(fake, url_or_html=self.SAMPLE_HTML_WITH_JSONLD)

        assert outcome["success"] is True
        assert len(outcome["schemas"]) == 1
        entry = outcome["schemas"][0]
        assert entry["format"] == "json-ld"
        assert entry["data"]["@type"] == "Organization"
        assert entry["data"]["name"] == "Test Corp"

    @pytest.mark.asyncio
    async def test_extract_no_schema_data(self):
        """HTML without structured data succeeds with an empty list."""
        fake = self._fake_extruct(return_value={"json-ld": []})

        outcome = await self._extract(
            fake, url_or_html="<html><body>No schema</body></html>"
        )

        assert outcome["success"] is True
        assert outcome["schemas"] == []

    @pytest.mark.asyncio
    async def test_extract_multiple_formats(self):
        """Requesting several formats returns entries for each of them."""
        fake = self._fake_extruct(
            return_value={
                "json-ld": [{"@type": "Organization", "name": "Corp"}],
                "microdata": [{"type": "Product", "name": "Item"}],
            }
        )

        outcome = await self._extract(
            fake,
            url_or_html="<html></html>",
            formats=["json-ld", "microdata"],
        )

        assert outcome["success"] is True
        assert len(outcome["schemas"]) == 2
        seen_formats = {entry["format"] for entry in outcome["schemas"]}
        assert "json-ld" in seen_formats
        assert "microdata" in seen_formats

    @pytest.mark.asyncio
    async def test_extract_error_handling(self):
        """An exception raised by extruct is reported, not propagated."""
        fake = self._fake_extruct(side_effect=Exception("Parse error"))

        outcome = await self._extract(fake, url_or_html="<html></html>")

        assert outcome["success"] is False
        assert "Parse error" in outcome["error"]

    @pytest.mark.asyncio
    async def test_extract_with_url(self):
        """A URL input is downloaded first and then extracted."""
        fake = self._fake_extruct(return_value={"json-ld": [{"@type": "WebPage"}]})

        # Response object usable as a context manager, as urlopen's is.
        response = MagicMock()
        response.read.return_value = b"<html><body>Test</body></html>"
        response.__enter__.return_value = response
        response.__exit__.return_value = None

        with patch("urllib.request.urlopen") as fake_urlopen:
            fake_urlopen.return_value = response
            outcome = await self._extract(fake, url_or_html="https://example.com")

        assert outcome["success"] is True
|
||||
|
||||
|
||||
# ========== SchemaGenerateTool 测试 ==========
|
||||
|
||||
|
||||
class TestSchemaGenerateToolConstruction:
    """Construction behaviour of SchemaGenerateTool."""

    def test_default_construction(self):
        """Default arguments yield the expected name and schemas."""
        generator = SchemaGenerateTool()

        assert generator.name == "schema_generate"
        assert generator.input_schema is not None
        assert generator.output_schema is not None

        declared_properties = generator.input_schema["properties"]
        assert "schema_type" in declared_properties
        assert "properties" in declared_properties

    def test_supported_types(self):
        """Every documented Schema.org type is advertised."""
        advertised = SchemaGenerateTool().SUPPORTED_TYPES
        expected_types = (
            "Organization",
            "FAQPage",
            "Article",
            "Product",
            "HowTo",
            "LocalBusiness",
            "Person",
            "BreadcrumbList",
            "SiteNavigationElement",
            "WebPage",
        )
        for type_name in expected_types:
            assert type_name in advertised
|
||||
|
||||
|
||||
class TestSchemaGenerateToolValidation:
    """Input validation of SchemaGenerateTool.execute."""

    @pytest.mark.asyncio
    async def test_execute_missing_schema_type(self):
        """Omitting schema_type fails and names the missing argument."""
        outcome = await SchemaGenerateTool().execute(properties={"name": "Test"})
        assert outcome["success"] is False
        assert "schema_type" in outcome["error"]

    @pytest.mark.asyncio
    async def test_execute_missing_properties(self):
        """Omitting properties fails and names the missing argument."""
        outcome = await SchemaGenerateTool().execute(schema_type="Organization")
        assert outcome["success"] is False
        assert "properties" in outcome["error"]

    @pytest.mark.asyncio
    async def test_execute_invalid_schema_type(self):
        """An unsupported schema_type is rejected."""
        generator = SchemaGenerateTool()
        outcome = await generator.execute(
            schema_type="InvalidType",
            properties={"name": "Test"},
        )
        assert outcome["success"] is False
        message = outcome["error"]
        assert "不支持" in message or "InvalidType" in message

    @pytest.mark.asyncio
    async def test_execute_properties_not_dict(self):
        """A non-dict properties value is rejected."""
        generator = SchemaGenerateTool()
        outcome = await generator.execute(
            schema_type="Organization",
            properties="not a dict",
        )
        assert outcome["success"] is False
        message = outcome["error"]
        assert "字典" in message or "dict" in message.lower()
|
||||
|
||||
|
||||
class TestSchemaGenerateToolManualGeneration:
    """Manual JSON-LD generation with pydantic-schemaorg marked unavailable."""

    @staticmethod
    async def _generate(schema_type, properties):
        """Run the tool with pydantic-schemaorg patched to unavailable."""
        with patch("agentkit.tools.schema_tools._PYDANTIC_SCHEMAORG_AVAILABLE", False):
            return await SchemaGenerateTool().execute(
                schema_type=schema_type,
                properties=properties,
            )

    @pytest.mark.asyncio
    async def test_generate_organization(self):
        """An Organization document carries context, type and properties."""
        outcome = await self._generate(
            "Organization",
            {"name": "Fischer AI", "url": "https://fischer.ai"},
        )

        assert outcome["success"] is True
        assert outcome["schema_type"] == "Organization"

        document = json.loads(outcome["jsonld"])
        assert document["@context"] == "https://schema.org"
        assert document["@type"] == "Organization"
        assert document["name"] == "Fischer AI"
        assert document["url"] == "https://fischer.ai"

    @pytest.mark.asyncio
    async def test_generate_faq_page(self):
        """A FAQPage document keeps its nested question list."""
        question = {
            "@type": "Question",
            "name": "What is GEO?",
            "acceptedAnswer": {
                "@type": "Answer",
                "text": "Generative Engine Optimization",
            },
        }

        outcome = await self._generate("FAQPage", {"mainEntity": [question]})

        assert outcome["success"] is True
        document = json.loads(outcome["jsonld"])
        assert document["@type"] == "FAQPage"
        assert len(document["mainEntity"]) == 1

    @pytest.mark.asyncio
    async def test_generate_article(self):
        """An Article document keeps its headline."""
        outcome = await self._generate(
            "Article",
            {
                "headline": "Test Article",
                "author": {"@type": "Person", "name": "John"},
            },
        )

        assert outcome["success"] is True
        document = json.loads(outcome["jsonld"])
        assert document["@type"] == "Article"
        assert document["headline"] == "Test Article"

    @pytest.mark.asyncio
    async def test_generate_breadcrumb_list(self):
        """A BreadcrumbList document is emitted with the right type."""
        crumbs = [{"@type": "ListItem", "position": 1, "name": "Home"}]

        outcome = await self._generate("BreadcrumbList", {"itemListElement": crumbs})

        assert outcome["success"] is True
        document = json.loads(outcome["jsonld"])
        assert document["@type"] == "BreadcrumbList"

    @pytest.mark.asyncio
    async def test_output_is_valid_jsonld(self):
        """Output always contains @context and @type."""
        for type_name in ("Organization", "WebPage", "Product", "Person"):
            outcome = await self._generate(type_name, {"name": f"Test {type_name}"})

            assert outcome["success"] is True
            document = json.loads(outcome["jsonld"])
            assert "@context" in document
            assert document["@context"] == "https://schema.org"
            assert "@type" in document
            assert document["@type"] == type_name

    @pytest.mark.asyncio
    async def test_manual_generation_preserves_chinese(self):
        """Chinese text survives the JSON round trip unchanged."""
        outcome = await self._generate(
            "Organization",
            {"name": "费舍尔科技", "description": "AI 驱动的企业平台"},
        )

        assert outcome["success"] is True
        document = json.loads(outcome["jsonld"])
        assert document["name"] == "费舍尔科技"
        assert document["description"] == "AI 驱动的企业平台"
|
||||
|
||||
|
||||
class TestSchemaGenerateToolWithPydanticSchemaorg:
    """Behaviour around the optional pydantic-schemaorg path."""

    @pytest.mark.asyncio
    async def test_fallback_to_manual_when_schemaorg_fails(self):
        """When the library cannot resolve the type, manual generation is used."""
        # Setting the attribute to None simulates a type the library lacks.
        fake_schemaorg = MagicMock()
        fake_schemaorg.Organization = None

        with (
            patch("agentkit.tools.schema_tools._PYDANTIC_SCHEMAORG_AVAILABLE", True),
            patch("agentkit.tools.schema_tools.pydantic_schemaorg", fake_schemaorg),
        ):
            generator = SchemaGenerateTool()
            outcome = await generator.execute(
                schema_type="Organization",
                properties={"name": "Test"},
            )

        # The manual fallback must still produce a valid document.
        assert outcome["success"] is True
        document = json.loads(outcome["jsonld"])
        assert document["@type"] == "Organization"
        assert document["name"] == "Test"

    @pytest.mark.asyncio
    async def test_schemaorg_not_available_uses_manual(self):
        """When the library is unavailable, manual generation is used."""
        with patch("agentkit.tools.schema_tools._PYDANTIC_SCHEMAORG_AVAILABLE", False):
            generator = SchemaGenerateTool()
            outcome = await generator.execute(
                schema_type="Organization",
                properties={"name": "Manual Corp"},
            )

        assert outcome["success"] is True
        document = json.loads(outcome["jsonld"])
        assert document["name"] == "Manual Corp"
|
||||
|
||||
|
||||
class TestSchemaGenerateToolSafeExecute:
    """The safe_execute hook on SchemaGenerateTool."""

    @pytest.mark.asyncio
    async def test_safe_execute_success(self):
        """safe_execute returns a successful result for valid input."""
        call_kwargs = {
            "schema_type": "Organization",
            "properties": {"name": "Test"},
        }
        with patch("agentkit.tools.schema_tools._PYDANTIC_SCHEMAORG_AVAILABLE", False):
            outcome = await SchemaGenerateTool().safe_execute(**call_kwargs)

        assert outcome["success"] is True
|
||||
|
|
@ -0,0 +1,201 @@
|
|||
"""WebCrawlTool 单元测试"""
|
||||
|
||||
import sys
|
||||
import types
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from agentkit.tools.web_crawl import WebCrawlTool
|
||||
|
||||
|
||||
class TestWebCrawlToolConstruction:
    """Construction behaviour of WebCrawlTool."""

    def test_default_construction(self):
        """Default arguments yield the expected name, description and schemas."""
        crawler_tool = WebCrawlTool()

        assert crawler_tool.name == "web_crawl"
        summary = crawler_tool.description
        assert "抓取" in summary or "crawl" in summary.lower()
        assert crawler_tool.input_schema is not None
        assert crawler_tool.output_schema is not None

        declared_input = crawler_tool.input_schema
        assert "url" in declared_input["properties"]
        assert declared_input["required"] == ["url"]

    def test_custom_construction(self):
        """Explicit constructor arguments override the defaults."""
        overrides = {
            "name": "my_crawler",
            "description": "自定义爬虫",
            "version": "2.0.0",
            "tags": ["custom"],
        }
        crawler_tool = WebCrawlTool(**overrides)

        assert crawler_tool.name == "my_crawler"
        assert crawler_tool.description == "自定义爬虫"
        assert crawler_tool.version == "2.0.0"
        assert crawler_tool.tags == ["custom"]

    def test_to_dict(self):
        """The serialised form carries the name and both schemas."""
        serialised = WebCrawlTool().to_dict()
        assert serialised["name"] == "web_crawl"
        for key in ("input_schema", "output_schema"):
            assert key in serialised

    def test_repr(self):
        """repr() mentions both the class name and the tool name."""
        text = repr(WebCrawlTool())
        assert "WebCrawlTool" in text
        assert "web_crawl" in text
|
||||
|
||||
|
||||
class TestWebCrawlToolGracefulDegradation:
    """Graceful degradation when Crawl4AI is unavailable."""

    @pytest.mark.asyncio
    async def test_execute_without_crawl4ai(self):
        """execute() returns an install hint when Crawl4AI is not installed."""
        availability_flag = "agentkit.tools.web_crawl._CRAWL4AI_AVAILABLE"

        with patch(availability_flag, False):
            outcome = await WebCrawlTool().execute(url="https://example.com")

        assert outcome["success"] is False
        message = outcome["error"]
        assert "Crawl4AI not installed" in message
        assert "pip install crawl4ai" in message

    @pytest.mark.asyncio
    async def test_safe_execute_without_crawl4ai(self):
        """safe_execute() also returns normally when Crawl4AI is unavailable."""
        availability_flag = "agentkit.tools.web_crawl._CRAWL4AI_AVAILABLE"

        with patch(availability_flag, False):
            outcome = await WebCrawlTool().safe_execute(url="https://example.com")

        assert outcome["success"] is False
|
||||
|
||||
|
||||
class TestWebCrawlToolValidation:
    """Input validation tests."""

    @pytest.mark.asyncio
    async def test_execute_missing_url(self):
        """Calling execute() with no url fails and the error mentions 'url'."""
        outcome = await WebCrawlTool().execute()

        assert outcome["success"] is False
        assert "url" in outcome["error"]

    @pytest.mark.asyncio
    async def test_execute_empty_url(self):
        """An empty url string is rejected."""
        outcome = await WebCrawlTool().execute(url="")

        assert outcome["success"] is False
|
||||
|
||||
|
||||
class TestWebCrawlToolWithMockedCrawl4AI:
    """Exercise the normal crawl path against a mocked Crawl4AI."""

    def _make_mock_crawler(
        self,
        markdown="# Hello",
        html="<h1>Hello</h1>",
        links=None,
        status_code=200,
    ):
        """Create a mock AsyncWebCrawler together with the result its ``arun`` returns.

        Args:
            markdown: Value assigned to the mock result's ``markdown`` attribute.
            html: Value assigned to the mock result's ``html`` attribute.
            links: Value assigned to the mock result's ``links`` attribute.
                ``None`` selects a single default link; an explicitly passed
                empty list is kept as-is.
            status_code: Value assigned to the mock result's ``status_code``.

        Returns:
            A ``(mock_crawler, mock_result)`` tuple. The crawler acts as an
            async context manager that yields itself.
        """
        mock_result = MagicMock()
        mock_result.markdown = markdown
        mock_result.html = html
        # Test ``is None`` rather than truthiness so that callers can request
        # a result with no links at all by passing ``links=[]``.
        mock_result.links = ["https://example.com/page1"] if links is None else links
        mock_result.status_code = status_code
        mock_result.extracted_content = None

        mock_crawler = AsyncMock()
        mock_crawler.arun = AsyncMock(return_value=mock_result)
        mock_crawler.__aenter__ = AsyncMock(return_value=mock_crawler)
        mock_crawler.__aexit__ = AsyncMock(return_value=None)

        return mock_crawler, mock_result

    @pytest.mark.asyncio
    async def test_execute_markdown_format(self):
        """Markdown output format returns the result's markdown."""
        mock_crawler, _ = self._make_mock_crawler(markdown="# Test Page")

        with patch("agentkit.tools.web_crawl._CRAWL4AI_AVAILABLE", True), \
                patch("agentkit.tools.web_crawl.AsyncWebCrawler", return_value=mock_crawler):
            tool = WebCrawlTool()
            result = await tool.execute(url="https://example.com", format="markdown")

        assert result["success"] is True
        assert result["content"] == "# Test Page"
        assert result["status_code"] == 200

    @pytest.mark.asyncio
    async def test_execute_html_format(self):
        """HTML output format returns the result's html."""
        mock_crawler, _ = self._make_mock_crawler(html="<h1>Test</h1>")

        with patch("agentkit.tools.web_crawl._CRAWL4AI_AVAILABLE", True), \
                patch("agentkit.tools.web_crawl.AsyncWebCrawler", return_value=mock_crawler):
            tool = WebCrawlTool()
            result = await tool.execute(url="https://example.com", format="html")

        assert result["success"] is True
        assert result["content"] == "<h1>Test</h1>"

    @pytest.mark.asyncio
    async def test_execute_with_links(self):
        """Links from the crawl result are included in the output."""
        mock_crawler, _ = self._make_mock_crawler(
            links=["https://example.com/a", "https://example.com/b"],
        )

        with patch("agentkit.tools.web_crawl._CRAWL4AI_AVAILABLE", True), \
                patch("agentkit.tools.web_crawl.AsyncWebCrawler", return_value=mock_crawler):
            tool = WebCrawlTool()
            result = await tool.execute(url="https://example.com")

        assert result["success"] is True
        assert len(result["links"]) == 2

    @pytest.mark.asyncio
    async def test_execute_with_css_selector(self):
        """A css_selector argument is handed to the extraction strategy."""
        mock_crawler, mock_result = self._make_mock_crawler()
        mock_result.extracted_content = '{"title": "Test"}'

        mock_strategy_cls = MagicMock(return_value=MagicMock())

        with patch("agentkit.tools.web_crawl._CRAWL4AI_AVAILABLE", True), \
                patch("agentkit.tools.web_crawl.AsyncWebCrawler", return_value=mock_crawler), \
                patch("agentkit.tools.web_crawl.JsonCssExtractionStrategy", mock_strategy_cls):
            tool = WebCrawlTool()
            result = await tool.execute(url="https://example.com", css_selector="h1")

        assert result["success"] is True
        assert "extracted" in result
        mock_strategy_cls.assert_called_once_with("h1")

    @pytest.mark.asyncio
    async def test_execute_with_js_wait(self):
        """A js_wait argument reaches the crawler's ``arun`` call as a keyword."""
        mock_crawler, _ = self._make_mock_crawler()

        with patch("agentkit.tools.web_crawl._CRAWL4AI_AVAILABLE", True), \
                patch("agentkit.tools.web_crawl.AsyncWebCrawler", return_value=mock_crawler):
            tool = WebCrawlTool()
            result = await tool.execute(url="https://example.com", js_wait=2)

        assert result["success"] is True

        # Verify that arun received a js_wait keyword argument.
        # NOTE(review): the previous form was
        # ``x == 2 or x is not None``, which reduces to ``x is not None``,
        # so the exact value was never actually checked. Pin ``== 2`` once
        # it is confirmed that the wrapper forwards the value unchanged.
        forwarded = mock_crawler.arun.call_args.kwargs
        assert forwarded.get("js_wait") is not None

    @pytest.mark.asyncio
    async def test_execute_crawl_error(self):
        """An exception raised by ``arun`` is reported as a failed result."""
        mock_crawler = AsyncMock()
        mock_crawler.arun = AsyncMock(side_effect=Exception("Connection timeout"))
        mock_crawler.__aenter__ = AsyncMock(return_value=mock_crawler)
        mock_crawler.__aexit__ = AsyncMock(return_value=None)

        with patch("agentkit.tools.web_crawl._CRAWL4AI_AVAILABLE", True), \
                patch("agentkit.tools.web_crawl.AsyncWebCrawler", return_value=mock_crawler):
            tool = WebCrawlTool()
            result = await tool.execute(url="https://example.com")

        assert result["success"] is False
        assert "Connection timeout" in result["error"]

    @pytest.mark.asyncio
    async def test_execute_default_format_is_markdown(self):
        """Without an explicit format, the markdown content is returned."""
        mock_crawler, _ = self._make_mock_crawler(markdown="MD content", html="HTML content")

        with patch("agentkit.tools.web_crawl._CRAWL4AI_AVAILABLE", True), \
                patch("agentkit.tools.web_crawl.AsyncWebCrawler", return_value=mock_crawler):
            tool = WebCrawlTool()
            result = await tool.execute(url="https://example.com")

        assert result["success"] is True
        assert result["content"] == "MD content"
|
||||
Loading…
Reference in New Issue