"""Unit tests for the schema tool set: SchemaExtractTool and SchemaGenerateTool."""
|
||
|
||
import json
|
||
from unittest.mock import AsyncMock, MagicMock, patch
|
||
|
||
import pytest
|
||
|
||
from agentkit.tools.schema_tools import SchemaExtractTool, SchemaGenerateTool
|
||
|
||
|
||
# ========== SchemaExtractTool tests ==========
|
||
|
||
|
||
class TestSchemaExtractToolConstruction:
    """Construction and static metadata of SchemaExtractTool."""

    def test_default_construction(self):
        """A tool built without arguments exposes its default name and both schemas."""
        extractor = SchemaExtractTool()

        assert extractor.name == "schema_extract"
        assert extractor.input_schema is not None
        assert extractor.output_schema is not None

        in_schema = extractor.input_schema
        assert "url_or_html" in in_schema["properties"]
        assert in_schema["required"] == ["url_or_html"]

    def test_custom_construction(self):
        """A custom name passed to the constructor is reflected on the tool.

        The description and version overrides are passed as well, but only
        the name is asserted here.
        """
        overrides = {
            "name": "my_extractor",
            "description": "自定义提取器",
            "version": "2.0.0",
        }
        extractor = SchemaExtractTool(**overrides)
        assert extractor.name == "my_extractor"

    def test_supported_formats(self):
        """Every expected structured-data format is listed in SUPPORTED_FORMATS."""
        supported = SchemaExtractTool().SUPPORTED_FORMATS
        for fmt in ("json-ld", "microdata", "rdfa", "dublincore"):
            assert fmt in supported

    def test_to_dict(self):
        """The dict form of the tool carries the default tool name."""
        as_dict = SchemaExtractTool().to_dict()
        assert as_dict["name"] == "schema_extract"
|
||
|
||
|
||
class TestSchemaExtractToolGracefulDegradation:
    """Graceful degradation when extruct is flagged as unavailable."""

    @pytest.mark.asyncio
    async def test_execute_without_extruct(self):
        """With the availability flag patched to False, execute fails with an install hint."""
        extruct_missing = patch("agentkit.tools.schema_tools._EXTRUCT_AVAILABLE", False)
        with extruct_missing:
            outcome = await SchemaExtractTool().execute(url_or_html="<html></html>")

        assert outcome["success"] is False
        # The error must both name the problem and tell the user how to fix it.
        for fragment in ("extruct not installed", "pip install extruct"):
            assert fragment in outcome["error"]
        assert outcome["schemas"] == []
|
||
|
||
|
||
class TestSchemaExtractToolValidation:
    """Input validation performed by SchemaExtractTool.execute."""

    @pytest.mark.asyncio
    async def test_execute_missing_url_or_html(self):
        """Calling execute with no arguments fails and the error names url_or_html."""
        outcome = await SchemaExtractTool().execute()
        assert outcome["success"] is False
        assert "url_or_html" in outcome["error"]

    @pytest.mark.asyncio
    async def test_execute_empty_url_or_html(self):
        """An empty url_or_html string is rejected."""
        outcome = await SchemaExtractTool().execute(url_or_html="")
        assert outcome["success"] is False

    @pytest.mark.asyncio
    async def test_execute_invalid_format(self):
        """An unknown format name is rejected with an 'unsupported'/'invalid' style error."""
        # Flag extruct as available so the call is not rejected by the
        # availability check exercised in the graceful-degradation tests.
        with patch("agentkit.tools.schema_tools._EXTRUCT_AVAILABLE", True):
            outcome = await SchemaExtractTool().execute(
                url_or_html="<html></html>",
                formats=["invalid-format"],
            )

        assert outcome["success"] is False
        message = outcome["error"]
        # Accept either the Chinese wording or any message mentioning "invalid".
        assert "不支持" in message or "invalid" in message.lower()
|
||
|
||
|
||
class TestSchemaExtractToolWithMockedExtruct:
    """Extraction logic exercised against a mocked extruct module."""

    SAMPLE_HTML_WITH_JSONLD = """
<html>
<head>
<script type="application/ld+json">
{
"@context": "https://schema.org",
"@type": "Organization",
"name": "Test Corp"
}
</script>
</head>
<body></body>
</html>
"""

    @staticmethod
    def _extruct_patched(fake_extruct):
        """Return a context manager that flags extruct available and swaps in *fake_extruct*."""
        return patch.multiple(
            "agentkit.tools.schema_tools",
            _EXTRUCT_AVAILABLE=True,
            extruct=fake_extruct,
        )

    @pytest.mark.asyncio
    async def test_extract_jsonld_from_html(self):
        """A single JSON-LD item returned by extruct surfaces as one schema entry."""
        organization = {
            "@context": "https://schema.org",
            "@type": "Organization",
            "name": "Test Corp",
        }
        fake_extruct = MagicMock()
        fake_extruct.extract.return_value = {"json-ld": [organization]}

        with self._extruct_patched(fake_extruct):
            outcome = await SchemaExtractTool().execute(
                url_or_html=self.SAMPLE_HTML_WITH_JSONLD
            )

        assert outcome["success"] is True
        assert len(outcome["schemas"]) == 1
        entry = outcome["schemas"][0]
        assert entry["format"] == "json-ld"
        assert entry["data"]["@type"] == "Organization"
        assert entry["data"]["name"] == "Test Corp"

    @pytest.mark.asyncio
    async def test_extract_no_schema_data(self):
        """HTML without structured data yields success with an empty schema list."""
        fake_extruct = MagicMock()
        fake_extruct.extract.return_value = {"json-ld": []}

        with self._extruct_patched(fake_extruct):
            outcome = await SchemaExtractTool().execute(
                url_or_html="<html><body>No schema</body></html>"
            )

        assert outcome["success"] is True
        assert outcome["schemas"] == []

    @pytest.mark.asyncio
    async def test_extract_multiple_formats(self):
        """Items from several requested formats are all reported, tagged by format."""
        fake_extruct = MagicMock()
        fake_extruct.extract.return_value = {
            "json-ld": [{"@type": "Organization", "name": "Corp"}],
            "microdata": [{"type": "Product", "name": "Item"}],
        }

        with self._extruct_patched(fake_extruct):
            outcome = await SchemaExtractTool().execute(
                url_or_html="<html></html>",
                formats=["json-ld", "microdata"],
            )

        assert outcome["success"] is True
        assert len(outcome["schemas"]) == 2
        formats_found = {entry["format"] for entry in outcome["schemas"]}
        assert "json-ld" in formats_found
        assert "microdata" in formats_found

    @pytest.mark.asyncio
    async def test_extract_error_handling(self):
        """An exception raised by extruct is reported as a failure carrying its message."""
        fake_extruct = MagicMock()
        fake_extruct.extract.side_effect = Exception("Parse error")

        with self._extruct_patched(fake_extruct):
            outcome = await SchemaExtractTool().execute(url_or_html="<html></html>")

        assert outcome["success"] is False
        assert "Parse error" in outcome["error"]

    @pytest.mark.asyncio
    async def test_extract_with_url(self):
        """Extraction from a URL succeeds when the HTTP fetch is stubbed out."""
        fake_extruct = MagicMock()
        fake_extruct.extract.return_value = {"json-ld": [{"@type": "WebPage"}]}

        # Response double that also works as a context manager yielding itself.
        response = MagicMock()
        response.read.return_value = b"<html><body>Test</body></html>"
        response.__enter__.return_value = response
        response.__exit__.return_value = None

        # NOTE(review): nothing here asserts that urlopen was actually called;
        # presumably the tool fetches via urllib.request.urlopen - confirm,
        # otherwise this test may perform real network I/O.
        with self._extruct_patched(fake_extruct):
            with patch("urllib.request.urlopen", return_value=response):
                outcome = await SchemaExtractTool().execute(
                    url_or_html="https://example.com"
                )

        assert outcome["success"] is True
|
||
|
||
|
||
# ========== SchemaGenerateTool tests ==========
|
||
|
||
|
||
class TestSchemaGenerateToolConstruction:
    """Construction and static metadata of SchemaGenerateTool."""

    def test_default_construction(self):
        """A tool built without arguments exposes its default name and both schemas."""
        generator = SchemaGenerateTool()

        assert generator.name == "schema_generate"
        assert generator.input_schema is not None
        assert generator.output_schema is not None

        declared = generator.input_schema["properties"]
        for field in ("schema_type", "properties"):
            assert field in declared

    def test_supported_types(self):
        """Every expected schema.org type name is listed in SUPPORTED_TYPES."""
        expected_types = (
            "Organization",
            "FAQPage",
            "Article",
            "Product",
            "HowTo",
            "LocalBusiness",
            "Person",
            "BreadcrumbList",
            "SiteNavigationElement",
            "WebPage",
        )
        supported = SchemaGenerateTool().SUPPORTED_TYPES
        for type_name in expected_types:
            assert type_name in supported
|
||
|
||
|
||
class TestSchemaGenerateToolValidation:
    """Input validation performed by SchemaGenerateTool.execute."""

    @pytest.mark.asyncio
    async def test_execute_missing_schema_type(self):
        """Omitting schema_type fails and the error names the missing argument."""
        outcome = await SchemaGenerateTool().execute(properties={"name": "Test"})
        assert outcome["success"] is False
        assert "schema_type" in outcome["error"]

    @pytest.mark.asyncio
    async def test_execute_missing_properties(self):
        """Omitting properties fails and the error names the missing argument."""
        outcome = await SchemaGenerateTool().execute(schema_type="Organization")
        assert outcome["success"] is False
        assert "properties" in outcome["error"]

    @pytest.mark.asyncio
    async def test_execute_invalid_schema_type(self):
        """An unknown schema type is rejected; the error says so or echoes the type."""
        outcome = await SchemaGenerateTool().execute(
            schema_type="InvalidType",
            properties={"name": "Test"},
        )
        assert outcome["success"] is False
        message = outcome["error"]
        assert "不支持" in message or "InvalidType" in message

    @pytest.mark.asyncio
    async def test_execute_properties_not_dict(self):
        """A non-dict properties value is rejected with a message mentioning dicts."""
        outcome = await SchemaGenerateTool().execute(
            schema_type="Organization",
            properties="not a dict",
        )
        assert outcome["success"] is False
        message = outcome["error"]
        # Accept either the Chinese wording or any message mentioning "dict".
        assert "字典" in message or "dict" in message.lower()
|
||
|
||
|
||
class TestSchemaGenerateToolManualGeneration:
    """Manual JSON-LD generation, exercised with pydantic-schemaorg flagged unavailable."""

    @staticmethod
    async def _generate_manually(schema_type, properties):
        """Run SchemaGenerateTool.execute with the pydantic-schemaorg flag patched to False.

        Args:
            schema_type: Value passed as the ``schema_type`` keyword.
            properties: Value passed as the ``properties`` keyword.

        Returns:
            Whatever ``execute`` returns (the tests index it like a dict).
        """
        with patch("agentkit.tools.schema_tools._PYDANTIC_SCHEMAORG_AVAILABLE", False):
            return await SchemaGenerateTool().execute(
                schema_type=schema_type,
                properties=properties,
            )

    @pytest.mark.asyncio
    async def test_generate_organization(self):
        """Organization output carries @context, @type and the given properties."""
        result = await self._generate_manually(
            "Organization",
            {"name": "Fischer AI", "url": "https://fischer.ai"},
        )
        assert result["success"] is True
        assert result["schema_type"] == "Organization"

        jsonld = json.loads(result["jsonld"])
        assert jsonld["@context"] == "https://schema.org"
        assert jsonld["@type"] == "Organization"
        assert jsonld["name"] == "Fischer AI"
        assert jsonld["url"] == "https://fischer.ai"

    @pytest.mark.asyncio
    async def test_generate_faq_page(self):
        """FAQPage output keeps the nested mainEntity list."""
        question = {
            "@type": "Question",
            "name": "What is GEO?",
            "acceptedAnswer": {
                "@type": "Answer",
                "text": "Generative Engine Optimization",
            },
        }
        result = await self._generate_manually("FAQPage", {"mainEntity": [question]})
        assert result["success"] is True

        jsonld = json.loads(result["jsonld"])
        assert jsonld["@type"] == "FAQPage"
        assert len(jsonld["mainEntity"]) == 1

    @pytest.mark.asyncio
    async def test_generate_article(self):
        """Article output keeps the headline alongside a nested author object."""
        result = await self._generate_manually(
            "Article",
            {
                "headline": "Test Article",
                "author": {"@type": "Person", "name": "John"},
            },
        )
        assert result["success"] is True

        jsonld = json.loads(result["jsonld"])
        assert jsonld["@type"] == "Article"
        assert jsonld["headline"] == "Test Article"

    @pytest.mark.asyncio
    async def test_generate_breadcrumb_list(self):
        """BreadcrumbList output is typed correctly."""
        result = await self._generate_manually(
            "BreadcrumbList",
            {
                "itemListElement": [
                    {"@type": "ListItem", "position": 1, "name": "Home"},
                ]
            },
        )
        assert result["success"] is True

        jsonld = json.loads(result["jsonld"])
        assert jsonld["@type"] == "BreadcrumbList"

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "schema_type",
        ["Organization", "WebPage", "Product", "Person"],
    )
    async def test_output_is_valid_jsonld(self, schema_type):
        """Output for each type is JSON carrying the schema.org @context and matching @type.

        Parametrized so that a failure for one type neither hides the
        remaining types nor leaves the failing type unnamed in the report.
        """
        result = await self._generate_manually(
            schema_type,
            {"name": f"Test {schema_type}"},
        )
        assert result["success"] is True

        jsonld = json.loads(result["jsonld"])
        assert "@context" in jsonld
        assert jsonld["@context"] == "https://schema.org"
        assert "@type" in jsonld
        assert jsonld["@type"] == schema_type

    @pytest.mark.asyncio
    async def test_manual_generation_preserves_chinese(self):
        """Chinese text survives both in the raw serialized output and after parsing."""
        name = "费舍尔科技"
        description = "AI 驱动的企业平台"

        result = await self._generate_manually(
            "Organization",
            {"name": name, "description": description},
        )
        assert result["success"] is True

        # json.loads decodes \uXXXX escapes, so the parsed-value checks below
        # would pass even for ASCII-escaped output. Checking the raw string is
        # what actually pins "preserves Chinese characters".
        # NOTE(review): assumes result["jsonld"] is a str - confirm.
        raw = result["jsonld"]
        assert name in raw
        assert description in raw

        jsonld = json.loads(raw)
        assert jsonld["name"] == name
        assert jsonld["description"] == description
|
||
|
||
|
||
class TestSchemaGenerateToolWithPydanticSchemaorg:
    """Behaviour around the optional pydantic-schemaorg dependency."""

    @pytest.mark.asyncio
    async def test_fallback_to_manual_when_schemaorg_fails(self):
        """When pydantic-schemaorg cannot build the type, manual generation is used instead."""
        # Attribute lookup of Organization yields None, simulating a type
        # that does not exist in the library.
        fake_schemaorg = MagicMock(Organization=None)

        with patch.multiple(
            "agentkit.tools.schema_tools",
            _PYDANTIC_SCHEMAORG_AVAILABLE=True,
            pydantic_schemaorg=fake_schemaorg,
        ):
            outcome = await SchemaGenerateTool().execute(
                schema_type="Organization",
                properties={"name": "Test"},
            )

        # The tool is expected to have fallen back to manual generation.
        assert outcome["success"] is True
        document = json.loads(outcome["jsonld"])
        assert document["@type"] == "Organization"
        assert document["name"] == "Test"

    @pytest.mark.asyncio
    async def test_schemaorg_not_available_uses_manual(self):
        """With the availability flag False, generation still succeeds."""
        schemaorg_missing = patch(
            "agentkit.tools.schema_tools._PYDANTIC_SCHEMAORG_AVAILABLE", False
        )
        with schemaorg_missing:
            outcome = await SchemaGenerateTool().execute(
                schema_type="Organization",
                properties={"name": "Manual Corp"},
            )

        assert outcome["success"] is True
        document = json.loads(outcome["jsonld"])
        assert document["name"] == "Manual Corp"
|
||
|
||
|
||
class TestSchemaGenerateToolSafeExecute:
    """The safe_execute entry point of SchemaGenerateTool."""

    @pytest.mark.asyncio
    async def test_safe_execute_success(self):
        """safe_execute reports success for a valid Organization request."""
        request = {
            "schema_type": "Organization",
            "properties": {"name": "Test"},
        }
        schemaorg_missing = patch(
            "agentkit.tools.schema_tools._PYDANTIC_SCHEMAORG_AVAILABLE", False
        )
        with schemaorg_missing:
            outcome = await SchemaGenerateTool().safe_execute(**request)

        assert outcome["success"] is True
|