fischer-agentkit/tests/unit/test_schema_tools.py

414 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Schema 工具集单元测试 - SchemaExtractTool + SchemaGenerateTool"""
import json
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from agentkit.tools.schema_tools import SchemaExtractTool, SchemaGenerateTool
# ========== SchemaExtractTool 测试 ==========
class TestSchemaExtractToolConstruction:
    """Construction tests for SchemaExtractTool."""

    def test_default_construction(self):
        """A default instance exposes its name and both schemas."""
        extractor = SchemaExtractTool()
        assert extractor.name == "schema_extract"
        assert extractor.input_schema is not None
        assert extractor.output_schema is not None
        assert "url_or_html" in extractor.input_schema["properties"]
        assert extractor.input_schema["required"] == ["url_or_html"]

    def test_custom_construction(self):
        """A name passed to the constructor overrides the default name."""
        custom_kwargs = {
            "name": "my_extractor",
            "description": "自定义提取器",
            "version": "2.0.0",
        }
        extractor = SchemaExtractTool(**custom_kwargs)
        assert extractor.name == "my_extractor"

    def test_supported_formats(self):
        """All four structured-data formats are listed as supported."""
        extractor = SchemaExtractTool()
        for fmt in ("json-ld", "microdata", "rdfa", "dublincore"):
            assert fmt in extractor.SUPPORTED_FORMATS

    def test_to_dict(self):
        """The dict form of the tool carries the tool name."""
        as_dict = SchemaExtractTool().to_dict()
        assert as_dict["name"] == "schema_extract"
class TestSchemaExtractToolGracefulDegradation:
    """Graceful degradation when extruct is not available."""

    @pytest.mark.asyncio
    async def test_execute_without_extruct(self):
        """With the availability flag off, execute fails with an install hint."""
        flag = "agentkit.tools.schema_tools._EXTRUCT_AVAILABLE"
        with patch(flag, False):
            extractor = SchemaExtractTool()
            outcome = await extractor.execute(url_or_html="<html></html>")

        assert outcome["success"] is False
        error_text = outcome["error"]
        assert "extruct not installed" in error_text
        assert "pip install extruct" in error_text
        assert outcome["schemas"] == []
class TestSchemaExtractToolValidation:
    """Input validation tests for SchemaExtractTool.execute."""

    @pytest.mark.asyncio
    async def test_execute_missing_url_or_html(self):
        """Calling execute with no arguments fails and names the missing field."""
        outcome = await SchemaExtractTool().execute()
        assert outcome["success"] is False
        assert "url_or_html" in outcome["error"]

    @pytest.mark.asyncio
    async def test_execute_empty_url_or_html(self):
        """An empty url_or_html string is rejected."""
        outcome = await SchemaExtractTool().execute(url_or_html="")
        assert outcome["success"] is False

    @pytest.mark.asyncio
    async def test_execute_invalid_format(self):
        """An unknown format name is rejected with an explanatory error."""
        with patch("agentkit.tools.schema_tools._EXTRUCT_AVAILABLE", True):
            extractor = SchemaExtractTool()
            outcome = await extractor.execute(
                url_or_html="<html></html>",
                formats=["invalid-format"],
            )

        assert outcome["success"] is False
        # The message may be localized; accept either wording.
        assert "不支持" in outcome["error"] or "invalid" in outcome["error"].lower()
class TestSchemaExtractToolWithMockedExtruct:
    """Extraction logic tests that run against a mocked extruct module.

    Every test swaps ``agentkit.tools.schema_tools.extruct`` for a MagicMock
    and forces ``_EXTRUCT_AVAILABLE`` to True. The ``extruct`` patch uses
    ``create=True`` so the tests also work when the optional dependency is
    not installed and the module therefore never bound that name.
    """

    SAMPLE_HTML_WITH_JSONLD = """
<html>
<head>
<script type="application/ld+json">
{
"@context": "https://schema.org",
"@type": "Organization",
"name": "Test Corp"
}
</script>
</head>
<body></body>
</html>
"""

    @pytest.mark.asyncio
    async def test_extract_jsonld_from_html(self):
        """A single JSON-LD item returned by extruct is reported as one schema."""
        mock_extruct = MagicMock()
        mock_extruct.extract.return_value = {
            "json-ld": [
                {"@context": "https://schema.org", "@type": "Organization", "name": "Test Corp"}
            ]
        }
        with patch("agentkit.tools.schema_tools._EXTRUCT_AVAILABLE", True), \
                patch("agentkit.tools.schema_tools.extruct", mock_extruct, create=True):
            tool = SchemaExtractTool()
            result = await tool.execute(url_or_html=self.SAMPLE_HTML_WITH_JSONLD)

        assert result["success"] is True
        assert len(result["schemas"]) == 1
        assert result["schemas"][0]["format"] == "json-ld"
        assert result["schemas"][0]["data"]["@type"] == "Organization"
        assert result["schemas"][0]["data"]["name"] == "Test Corp"

    @pytest.mark.asyncio
    async def test_extract_no_schema_data(self):
        """HTML without structured data succeeds with an empty schema list."""
        mock_extruct = MagicMock()
        mock_extruct.extract.return_value = {"json-ld": []}
        with patch("agentkit.tools.schema_tools._EXTRUCT_AVAILABLE", True), \
                patch("agentkit.tools.schema_tools.extruct", mock_extruct, create=True):
            tool = SchemaExtractTool()
            result = await tool.execute(url_or_html="<html><body>No schema</body></html>")

        assert result["success"] is True
        assert result["schemas"] == []

    @pytest.mark.asyncio
    async def test_extract_multiple_formats(self):
        """Items from several requested formats are all reported."""
        mock_extruct = MagicMock()
        mock_extruct.extract.return_value = {
            "json-ld": [{"@type": "Organization", "name": "Corp"}],
            "microdata": [{"type": "Product", "name": "Item"}],
        }
        with patch("agentkit.tools.schema_tools._EXTRUCT_AVAILABLE", True), \
                patch("agentkit.tools.schema_tools.extruct", mock_extruct, create=True):
            tool = SchemaExtractTool()
            result = await tool.execute(
                url_or_html="<html></html>",
                formats=["json-ld", "microdata"],
            )

        assert result["success"] is True
        assert len(result["schemas"]) == 2
        formats_found = {s["format"] for s in result["schemas"]}
        assert "json-ld" in formats_found
        assert "microdata" in formats_found

    @pytest.mark.asyncio
    async def test_extract_error_handling(self):
        """An exception raised by extruct is turned into a failure result."""
        mock_extruct = MagicMock()
        mock_extruct.extract.side_effect = Exception("Parse error")
        with patch("agentkit.tools.schema_tools._EXTRUCT_AVAILABLE", True), \
                patch("agentkit.tools.schema_tools.extruct", mock_extruct, create=True):
            tool = SchemaExtractTool()
            result = await tool.execute(url_or_html="<html></html>")

        assert result["success"] is False
        assert "Parse error" in result["error"]

    @pytest.mark.asyncio
    async def test_extract_with_url(self):
        """Extraction from a URL (the HTML has to be fetched first).

        ``urllib.request.urlopen`` is patched so no real network request is
        made through that function.
        NOTE(review): presumably the tool fetches via urllib.request.urlopen;
        confirm against the implementation.
        """
        mock_extruct = MagicMock()
        mock_extruct.extract.return_value = {
            "json-ld": [{"@type": "WebPage"}]
        }
        with patch("agentkit.tools.schema_tools._EXTRUCT_AVAILABLE", True), \
                patch("agentkit.tools.schema_tools.extruct", mock_extruct, create=True), \
                patch("urllib.request.urlopen") as mock_urlopen:
            # The fake response works both as a plain object and as a
            # context manager that yields itself.
            mock_resp = MagicMock()
            mock_resp.read.return_value = b"<html><body>Test</body></html>"
            mock_resp.__enter__ = MagicMock(return_value=mock_resp)
            mock_resp.__exit__ = MagicMock(return_value=None)
            mock_urlopen.return_value = mock_resp

            tool = SchemaExtractTool()
            result = await tool.execute(url_or_html="https://example.com")

        assert result["success"] is True
# ========== SchemaGenerateTool 测试 ==========
class TestSchemaGenerateToolConstruction:
    """Construction tests for SchemaGenerateTool."""

    def test_default_construction(self):
        """A default instance exposes its name, schemas and input fields."""
        generator = SchemaGenerateTool()
        assert generator.name == "schema_generate"
        assert generator.input_schema is not None
        assert generator.output_schema is not None
        for field in ("schema_type", "properties"):
            assert field in generator.input_schema["properties"]

    def test_supported_types(self):
        """Each expected schema.org type is listed in SUPPORTED_TYPES."""
        generator = SchemaGenerateTool()
        expected_types = (
            "Organization",
            "FAQPage",
            "Article",
            "Product",
            "HowTo",
            "LocalBusiness",
            "Person",
            "BreadcrumbList",
            "SiteNavigationElement",
            "WebPage",
        )
        for type_name in expected_types:
            assert type_name in generator.SUPPORTED_TYPES
class TestSchemaGenerateToolValidation:
    """Input validation tests for SchemaGenerateTool.execute."""

    @pytest.mark.asyncio
    async def test_execute_missing_schema_type(self):
        """Omitting schema_type fails and the error names that field."""
        outcome = await SchemaGenerateTool().execute(properties={"name": "Test"})
        assert outcome["success"] is False
        assert "schema_type" in outcome["error"]

    @pytest.mark.asyncio
    async def test_execute_missing_properties(self):
        """Omitting properties fails and the error names that field."""
        outcome = await SchemaGenerateTool().execute(schema_type="Organization")
        assert outcome["success"] is False
        assert "properties" in outcome["error"]

    @pytest.mark.asyncio
    async def test_execute_invalid_schema_type(self):
        """An unknown schema type is rejected."""
        generator = SchemaGenerateTool()
        outcome = await generator.execute(
            schema_type="InvalidType",
            properties={"name": "Test"},
        )
        assert outcome["success"] is False
        # The message may be localized; accept either wording.
        assert "不支持" in outcome["error"] or "InvalidType" in outcome["error"]

    @pytest.mark.asyncio
    async def test_execute_properties_not_dict(self):
        """A non-dict properties argument is rejected."""
        generator = SchemaGenerateTool()
        outcome = await generator.execute(
            schema_type="Organization",
            properties="not a dict",
        )
        assert outcome["success"] is False
        assert "字典" in outcome["error"] or "dict" in outcome["error"].lower()
class TestSchemaGenerateToolManualGeneration:
    """Manual JSON-LD generation tests (always available, no external dependency).

    Each test patches ``_PYDANTIC_SCHEMAORG_AVAILABLE`` to False before
    calling the tool.
    """

    @pytest.mark.asyncio
    async def test_generate_organization(self):
        """Generate an Organization document."""
        with patch("agentkit.tools.schema_tools._PYDANTIC_SCHEMAORG_AVAILABLE", False):
            generator = SchemaGenerateTool()
            outcome = await generator.execute(
                schema_type="Organization",
                properties={"name": "Fischer AI", "url": "https://fischer.ai"},
            )

        assert outcome["success"] is True
        assert outcome["schema_type"] == "Organization"
        document = json.loads(outcome["jsonld"])
        assert document["@context"] == "https://schema.org"
        assert document["@type"] == "Organization"
        assert document["name"] == "Fischer AI"
        assert document["url"] == "https://fischer.ai"

    @pytest.mark.asyncio
    async def test_generate_faq_page(self):
        """Generate a FAQPage document with one question."""
        answer = {
            "@type": "Answer",
            "text": "Generative Engine Optimization",
        }
        question = {
            "@type": "Question",
            "name": "What is GEO?",
            "acceptedAnswer": answer,
        }
        with patch("agentkit.tools.schema_tools._PYDANTIC_SCHEMAORG_AVAILABLE", False):
            generator = SchemaGenerateTool()
            outcome = await generator.execute(
                schema_type="FAQPage",
                properties={"mainEntity": [question]},
            )

        assert outcome["success"] is True
        document = json.loads(outcome["jsonld"])
        assert document["@type"] == "FAQPage"
        assert len(document["mainEntity"]) == 1

    @pytest.mark.asyncio
    async def test_generate_article(self):
        """Generate an Article document with a nested author."""
        article_props = {
            "headline": "Test Article",
            "author": {"@type": "Person", "name": "John"},
        }
        with patch("agentkit.tools.schema_tools._PYDANTIC_SCHEMAORG_AVAILABLE", False):
            generator = SchemaGenerateTool()
            outcome = await generator.execute(
                schema_type="Article",
                properties=article_props,
            )

        assert outcome["success"] is True
        document = json.loads(outcome["jsonld"])
        assert document["@type"] == "Article"
        assert document["headline"] == "Test Article"

    @pytest.mark.asyncio
    async def test_generate_breadcrumb_list(self):
        """Generate a BreadcrumbList document."""
        crumbs = [
            {"@type": "ListItem", "position": 1, "name": "Home"},
        ]
        with patch("agentkit.tools.schema_tools._PYDANTIC_SCHEMAORG_AVAILABLE", False):
            generator = SchemaGenerateTool()
            outcome = await generator.execute(
                schema_type="BreadcrumbList",
                properties={"itemListElement": crumbs},
            )

        assert outcome["success"] is True
        document = json.loads(outcome["jsonld"])
        assert document["@type"] == "BreadcrumbList"

    @pytest.mark.asyncio
    async def test_output_is_valid_jsonld(self):
        """The output is valid JSON-LD: it contains @context and @type."""
        type_names = ["Organization", "WebPage", "Product", "Person"]
        with patch("agentkit.tools.schema_tools._PYDANTIC_SCHEMAORG_AVAILABLE", False):
            generator = SchemaGenerateTool()
            for type_name in type_names:
                outcome = await generator.execute(
                    schema_type=type_name,
                    properties={"name": f"Test {type_name}"},
                )
                assert outcome["success"] is True
                document = json.loads(outcome["jsonld"])
                assert "@context" in document
                assert document["@context"] == "https://schema.org"
                assert "@type" in document
                assert document["@type"] == type_name

    @pytest.mark.asyncio
    async def test_manual_generation_preserves_chinese(self):
        """Chinese property values survive generation and JSON decoding."""
        chinese_props = {"name": "费舍尔科技", "description": "AI 驱动的企业平台"}
        with patch("agentkit.tools.schema_tools._PYDANTIC_SCHEMAORG_AVAILABLE", False):
            generator = SchemaGenerateTool()
            outcome = await generator.execute(
                schema_type="Organization",
                properties=chinese_props,
            )

        assert outcome["success"] is True
        document = json.loads(outcome["jsonld"])
        assert document["name"] == "费舍尔科技"
        assert document["description"] == "AI 驱动的企业平台"
class TestSchemaGenerateToolWithPydanticSchemaorg:
    """Behavior tests around the pydantic-schemaorg availability flag."""

    @pytest.mark.asyncio
    async def test_fallback_to_manual_when_schemaorg_fails(self):
        """When building via pydantic-schemaorg fails, fall back to manual generation."""
        mock_schemaorg = MagicMock()
        # Make attribute lookup return None, simulating a type that does not exist.
        mock_schemaorg.Organization = None
        # create=True: the module may never have bound ``pydantic_schemaorg``
        # when the optional dependency is not installed; without it patch()
        # would raise AttributeError before the test body runs.
        with patch("agentkit.tools.schema_tools._PYDANTIC_SCHEMAORG_AVAILABLE", True), \
                patch(
                    "agentkit.tools.schema_tools.pydantic_schemaorg",
                    mock_schemaorg,
                    create=True,
                ):
            tool = SchemaGenerateTool()
            result = await tool.execute(
                schema_type="Organization",
                properties={"name": "Test"},
            )

        # The tool is expected to have fallen back to manual generation.
        assert result["success"] is True
        jsonld = json.loads(result["jsonld"])
        assert jsonld["@type"] == "Organization"
        assert jsonld["name"] == "Test"

    @pytest.mark.asyncio
    async def test_schemaorg_not_available_uses_manual(self):
        """When pydantic-schemaorg is unavailable, manual generation is used."""
        with patch("agentkit.tools.schema_tools._PYDANTIC_SCHEMAORG_AVAILABLE", False):
            tool = SchemaGenerateTool()
            result = await tool.execute(
                schema_type="Organization",
                properties={"name": "Manual Corp"},
            )

        assert result["success"] is True
        jsonld = json.loads(result["jsonld"])
        assert jsonld["name"] == "Manual Corp"
class TestSchemaGenerateToolSafeExecute:
    """Tests for the safe_execute hook."""

    @pytest.mark.asyncio
    async def test_safe_execute_success(self):
        """safe_execute reports success for a valid Organization request."""
        call_kwargs = {
            "schema_type": "Organization",
            "properties": {"name": "Test"},
        }
        with patch("agentkit.tools.schema_tools._PYDANTIC_SCHEMAORG_AVAILABLE", False):
            generator = SchemaGenerateTool()
            outcome = await generator.safe_execute(**call_kwargs)

        assert outcome["success"] is True