"""WebCrawlTool 单元测试""" import sys import types from unittest.mock import AsyncMock, MagicMock, patch import pytest from agentkit.tools.web_crawl import WebCrawlTool class TestWebCrawlToolConstruction: """测试 WebCrawlTool 构造""" def test_default_construction(self): tool = WebCrawlTool() assert tool.name == "web_crawl" assert "抓取" in tool.description or "crawl" in tool.description.lower() assert tool.input_schema is not None assert tool.output_schema is not None assert "url" in tool.input_schema["properties"] assert tool.input_schema["required"] == ["url"] def test_custom_construction(self): tool = WebCrawlTool( name="my_crawler", description="自定义爬虫", version="2.0.0", tags=["custom"], ) assert tool.name == "my_crawler" assert tool.description == "自定义爬虫" assert tool.version == "2.0.0" assert tool.tags == ["custom"] def test_to_dict(self): tool = WebCrawlTool() d = tool.to_dict() assert d["name"] == "web_crawl" assert "input_schema" in d assert "output_schema" in d def test_repr(self): tool = WebCrawlTool() r = repr(tool) assert "WebCrawlTool" in r assert "web_crawl" in r class TestWebCrawlToolGracefulDegradation: """测试 Crawl4AI 不可用时的优雅降级""" @pytest.mark.asyncio async def test_execute_without_crawl4ai(self): """当 Crawl4AI 未安装时,返回安装提示""" with patch("agentkit.tools.web_crawl._CRAWL4AI_AVAILABLE", False): tool = WebCrawlTool() result = await tool.execute(url="https://example.com") assert result["success"] is False assert "Crawl4AI not installed" in result["error"] assert "pip install crawl4ai" in result["error"] @pytest.mark.asyncio async def test_safe_execute_without_crawl4ai(self): """safe_execute 在 Crawl4AI 不可用时也应正常返回""" with patch("agentkit.tools.web_crawl._CRAWL4AI_AVAILABLE", False): tool = WebCrawlTool() result = await tool.safe_execute(url="https://example.com") assert result["success"] is False class TestWebCrawlToolValidation: """测试输入验证""" @pytest.mark.asyncio async def test_execute_missing_url(self): tool = WebCrawlTool() result = await tool.execute() assert result["success"] is False assert "url" in result["error"] @pytest.mark.asyncio async def test_execute_empty_url(self): tool = WebCrawlTool() result = await tool.execute(url="") assert result["success"] is False class TestWebCrawlToolWithMockedCrawl4AI: """使用 mock Crawl4AI 测试正常抓取逻辑""" def _make_mock_crawler(self, markdown="# Hello", html="

Hello

", links=None, status_code=200): """创建 mock AsyncWebCrawler""" mock_result = MagicMock() mock_result.markdown = markdown mock_result.html = html mock_result.links = links or ["https://example.com/page1"] mock_result.status_code = status_code mock_result.extracted_content = None mock_crawler = AsyncMock() mock_crawler.arun = AsyncMock(return_value=mock_result) mock_crawler.__aenter__ = AsyncMock(return_value=mock_crawler) mock_crawler.__aexit__ = AsyncMock(return_value=None) return mock_crawler, mock_result @pytest.mark.asyncio async def test_execute_markdown_format(self): """测试 Markdown 格式输出""" mock_crawler, _ = self._make_mock_crawler(markdown="# Test Page") with patch("agentkit.tools.web_crawl._CRAWL4AI_AVAILABLE", True), \ patch("agentkit.tools.web_crawl.AsyncWebCrawler", return_value=mock_crawler): tool = WebCrawlTool() result = await tool.execute(url="https://example.com", format="markdown") assert result["success"] is True assert result["content"] == "# Test Page" assert result["status_code"] == 200 @pytest.mark.asyncio async def test_execute_html_format(self): """测试 HTML 格式输出""" mock_crawler, _ = self._make_mock_crawler(html="

Test

") with patch("agentkit.tools.web_crawl._CRAWL4AI_AVAILABLE", True), \ patch("agentkit.tools.web_crawl.AsyncWebCrawler", return_value=mock_crawler): tool = WebCrawlTool() result = await tool.execute(url="https://example.com", format="html") assert result["success"] is True assert result["content"] == "

Test

" @pytest.mark.asyncio async def test_execute_with_links(self): """测试链接提取""" mock_crawler, _ = self._make_mock_crawler(links=["https://example.com/a", "https://example.com/b"]) with patch("agentkit.tools.web_crawl._CRAWL4AI_AVAILABLE", True), \ patch("agentkit.tools.web_crawl.AsyncWebCrawler", return_value=mock_crawler): tool = WebCrawlTool() result = await tool.execute(url="https://example.com") assert result["success"] is True assert len(result["links"]) == 2 @pytest.mark.asyncio async def test_execute_with_css_selector(self): """测试 CSS 选择器提取""" mock_crawler, mock_result = self._make_mock_crawler() mock_result.extracted_content = '{"title": "Test"}' mock_strategy_cls = MagicMock(return_value=MagicMock()) with patch("agentkit.tools.web_crawl._CRAWL4AI_AVAILABLE", True), \ patch("agentkit.tools.web_crawl.AsyncWebCrawler", return_value=mock_crawler), \ patch("agentkit.tools.web_crawl.JsonCssExtractionStrategy", mock_strategy_cls): tool = WebCrawlTool() result = await tool.execute(url="https://example.com", css_selector="h1") assert result["success"] is True assert "extracted" in result mock_strategy_cls.assert_called_once_with("h1") @pytest.mark.asyncio async def test_execute_with_js_wait(self): """测试 JS 等待参数""" mock_crawler, _ = self._make_mock_crawler() with patch("agentkit.tools.web_crawl._CRAWL4AI_AVAILABLE", True), \ patch("agentkit.tools.web_crawl.AsyncWebCrawler", return_value=mock_crawler): tool = WebCrawlTool() result = await tool.execute(url="https://example.com", js_wait=2) assert result["success"] is True # 验证 arun 被调用时传入了 js_wait 参数 call_kwargs = mock_crawler.arun.call_args assert call_kwargs[1].get("js_wait") == 2 or call_kwargs[1].get("js_wait") is not None @pytest.mark.asyncio async def test_execute_crawl_error(self): """测试抓取异常处理""" mock_crawler = AsyncMock() mock_crawler.arun = AsyncMock(side_effect=Exception("Connection timeout")) mock_crawler.__aenter__ = AsyncMock(return_value=mock_crawler) mock_crawler.__aexit__ = AsyncMock(return_value=None) with patch("agentkit.tools.web_crawl._CRAWL4AI_AVAILABLE", True), \ patch("agentkit.tools.web_crawl.AsyncWebCrawler", return_value=mock_crawler): tool = WebCrawlTool() result = await tool.execute(url="https://example.com") assert result["success"] is False assert "Connection timeout" in result["error"] @pytest.mark.asyncio async def test_execute_default_format_is_markdown(self): """测试默认输出格式为 markdown""" mock_crawler, _ = self._make_mock_crawler(markdown="MD content", html="HTML content") with patch("agentkit.tools.web_crawl._CRAWL4AI_AVAILABLE", True), \ patch("agentkit.tools.web_crawl.AsyncWebCrawler", return_value=mock_crawler): tool = WebCrawlTool() result = await tool.execute(url="https://example.com") assert result["success"] is True assert result["content"] == "MD content"