"""Tests for DocumentTool — Agent tool wrapper (U6 create + U9 read)."""

from __future__ import annotations

import asyncio
from pathlib import Path

import pytest

from agentkit.documents.db import init_documents_db
from agentkit.documents.renderers.excel_renderer import ExcelRenderer
from agentkit.documents.renderers.pdf_renderer import PDFRenderer
from agentkit.documents.renderers.word_renderer import WordRenderer
from agentkit.documents.service import DocumentService
from agentkit.memory.document_loader import DocumentLoader
from agentkit.tools.document_tool import DocumentTool


@pytest.fixture
def service(tmp_path: Path) -> DocumentService:
    """Build a DocumentService on a temp DB with word/excel/pdf renderers."""
    database_file = tmp_path / "test.db"
    uploads = tmp_path / "uploads"
    # The fixture is synchronous, so the async DB initialiser is driven to
    # completion here before the service is constructed.
    asyncio.run(init_documents_db(database_file))

    document_service = DocumentService(upload_dir=uploads, db_path=database_file)
    for format_name, renderer_cls in (
        ("word", WordRenderer),
        ("excel", ExcelRenderer),
        ("pdf", PDFRenderer),
    ):
        document_service.register_renderer(format_name, renderer_cls())

    # ponytail: template filling is deliberately not covered by this fixture.
    # DocumentService keeps one renderer per format and routes content through
    # renderer.render() (_render_content) and templates through
    # renderer.render_template() (_render_template). WordRenderer has no
    # render_template, so exercising the template path would need a
    # TemplateRenderer registered for "word" instead. These tests therefore
    # only create documents without a template.
    return document_service


@pytest.fixture
def tool(service: DocumentService) -> DocumentTool:
    """Wrap the ``service`` fixture in the DocumentTool under test."""
    document_tool = DocumentTool(service=service)
    return document_tool


# ---------------------------------------------------------------------------
# create action — word
# ---------------------------------------------------------------------------


async def test_create_word(tool: DocumentTool) -> None:
    """format=word creates a .docx and returns success + document metadata."""
    outcome = await tool.execute(
        format="word",
        content="# Test Report\n\nThis is a test paragraph.\n",
        conversation_id="conv-1",
    )

    assert outcome["success"] is True
    document = outcome["document"]
    assert document["format"] == "word"
    assert document["filename"].endswith(".docx")
    assert document["size"] > 0
    assert document["conversation_id"] == "conv-1"
    assert document["id"]  # UUID is set


async def test_create_excel(tool: DocumentTool) -> None:
    """format=excel creates a .xlsx from JSON input."""
    sheet_json = '{"Data": [["A", "B"], ["1", "2"]]}'
    outcome = await tool.execute(
        format="excel",
        content=sheet_json,
        conversation_id="conv-1",
    )

    assert outcome["success"] is True
    document = outcome["document"]
    assert document["format"] == "excel"
    assert document["filename"].endswith(".xlsx")


async def test_create_pdf(tool: DocumentTool) -> None:
    """format=pdf creates a .pdf from Markdown."""
    markdown_source = "# PDF Title\n\nParagraph text.\n"
    outcome = await tool.execute(
        format="pdf",
        content=markdown_source,
        conversation_id="conv-1",
    )

    assert outcome["success"] is True
    document = outcome["document"]
    assert document["format"] == "pdf"
    assert document["filename"].endswith(".pdf")


async def test_create_with_filename(tool: DocumentTool) -> None:
    """A caller-supplied filename is echoed back in the document metadata."""
    requested_name = "my-report.docx"
    outcome = await tool.execute(
        format="word",
        content="# Test",
        conversation_id="conv-1",
        filename=requested_name,
    )

    assert outcome["success"] is True
    assert outcome["document"]["filename"] == "my-report.docx"


# ---------------------------------------------------------------------------
# error paths
# ---------------------------------------------------------------------------


async def test_missing_format(tool: DocumentTool) -> None:
    """Missing format returns success=False and the error names 'format'."""
    result = await tool.execute(
        content="# Test",
        conversation_id="conv-1",
    )
    assert result["success"] is False
    assert "format" in result["error"]


async def test_missing_conversation_id(tool: DocumentTool) -> None:
    """Missing conversation_id returns success=False and the error names it."""
    result = await tool.execute(
        format="word",
        content="# Test",
    )
    assert result["success"] is False
    assert "conversation_id" in result["error"]


async def test_missing_content(tool: DocumentTool) -> None:
    """Empty content returns success=False and the error names 'content'."""
    result = await tool.execute(
        format="word",
        content="",
        conversation_id="conv-1",
    )
    assert result["success"] is False
    assert "content" in result["error"]


async def test_invalid_format(tool: DocumentTool) -> None:
    """Unsupported format returns success=False."""
    result = await tool.execute(
        format="pptx",
        content="# Test",
        conversation_id="conv-1",
    )
    assert result["success"] is False


# ---------------------------------------------------------------------------
# tool registration
# ---------------------------------------------------------------------------


def test_tool_name_and_schema(tool: DocumentTool) -> None:
    """Tool has correct name and input_schema."""
    assert tool.name == "document"

    schema = tool.input_schema
    assert schema["type"] == "object"

    properties = schema["properties"]
    for field in ("action", "format", "content", "conversation_id", "filename"):
        assert field in properties

    # U9: conversation_id is the only hard-required field; action defaults to "create"
    assert "conversation_id" in schema["required"]
    assert properties["action"]["enum"] == ["create", "read"]


async def test_created_document_persisted(tool: DocumentTool, service: DocumentService) -> None:
    """Created document is persisted and retrievable via service."""
    result = await tool.execute(
        format="word",
        content="# Persisted",
        conversation_id="conv-persist",
    )
    assert result["success"] is True
    doc_id = result["document"]["id"]

    # The conversation listing contains exactly the document just created.
    docs = await service.get_conversation_documents("conv-persist")
    assert len(docs) == 1
    assert docs[0].id == doc_id

    # Looking the document up by id returns the same record.
    doc = await service.get_document(doc_id)
    assert doc is not None
    assert doc.filename == result["document"]["filename"]


# ---------------------------------------------------------------------------
# read action (U9)
# ---------------------------------------------------------------------------


async def test_read_text_file(tool: DocumentTool, tmp_path: Path) -> None:
    """action='read' extracts text from a .txt file."""
    f = tmp_path / "notes.txt"
    f.write_text("Hello world\nLine two", encoding="utf-8")

    result = await tool.execute(action="read", filename=str(f), conversation_id="conv-1")

    assert result["success"] is True
    assert "Hello world" in result["content"]
    assert result["metadata"]["format"] == "text"


async def test_read_markdown_file(tool: DocumentTool, tmp_path: Path) -> None:
    """action='read' extracts text from a .md file, preserving content."""
    f = tmp_path / "doc.md"
    f.write_text("# Title\n\nParagraph.\n", encoding="utf-8")

    result = await tool.execute(action="read", filename=str(f), conversation_id="conv-1")

    assert result["success"] is True
    assert "# Title" in result["content"]
    assert result["metadata"]["format"] == "markdown"
    assert result["title"] == "Title"


async def test_read_word_file(tool: DocumentTool, service: DocumentService) -> None:
    """action='read' extracts text from a .docx file created by the tool itself."""
    # First create a docx through the tool.
    create_result = await tool.execute(
        action="create",
        format="word",
        content="# Read Test\n\nContent for reading.",
        conversation_id="conv-1",
        filename="read-test.docx",
    )
    assert create_result["success"] is True

    # The file lives in the service's upload_dir. Locate it through the
    # ``service`` fixture (the instance the ``tool`` fixture was built from)
    # instead of reaching into the tool's private attribute.
    doc_id = create_result["document"]["id"]
    path = service.get_download_path(doc_id)
    assert path is not None and path.exists()

    result = await tool.execute(action="read", filename=str(path), conversation_id="conv-1")

    assert result["success"] is True
    assert "Read Test" in result["content"]
    assert "Content for reading" in result["content"]
    assert result["metadata"]["format"] == "docx"


async def test_read_excel_file(tool: DocumentTool, service: DocumentService) -> None:
    """action='read' extracts text from a .xlsx file created by the tool itself."""
    create_result = await tool.execute(
        action="create",
        format="excel",
        content='{"Sheet1": [["Name", "Age"], ["Alice", "30"], ["Bob", "25"]]}',
        conversation_id="conv-1",
        filename="read-test.xlsx",
    )
    assert create_result["success"] is True

    # Locate the generated file through the ``service`` fixture rather than
    # the tool's private attribute.
    doc_id = create_result["document"]["id"]
    path = service.get_download_path(doc_id)
    assert path is not None and path.exists()

    result = await tool.execute(action="read", filename=str(path), conversation_id="conv-1")

    assert result["success"] is True
    assert "Alice" in result["content"]
    assert "Bob" in result["content"]
    assert result["metadata"]["format"] == "xlsx"
    assert result["metadata"]["sheet_count"] >= 1


async def test_read_missing_file(tool: DocumentTool, tmp_path: Path) -> None:
    """action='read' with non-existent file returns success=False."""
    result = await tool.execute(
        action="read",
        filename=str(tmp_path / "nonexistent.txt"),
        conversation_id="conv-1",
    )
    assert result["success"] is False

    # Either wording is accepted for the missing-file error.
    message = result["error"].lower()
    assert "not found" in message or "no such file" in message


async def test_read_missing_filename(tool: DocumentTool) -> None:
    """action='read' without filename returns success=False."""
    result = await tool.execute(action="read", conversation_id="conv-1")
    assert result["success"] is False
    assert "filename" in result["error"].lower()


async def test_read_uses_content_as_path_fallback(tool: DocumentTool, tmp_path: Path) -> None:
    """action='read' falls back to 'content' as file path when filename is absent."""
    f = tmp_path / "via-content.txt"
    f.write_text("content-as-path", encoding="utf-8")

    result = await tool.execute(
        action="read",
        content=str(f),
        conversation_id="conv-1",
    )

    assert result["success"] is True
    assert "content-as-path" in result["content"]


async def test_unknown_action(tool: DocumentTool) -> None:
    """Unknown action returns success=False."""
    result = await tool.execute(action="delete", conversation_id="conv-1")
    assert result["success"] is False
    assert "unknown action" in result["error"].lower()


async def test_create_action_explicit(tool: DocumentTool) -> None:
    """action='create' explicitly works the same as default."""
    result = await tool.execute(
        action="create",
        format="word",
        content="# Explicit",
        conversation_id="conv-1",
    )
    assert result["success"] is True
    assert result["document"]["format"] == "word"


# ---------------------------------------------------------------------------
# DocumentLoader Excel support (U9)
# ---------------------------------------------------------------------------


def test_loader_detects_xlsx() -> None:
    """DocumentLoader detects .xlsx and .xls as xlsx format."""
    from agentkit.memory.document_loader import _detect_format

    assert _detect_format("data.xlsx") == "xlsx"
    assert _detect_format("data.XLS") == "xlsx"
    assert _detect_format("data.xls") == "xlsx"


def test_loader_parses_xlsx(tmp_path: Path) -> None:
    """DocumentLoader._parse_xlsx extracts sheet data as Markdown table."""
    import openpyxl

    f = tmp_path / "test.xlsx"
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "Data"
    ws.append(["Name", "Age"])
    ws.append(["Alice", 30])
    ws.append(["Bob", 25])
    wb.save(f)
    wb.close()

    loader = DocumentLoader()
    doc = loader.load(f)

    assert "Alice" in doc.content
    assert "Bob" in doc.content
    assert "Name" in doc.content
    assert doc.metadata["format"] == "xlsx"
    assert doc.metadata["sheet_count"] == 1
    assert doc.metadata["row_count"] == 3
    # Markdown table separator should be present
    assert "---" in doc.content


def test_loader_parses_xlsx_multiple_sheets(tmp_path: Path) -> None:
    """DocumentLoader handles multiple sheets, each as a separate H2 section."""
    import openpyxl

    f = tmp_path / "multi.xlsx"
    wb = openpyxl.Workbook()
    ws1 = wb.active
    ws1.title = "Sheet1"
    ws1.append(["A", "B"])
    ws1.append(["1", "2"])
    ws2 = wb.create_sheet("Sheet2")
    ws2.append(["C", "D"])
    ws2.append(["3", "4"])
    wb.save(f)
    wb.close()

    loader = DocumentLoader()
    doc = loader.load(f)

    assert "## Sheet1" in doc.content
    assert "## Sheet2" in doc.content
    assert doc.metadata["sheet_count"] == 2


def test_loader_parses_xlsx_empty_cells(tmp_path: Path) -> None:
    """DocumentLoader loads a sheet containing an empty (None) cell without error."""
    import openpyxl

    f = tmp_path / "empty.xlsx"
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.append(["A", "B", "C"])
    ws.append(["x", None, "z"])
    wb.save(f)
    wb.close()

    loader = DocumentLoader()
    doc = loader.load(f)

    # Loading must not raise on the empty cell, and the values on either side
    # of it must still appear in the extracted content.
    assert "x" in doc.content
    assert "z" in doc.content