404 lines
14 KiB
Python
404 lines
14 KiB
Python
"""Tests for DocumentTool — Agent tool wrapper (U6 create + U9 read)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from agentkit.documents.db import init_documents_db
|
|
from agentkit.documents.renderers.excel_renderer import ExcelRenderer
|
|
from agentkit.documents.renderers.pdf_renderer import PDFRenderer
|
|
from agentkit.documents.renderers.word_renderer import WordRenderer
|
|
from agentkit.documents.service import DocumentService
|
|
from agentkit.memory.document_loader import DocumentLoader
|
|
from agentkit.tools.document_tool import DocumentTool
|
|
|
|
|
|
@pytest.fixture
def service(tmp_path: Path) -> DocumentService:
    """Build a DocumentService backed by a fresh per-test database.

    The word, excel and pdf renderers are registered.  Template filling is
    intentionally not set up here: per the original notes, the service keeps
    one renderer per format and WordRenderer has no ``render_template``, so
    this module only exercises content-based creation.
    """
    import asyncio

    database = tmp_path / "test.db"
    uploads = tmp_path / "uploads"
    # init_documents_db is a coroutine function, so drive it to completion here.
    asyncio.run(init_documents_db(database))

    document_service = DocumentService(upload_dir=uploads, db_path=database)
    renderer_classes = (
        ("word", WordRenderer),
        ("excel", ExcelRenderer),
        ("pdf", PDFRenderer),
    )
    for format_name, renderer_cls in renderer_classes:
        document_service.register_renderer(format_name, renderer_cls())
    return document_service
|
|
|
|
|
|
@pytest.fixture
def tool(service: DocumentService) -> DocumentTool:
    """Provide a DocumentTool wired to the per-test ``service`` fixture."""
    document_tool = DocumentTool(service=service)
    return document_tool
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# create action — word / excel / pdf
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
async def test_create_word(tool: DocumentTool) -> None:
    """Creating with format=word succeeds and returns .docx document metadata."""
    request = {
        "format": "word",
        "content": "# Test Report\n\nThis is a test paragraph.\n",
        "conversation_id": "conv-1",
    }
    outcome = await tool.execute(**request)

    assert outcome["success"] is True
    document = outcome["document"]
    assert document["format"] == "word"
    assert document["filename"].endswith(".docx")
    assert document["size"] > 0
    assert document["conversation_id"] == "conv-1"
    # A non-empty identifier must have been assigned.
    assert document["id"]
|
|
|
|
|
|
async def test_create_excel(tool: DocumentTool) -> None:
    """Creating with format=excel turns a JSON payload into a .xlsx document."""
    sheet_json = '{"Data": [["A", "B"], ["1", "2"]]}'
    outcome = await tool.execute(
        format="excel", content=sheet_json, conversation_id="conv-1"
    )

    assert outcome["success"] is True
    document = outcome["document"]
    assert document["format"] == "excel"
    assert document["filename"].endswith(".xlsx")
|
|
|
|
|
|
async def test_create_pdf(tool: DocumentTool) -> None:
    """Creating with format=pdf turns Markdown input into a .pdf document."""
    markdown = "# PDF Title\n\nParagraph text.\n"
    outcome = await tool.execute(
        format="pdf", content=markdown, conversation_id="conv-1"
    )

    assert outcome["success"] is True
    document = outcome["document"]
    assert document["format"] == "pdf"
    assert document["filename"].endswith(".pdf")
|
|
|
|
|
|
async def test_create_with_filename(tool: DocumentTool) -> None:
    """A caller-supplied filename is echoed back in the document metadata."""
    requested_name = "my-report.docx"
    outcome = await tool.execute(
        format="word",
        content="# Test",
        conversation_id="conv-1",
        filename=requested_name,
    )

    assert outcome["success"] is True
    assert outcome["document"]["filename"] == "my-report.docx"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# error paths
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
async def test_missing_format(tool: DocumentTool) -> None:
    """Omitting ``format`` fails and the error message mentions 'format'."""
    outcome = await tool.execute(content="# Test", conversation_id="conv-1")

    assert outcome["success"] is False
    error_message = outcome["error"]
    assert "format" in error_message
|
|
|
|
|
|
async def test_missing_conversation_id(tool: DocumentTool) -> None:
    """Omitting ``conversation_id`` fails and the error names that field."""
    outcome = await tool.execute(format="word", content="# Test")

    assert outcome["success"] is False
    error_message = outcome["error"]
    assert "conversation_id" in error_message
|
|
|
|
|
|
async def test_missing_content(tool: DocumentTool) -> None:
    """An empty ``content`` string fails and the error mentions 'content'."""
    outcome = await tool.execute(
        format="word", content="", conversation_id="conv-1"
    )

    assert outcome["success"] is False
    error_message = outcome["error"]
    assert "content" in error_message
|
|
|
|
|
|
async def test_invalid_format(tool: DocumentTool) -> None:
    """Requesting a format with no registered renderer ('pptx') fails."""
    request = {
        "format": "pptx",
        "content": "# Test",
        "conversation_id": "conv-1",
    }
    outcome = await tool.execute(**request)

    assert outcome["success"] is False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# tool registration and persistence
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_tool_name_and_schema(tool: DocumentTool) -> None:
    """The tool exposes the expected name and an object-typed input schema."""
    assert tool.name == "document"

    schema = tool.input_schema
    assert schema["type"] == "object"

    properties = schema["properties"]
    for field in ("action", "format", "content", "conversation_id", "filename"):
        assert field in properties

    # U9: conversation_id is the only hard-required field; action defaults to "create"
    assert "conversation_id" in schema["required"]
    assert properties["action"]["enum"] == ["create", "read"]
|
|
|
|
|
|
async def test_created_document_persisted(
    tool: DocumentTool, service: DocumentService
) -> None:
    """A document created through the tool can be fetched back via the service."""
    created = await tool.execute(
        format="word", content="# Persisted", conversation_id="conv-persist"
    )
    assert created["success"] is True
    document_id = created["document"]["id"]

    # Listing by conversation yields exactly the document just created.
    listed = await service.get_conversation_documents("conv-persist")
    assert len(listed) == 1
    assert listed[0].id == document_id

    # Direct lookup by id returns the same filename the tool reported.
    fetched = await service.get_document(document_id)
    assert fetched is not None
    assert fetched.filename == created["document"]["filename"]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# read action (U9)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
async def test_read_text_file(tool: DocumentTool, tmp_path: Path) -> None:
    """Reading a .txt file returns its text and reports the 'text' format."""
    source = tmp_path / "notes.txt"
    source.write_text("Hello world\nLine two", encoding="utf-8")

    outcome = await tool.execute(
        action="read", filename=str(source), conversation_id="conv-1"
    )

    assert outcome["success"] is True
    assert "Hello world" in outcome["content"]
    assert outcome["metadata"]["format"] == "text"
|
|
|
|
|
|
async def test_read_markdown_file(tool: DocumentTool, tmp_path: Path) -> None:
    """Reading a .md file keeps the Markdown text and reports its title."""
    source = tmp_path / "doc.md"
    source.write_text("# Title\n\nParagraph.\n", encoding="utf-8")

    outcome = await tool.execute(
        action="read", filename=str(source), conversation_id="conv-1"
    )

    assert outcome["success"] is True
    # The heading markup itself must survive, not just the heading text.
    assert "# Title" in outcome["content"]
    assert outcome["metadata"]["format"] == "markdown"
    assert outcome["title"] == "Title"
|
|
|
|
|
|
async def test_read_word_file(
    tool: DocumentTool, tmp_path: Path, service: DocumentService
) -> None:
    """action='read' extracts text from a .docx file created by the tool itself.

    The document is first produced with action='create', located on disk via
    the ``service`` fixture (the instance the ``tool`` fixture was built with),
    and then read back through the tool.
    """
    create_result = await tool.execute(
        action="create",
        format="word",
        content="# Read Test\n\nContent for reading.",
        conversation_id="conv-1",
        filename="read-test.docx",
    )
    assert create_result["success"] is True

    # Resolve the stored file through the public service fixture instead of
    # reaching into the tool's private ``_service`` attribute.
    doc_id = create_result["document"]["id"]
    path = service.get_download_path(doc_id)
    assert path is not None
    assert path.exists()

    result = await tool.execute(
        action="read", filename=str(path), conversation_id="conv-1"
    )
    assert result["success"] is True
    assert "Read Test" in result["content"]
    assert "Content for reading" in result["content"]
    assert result["metadata"]["format"] == "docx"
|
|
|
|
|
|
async def test_read_excel_file(
    tool: DocumentTool, tmp_path: Path, service: DocumentService
) -> None:
    """action='read' extracts text from a .xlsx file created by the tool itself.

    The workbook is first produced with action='create', located on disk via
    the ``service`` fixture (the instance the ``tool`` fixture was built with),
    and then read back through the tool.
    """
    create_result = await tool.execute(
        action="create",
        format="excel",
        content='{"Sheet1": [["Name", "Age"], ["Alice", "30"], ["Bob", "25"]]}',
        conversation_id="conv-1",
        filename="read-test.xlsx",
    )
    assert create_result["success"] is True

    # Resolve the stored file through the public service fixture instead of
    # reaching into the tool's private ``_service`` attribute.
    doc_id = create_result["document"]["id"]
    path = service.get_download_path(doc_id)
    assert path is not None
    assert path.exists()

    result = await tool.execute(
        action="read", filename=str(path), conversation_id="conv-1"
    )
    assert result["success"] is True
    assert "Alice" in result["content"]
    assert "Bob" in result["content"]
    assert result["metadata"]["format"] == "xlsx"
    assert result["metadata"]["sheet_count"] >= 1
|
|
|
|
|
|
async def test_read_missing_file(tool: DocumentTool, tmp_path: Path) -> None:
    """Reading a path that does not exist fails with a 'not found'-style error."""
    absent = tmp_path / "nonexistent.txt"
    outcome = await tool.execute(
        action="read", filename=str(absent), conversation_id="conv-1"
    )

    assert outcome["success"] is False
    # Either wording is accepted for the missing-file message.
    message = outcome["error"].lower()
    assert "not found" in message or "no such file" in message
|
|
|
|
|
|
async def test_read_missing_filename(tool: DocumentTool) -> None:
    """A read request with no filename fails and the error mentions 'filename'."""
    request = {"action": "read", "conversation_id": "conv-1"}
    outcome = await tool.execute(**request)

    assert outcome["success"] is False
    message = outcome["error"].lower()
    assert "filename" in message
|
|
|
|
|
|
async def test_read_uses_content_as_path_fallback(
    tool: DocumentTool, tmp_path: Path
) -> None:
    """With no filename given, a read treats ``content`` as the file path."""
    source = tmp_path / "via-content.txt"
    source.write_text("content-as-path", encoding="utf-8")

    # Note: the path is passed via ``content``; ``filename`` is deliberately absent.
    outcome = await tool.execute(
        action="read", content=str(source), conversation_id="conv-1"
    )

    assert outcome["success"] is True
    assert "content-as-path" in outcome["content"]
|
|
|
|
|
|
async def test_unknown_action(tool: DocumentTool) -> None:
    """An action other than create/read ('delete') fails as an unknown action."""
    request = {"action": "delete", "conversation_id": "conv-1"}
    outcome = await tool.execute(**request)

    assert outcome["success"] is False
    message = outcome["error"].lower()
    assert "unknown action" in message
|
|
|
|
|
|
async def test_create_action_explicit(tool: DocumentTool) -> None:
    """Passing action='create' explicitly produces a word document as usual."""
    request = {
        "action": "create",
        "format": "word",
        "content": "# Explicit",
        "conversation_id": "conv-1",
    }
    outcome = await tool.execute(**request)

    assert outcome["success"] is True
    assert outcome["document"]["format"] == "word"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# DocumentLoader Excel support (U9)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_loader_detects_xlsx() -> None:
    """Both .xlsx and .xls names, in either case, map to the 'xlsx' format."""
    from agentkit.memory.document_loader import _detect_format

    for candidate in ("data.xlsx", "data.XLS", "data.xls"):
        assert _detect_format(candidate) == "xlsx"
|
|
|
|
|
|
def test_loader_parses_xlsx(tmp_path: Path) -> None:
    """Loading a one-sheet workbook yields its cells as a Markdown table."""
    import openpyxl

    workbook_path = tmp_path / "test.xlsx"
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = "Data"
    for row in (["Name", "Age"], ["Alice", 30], ["Bob", 25]):
        sheet.append(row)
    workbook.save(workbook_path)
    workbook.close()

    loaded = DocumentLoader().load(workbook_path)

    text = loaded.content
    for expected in ("Alice", "Bob", "Name"):
        assert expected in text

    metadata = loaded.metadata
    assert metadata["format"] == "xlsx"
    assert metadata["sheet_count"] == 1
    # Header row plus two data rows.
    assert metadata["row_count"] == 3
    # Markdown table separator should be present
    assert "---" in text
|
|
|
|
|
|
def test_loader_parses_xlsx_multiple_sheets(tmp_path: Path) -> None:
    """Every sheet of a workbook is rendered under its own H2 heading."""
    import openpyxl

    workbook_path = tmp_path / "multi.xlsx"
    workbook = openpyxl.Workbook()

    first = workbook.active
    first.title = "Sheet1"
    for row in (["A", "B"], ["1", "2"]):
        first.append(row)

    second = workbook.create_sheet("Sheet2")
    for row in (["C", "D"], ["3", "4"]):
        second.append(row)

    workbook.save(workbook_path)
    workbook.close()

    loaded = DocumentLoader().load(workbook_path)

    for heading in ("## Sheet1", "## Sheet2"):
        assert heading in loaded.content
    assert loaded.metadata["sheet_count"] == 2
|
|
|
|
|
|
def test_loader_parses_xlsx_empty_cells(tmp_path: Path) -> None:
    """A row containing an empty (None) cell loads without raising."""
    import openpyxl

    workbook_path = tmp_path / "empty.xlsx"
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    for row in (["A", "B", "C"], ["x", None, "z"]):
        sheet.append(row)
    workbook.save(workbook_path)
    workbook.close()

    loaded = DocumentLoader().load(workbook_path)

    # The empty middle cell must not crash the loader, and the cells on
    # either side of it must still appear in the output.
    for neighbour in ("x", "z"):
        assert neighbour in loaded.content
|