# fischer-agentkit/tests/integration/test_document_e2e.py
# (file viewer metadata: 425 lines, 16 KiB, Python)

"""End-to-end integration tests for document processing (F1, F2, F3).

Verifies complete user flows:
- F1: Create document → List → Download → Verify content
- F2: Upload template → Create with template → Download → Verify variables replaced
- F3: Cross-conversation isolation

Also checks data consistency: the reported size, file extension and custom
filename agree with what the download endpoint returns.
"""
from __future__ import annotations
import asyncio
import io
from pathlib import Path
import pytest
from docx import Document as DocxDocument
from fastapi import FastAPI
from fastapi.testclient import TestClient
from openpyxl import load_workbook
from agentkit.documents.db import init_documents_db
from agentkit.documents.renderers.excel_renderer import ExcelRenderer
from agentkit.documents.renderers.pdf_renderer import PDFRenderer
from agentkit.documents.renderers.word_renderer import WordRenderer
from agentkit.documents.service import DocumentService
from agentkit.server.routes import documents as documents_routes
@pytest.fixture
def app(tmp_path: Path) -> FastAPI:
    """Build an isolated FastAPI app with the word, excel and pdf renderers.

    The documents database and the upload directory both live under
    ``tmp_path``. After the Bug 2 fix, TemplateRenderer is lazy-loaded by
    DocumentService when template_path is provided, so it is not registered
    here.
    """
    database_file = tmp_path / "test.db"
    uploads = tmp_path / "uploads"
    asyncio.run(init_documents_db(database_file))

    document_service = DocumentService(upload_dir=uploads, db_path=database_file)
    for format_name, renderer_cls in (
        ("word", WordRenderer),
        ("excel", ExcelRenderer),
        ("pdf", PDFRenderer),
    ):
        document_service.register_renderer(format_name, renderer_cls())

    application = FastAPI()
    application.state.document_service = document_service
    application.state.server_config = None  # No auth for E2E tests
    application.include_router(documents_routes.router, prefix="/api/v1")
    return application
@pytest.fixture
def client(app: FastAPI) -> TestClient:
    """Return a TestClient bound to the per-test ``app`` fixture."""
    test_client = TestClient(app)
    return test_client
# ---------------------------------------------------------------------------
# F1: Create → List → Download complete flow
# ---------------------------------------------------------------------------
class TestF1CreateListDownload:
    """F1: User creates a document, sees it in the list, downloads it."""

    def test_e2e_word_create_list_download(self, client: TestClient) -> None:
        """Word: create → list contains it → download content matches."""
        # Step 1: Create
        create_resp = client.post(
            "/api/v1/documents/create",
            json={
                "format": "word",
                "content": "# E2E Report\n\nThis is the report content.",
                "conversation_id": "conv-e2e-1",
            },
        )
        assert create_resp.status_code == 200, create_resp.text
        doc = create_resp.json()["document"]
        doc_id = doc["id"]
        assert doc["format"] == "word"
        assert doc["filename"].endswith(".docx")
        assert doc["size"] > 0

        # Step 2: List — document appears in conversation
        list_resp = client.get("/api/v1/documents/conversation/conv-e2e-1")
        assert list_resp.status_code == 200
        docs = list_resp.json()["documents"]
        assert len(docs) == 1
        assert docs[0]["id"] == doc_id
        assert docs[0]["download_url"] == f"/api/v1/documents/download/{doc_id}"

        # Step 3: Download — file content is valid
        dl_resp = client.get(f"/api/v1/documents/download/{doc_id}")
        assert dl_resp.status_code == 200
        assert len(dl_resp.content) == doc["size"]

        # Step 4: Verify downloaded file is a valid .docx with correct content
        downloaded = DocxDocument(io.BytesIO(dl_resp.content))
        text = "\n".join(p.text for p in downloaded.paragraphs)
        assert "E2E Report" in text
        assert "This is the report content" in text

    def test_e2e_excel_create_list_download(self, client: TestClient) -> None:
        """Excel: create → list → download → verify cell content."""
        create_resp = client.post(
            "/api/v1/documents/create",
            json={
                "format": "excel",
                "content": '{"Sales": [["Product", "Revenue"], ["Widget", "1000"], ["Gadget", "2000"]]}',
                "conversation_id": "conv-e2e-2",
            },
        )
        assert create_resp.status_code == 200, create_resp.text
        doc_id = create_resp.json()["document"]["id"]

        # List
        list_resp = client.get("/api/v1/documents/conversation/conv-e2e-2")
        assert list_resp.status_code == 200
        assert list_resp.json()["count"] == 1

        # Download and verify
        dl_resp = client.get(f"/api/v1/documents/download/{doc_id}")
        assert dl_resp.status_code == 200
        wb = load_workbook(io.BytesIO(dl_resp.content))
        try:
            ws = wb["Sales"]
            assert ws["A1"].value == "Product"
            assert ws["B1"].value == "Revenue"
            assert ws["A2"].value == "Widget"
            assert ws["B2"].value == "1000"
        finally:
            # Close even when an assertion fails so the workbook is not leaked.
            wb.close()

    def test_e2e_pdf_create_list_download(self, client: TestClient) -> None:
        """PDF: create → list → download → verify PDF magic bytes."""
        create_resp = client.post(
            "/api/v1/documents/create",
            json={
                "format": "pdf",
                "content": "# PDF Report\n\nContent here.",
                "conversation_id": "conv-e2e-3",
            },
        )
        assert create_resp.status_code == 200, create_resp.text
        doc_id = create_resp.json()["document"]["id"]

        # List
        list_resp = client.get("/api/v1/documents/conversation/conv-e2e-3")
        assert list_resp.status_code == 200
        assert list_resp.json()["count"] == 1

        # Download and verify PDF magic
        dl_resp = client.get(f"/api/v1/documents/download/{doc_id}")
        assert dl_resp.status_code == 200
        assert dl_resp.content[:4] == b"%PDF"

    def test_e2e_multiple_documents_same_conversation(self, client: TestClient) -> None:
        """Multiple documents in same conversation — list shows all of them.

        Only presence and URL uniqueness are checked; list ordering is not
        asserted here.
        """
        conv_id = "conv-multi"

        # Create 3 documents
        for i, fmt in enumerate(["word", "excel", "pdf"]):
            resp = client.post(
                "/api/v1/documents/create",
                json={
                    "format": fmt,
                    "content": f"# Doc {i}",
                    "conversation_id": conv_id,
                },
            )
            assert resp.status_code == 200, f"{fmt}: {resp.text}"

        # List — all 3 present
        list_resp = client.get(f"/api/v1/documents/conversation/{conv_id}")
        assert list_resp.status_code == 200
        data = list_resp.json()
        assert data["count"] == 3
        formats = {d["format"] for d in data["documents"]}
        assert formats == {"word", "excel", "pdf"}

        # Each has a unique download URL
        urls = {d["download_url"] for d in data["documents"]}
        assert len(urls) == 3

    def test_e2e_download_returns_correct_filename(self, client: TestClient) -> None:
        """Download response includes the original filename in Content-Disposition."""
        create_resp = client.post(
            "/api/v1/documents/create",
            json={
                "format": "word",
                "content": "# Test",
                "conversation_id": "conv-fn",
                "filename": "my-report.docx",
            },
        )
        # Fail with the server's message rather than a KeyError on the body.
        assert create_resp.status_code == 200, create_resp.text
        doc_id = create_resp.json()["document"]["id"]

        dl_resp = client.get(f"/api/v1/documents/download/{doc_id}")
        assert dl_resp.status_code == 200
        # FileResponse sets filename in Content-Disposition
        assert "my-report.docx" in dl_resp.headers.get("content-disposition", "")
# ---------------------------------------------------------------------------
# F2: Template upload → create with template → download
# ---------------------------------------------------------------------------
class TestF2TemplateWorkflow:
    """F2: Upload template → Create with template → Download → Verify variables.

    After Bug 2 fix, template filling works with the standard WordRenderer
    registration — DocumentService lazy-loads TemplateRenderer internally.
    """

    @staticmethod
    def _upload_template(client: TestClient, template_path: Path) -> str:
        """Upload the .docx at *template_path* and return its ``stored_name``."""
        with template_path.open("rb") as f:
            upload_resp = client.post(
                "/api/v1/documents/upload-template",
                files={"file": (template_path.name, f, "application/octet-stream")},
            )
        assert upload_resp.status_code == 200, upload_resp.text
        return upload_resp.json()["stored_name"]

    def test_e2e_upload_template_create_download(
        self, client: TestClient, tmp_path: Path
    ) -> None:
        """Complete template workflow: upload → fill → download → verify."""
        # Step 1: Create a .docx template with Jinja2 placeholders
        template_doc = DocxDocument()
        template_doc.add_heading("Invoice {{invoice_number}}", level=1)
        template_doc.add_paragraph("Customer: {{customer_name}}")
        template_doc.add_paragraph("Amount: ${{amount}}")
        template_path = tmp_path / "invoice_template.docx"
        template_doc.save(str(template_path))

        # Step 2: Upload the template
        stored_name = self._upload_template(client, template_path)

        # Step 3: Create document using the template
        create_resp = client.post(
            "/api/v1/documents/create",
            json={
                "format": "word",
                "content": "",  # Ignored when template is provided
                "conversation_id": "conv-template",
                "template": stored_name,
                "template_data": {
                    "invoice_number": "INV-2026-001",
                    "customer_name": "Acme Corp",
                    "amount": "1,234.56",
                },
            },
        )
        assert create_resp.status_code == 200, create_resp.text
        doc_id = create_resp.json()["document"]["id"]

        # Step 4: Download and verify variables were replaced
        dl_resp = client.get(f"/api/v1/documents/download/{doc_id}")
        assert dl_resp.status_code == 200
        rendered = DocxDocument(io.BytesIO(dl_resp.content))
        text = "\n".join(p.text for p in rendered.paragraphs)
        assert "INV-2026-001" in text
        assert "Acme Corp" in text
        assert "1,234.56" in text
        # Placeholders should be gone
        assert "{{" not in text
        assert "}}" not in text

    def test_e2e_template_with_loop(
        self, client: TestClient, tmp_path: Path
    ) -> None:
        """Template with {% for %} loop — verify loop expands correctly."""
        template_doc = DocxDocument()
        template_doc.add_heading("Shopping List", level=1)
        # NOTE: docxtpl uses {%p %} for paragraph-level loops, {% %} for inline
        template_doc.add_paragraph("{%p for item in items %}")
        template_doc.add_paragraph("- {{item}}")
        template_doc.add_paragraph("{%p endfor %}")
        template_path = tmp_path / "loop_template.docx"
        template_doc.save(str(template_path))

        # The helper asserts the upload succeeded before the name is used.
        stored_name = self._upload_template(client, template_path)

        create_resp = client.post(
            "/api/v1/documents/create",
            json={
                "format": "word",
                "content": "",
                "conversation_id": "conv-loop",
                "template": stored_name,
                "template_data": {
                    "items": ["Apple", "Banana", "Cherry"],
                },
            },
        )
        assert create_resp.status_code == 200, create_resp.text
        doc_id = create_resp.json()["document"]["id"]

        dl_resp = client.get(f"/api/v1/documents/download/{doc_id}")
        assert dl_resp.status_code == 200
        rendered = DocxDocument(io.BytesIO(dl_resp.content))
        text = "\n".join(p.text for p in rendered.paragraphs)
        assert "Apple" in text
        assert "Banana" in text
        assert "Cherry" in text
        # Loop tags and placeholders should be gone after rendering
        assert "{%" not in text
        assert "{{" not in text
# ---------------------------------------------------------------------------
# F3: Cross-conversation isolation
# ---------------------------------------------------------------------------
class TestF3ConversationIsolation:
    """F3: Documents from one conversation don't leak to another."""

    def test_e2e_conversation_isolation(self, client: TestClient) -> None:
        """Documents in conv-A don't appear in conv-B's list."""
        conversations = (
            ("conv-A", "word", "# Conv A Doc"),
            ("conv-B", "pdf", "# Conv B Doc"),
        )

        # Create one document per conversation and remember its id.
        created_ids = {}
        for conv_id, fmt, content in conversations:
            create_resp = client.post(
                "/api/v1/documents/create",
                json={
                    "format": fmt,
                    "content": content,
                    "conversation_id": conv_id,
                },
            )
            assert create_resp.status_code == 200, create_resp.text
            created_ids[conv_id] = create_resp.json()["document"]["id"]

        # Each conversation lists exactly its own document. Comparing ids
        # (not only formats) pins identity rather than just document type.
        for conv_id, fmt, _ in conversations:
            list_resp = client.get(f"/api/v1/documents/conversation/{conv_id}")
            assert list_resp.status_code == 200
            docs = list_resp.json()["documents"]
            assert len(docs) == 1
            assert docs[0]["format"] == fmt
            assert docs[0]["id"] == created_ids[conv_id]

    def test_e2e_download_any_document_by_id(self, client: TestClient) -> None:
        """Download works by doc_id regardless of conversation (no ACL in v1)."""
        # Create in conv-X
        create_resp = client.post(
            "/api/v1/documents/create",
            json={
                "format": "word",
                "content": "# Downloadable",
                "conversation_id": "conv-X",
            },
        )
        assert create_resp.status_code == 200, create_resp.text
        doc_id = create_resp.json()["document"]["id"]

        # Download without specifying conversation — works (v1 has no ACL)
        dl_resp = client.get(f"/api/v1/documents/download/{doc_id}")
        assert dl_resp.status_code == 200
        assert len(dl_resp.content) > 0
# ---------------------------------------------------------------------------
# Data consistency checks
# ---------------------------------------------------------------------------
class TestDataConsistency:
    """Verify document metadata matches what the download endpoint serves."""

    def test_metadata_size_matches_file(self, client: TestClient) -> None:
        """Document metadata size equals the byte length of the downloaded file."""
        create_resp = client.post(
            "/api/v1/documents/create",
            json={
                "format": "word",
                "content": "# Size Check\n\nContent.",
                "conversation_id": "conv-size",
            },
        )
        assert create_resp.status_code == 200, create_resp.text
        doc = create_resp.json()["document"]  # parse the body once
        meta_size = doc["size"]
        doc_id = doc["id"]

        # Download and check actual size
        dl_resp = client.get(f"/api/v1/documents/download/{doc_id}")
        assert dl_resp.status_code == 200
        assert len(dl_resp.content) == meta_size

    def test_filename_has_correct_extension(self, client: TestClient) -> None:
        """Each format produces the correct file extension."""
        for fmt, ext in [("word", ".docx"), ("excel", ".xlsx"), ("pdf", ".pdf")]:
            resp = client.post(
                "/api/v1/documents/create",
                json={
                    "format": fmt,
                    "content": "# Test",
                    "conversation_id": f"conv-ext-{fmt}",
                },
            )
            # Name the failing format instead of raising KeyError on the body.
            assert resp.status_code == 200, f"{fmt}: {resp.text}"
            filename = resp.json()["document"]["filename"]
            assert filename.endswith(ext), f"{fmt} should produce {ext}, got {filename}"

    def test_custom_filename_preserved(self, client: TestClient) -> None:
        """Custom filename is preserved in metadata and download."""
        resp = client.post(
            "/api/v1/documents/create",
            json={
                "format": "pdf",
                "content": "# Custom Name",
                "conversation_id": "conv-custom",
                "filename": "quarterly-report.pdf",
            },
        )
        assert resp.status_code == 200, resp.text
        doc = resp.json()["document"]
        assert doc["filename"] == "quarterly-report.pdf"
        doc_id = doc["id"]

        dl_resp = client.get(f"/api/v1/documents/download/{doc_id}")
        assert dl_resp.status_code == 200
        assert "quarterly-report.pdf" in dl_resp.headers.get("content-disposition", "")