425 lines
16 KiB
Python
425 lines
16 KiB
Python
"""End-to-end integration tests for document processing (F1, F2, F3).

Verifies complete user flows:

- F1: Create document → List → Download → Verify content
- F2: Upload template → Create with template → Download → Verify variables replaced
- F3: Cross-conversation isolation
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import io
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
from docx import Document as DocxDocument
|
|
from fastapi import FastAPI
|
|
from fastapi.testclient import TestClient
|
|
from openpyxl import load_workbook
|
|
|
|
from agentkit.documents.db import init_documents_db
|
|
from agentkit.documents.renderers.excel_renderer import ExcelRenderer
|
|
from agentkit.documents.renderers.pdf_renderer import PDFRenderer
|
|
from agentkit.documents.renderers.word_renderer import WordRenderer
|
|
from agentkit.documents.service import DocumentService
|
|
from agentkit.server.routes import documents as documents_routes
|
|
|
|
|
|
@pytest.fixture
def app(tmp_path: Path) -> FastAPI:
    """Build a FastAPI app wired to a fresh DocumentService for one test.

    The documents database and the upload directory both live under
    ``tmp_path``. Word, Excel and PDF renderers are registered explicitly.

    After Bug 2 fix, TemplateRenderer is lazy-loaded by DocumentService
    when template_path is provided — no need to register it separately.
    """
    database_file = tmp_path / "test.db"
    asyncio.run(init_documents_db(database_file))

    document_service = DocumentService(
        upload_dir=tmp_path / "uploads",
        db_path=database_file,
    )
    # Instantiate each renderer right before registering it, in a fixed order.
    for format_name, renderer_cls in (
        ("word", WordRenderer),
        ("excel", ExcelRenderer),
        ("pdf", PDFRenderer),
    ):
        document_service.register_renderer(format_name, renderer_cls())

    application = FastAPI()
    application.state.document_service = document_service
    application.state.server_config = None  # No auth for E2E tests
    application.include_router(documents_routes.router, prefix="/api/v1")
    return application
|
|
|
|
|
|
@pytest.fixture
def client(app: FastAPI) -> TestClient:
    """Provide a ``TestClient`` bound to the ``app`` fixture."""
    test_client = TestClient(app)
    return test_client
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# F1: Create → List → Download complete flow
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestF1CreateListDownload:
    """F1: User creates a document, sees it in the list, downloads it.

    Every test drives the HTTP API through the ``client`` fixture and uses a
    distinct ``conversation_id``. Each response's status code is asserted
    before its body is read, so a failing endpoint is reported as a status
    mismatch (with the response text where it is JSON) instead of a
    ``KeyError`` further down.
    """

    def test_e2e_word_create_list_download(self, client: TestClient) -> None:
        """Word: create → list contains it → download content matches."""
        # Step 1: Create
        create_resp = client.post(
            "/api/v1/documents/create",
            json={
                "format": "word",
                "content": "# E2E Report\n\nThis is the report content.",
                "conversation_id": "conv-e2e-1",
            },
        )
        assert create_resp.status_code == 200, create_resp.text
        doc = create_resp.json()["document"]
        doc_id = doc["id"]
        assert doc["format"] == "word"
        assert doc["filename"].endswith(".docx")
        assert doc["size"] > 0

        # Step 2: List — document appears in conversation
        list_resp = client.get("/api/v1/documents/conversation/conv-e2e-1")
        assert list_resp.status_code == 200, list_resp.text
        docs = list_resp.json()["documents"]
        assert len(docs) == 1
        assert docs[0]["id"] == doc_id
        assert docs[0]["download_url"] == f"/api/v1/documents/download/{doc_id}"

        # Step 3: Download — payload size matches the reported metadata
        dl_resp = client.get(f"/api/v1/documents/download/{doc_id}")
        assert dl_resp.status_code == 200
        assert len(dl_resp.content) == doc["size"]

        # Step 4: Verify downloaded file is a valid .docx with correct content
        downloaded = DocxDocument(io.BytesIO(dl_resp.content))
        text = "\n".join(p.text for p in downloaded.paragraphs)
        assert "E2E Report" in text
        assert "This is the report content" in text

    def test_e2e_excel_create_list_download(self, client: TestClient) -> None:
        """Excel: create → list → download → verify cell content."""
        create_resp = client.post(
            "/api/v1/documents/create",
            json={
                "format": "excel",
                "content": '{"Sales": [["Product", "Revenue"], ["Widget", "1000"], ["Gadget", "2000"]]}',
                "conversation_id": "conv-e2e-2",
            },
        )
        assert create_resp.status_code == 200, create_resp.text
        doc_id = create_resp.json()["document"]["id"]

        # List
        list_resp = client.get("/api/v1/documents/conversation/conv-e2e-2")
        assert list_resp.status_code == 200, list_resp.text
        assert list_resp.json()["count"] == 1

        # Download and verify
        dl_resp = client.get(f"/api/v1/documents/download/{doc_id}")
        assert dl_resp.status_code == 200

        wb = load_workbook(io.BytesIO(dl_resp.content))
        try:
            ws = wb["Sales"]
            assert ws["A1"].value == "Product"
            assert ws["B1"].value == "Revenue"
            assert ws["A2"].value == "Widget"
            assert ws["B2"].value == "1000"
        finally:
            # Close the workbook even when one of the cell assertions fails.
            wb.close()

    def test_e2e_pdf_create_list_download(self, client: TestClient) -> None:
        """PDF: create → list → download → verify PDF magic bytes."""
        create_resp = client.post(
            "/api/v1/documents/create",
            json={
                "format": "pdf",
                "content": "# PDF Report\n\nContent here.",
                "conversation_id": "conv-e2e-3",
            },
        )
        assert create_resp.status_code == 200, create_resp.text
        doc_id = create_resp.json()["document"]["id"]

        # List
        list_resp = client.get("/api/v1/documents/conversation/conv-e2e-3")
        assert list_resp.status_code == 200, list_resp.text
        assert list_resp.json()["count"] == 1

        # Download and verify PDF magic
        dl_resp = client.get(f"/api/v1/documents/download/{doc_id}")
        assert dl_resp.status_code == 200
        assert dl_resp.content[:4] == b"%PDF"

    def test_e2e_multiple_documents_same_conversation(self, client: TestClient) -> None:
        """Multiple documents in same conversation — list shows all of them.

        Ordering of the listing is not asserted; formats are compared as a set.
        """
        conv_id = "conv-multi"

        # Create 3 documents, one per format
        for i, fmt in enumerate(["word", "excel", "pdf"]):
            resp = client.post(
                "/api/v1/documents/create",
                json={
                    "format": fmt,
                    "content": f"# Doc {i}",
                    "conversation_id": conv_id,
                },
            )
            assert resp.status_code == 200, f"{fmt}: {resp.text}"

        # List — all 3 present
        list_resp = client.get(f"/api/v1/documents/conversation/{conv_id}")
        assert list_resp.status_code == 200, list_resp.text
        data = list_resp.json()
        assert data["count"] == 3

        formats = [d["format"] for d in data["documents"]]
        assert set(formats) == {"word", "excel", "pdf"}

        # Each has a unique download URL
        urls = [d["download_url"] for d in data["documents"]]
        assert len(set(urls)) == 3

    def test_e2e_download_returns_correct_filename(self, client: TestClient) -> None:
        """Download response includes the original filename in Content-Disposition."""
        create_resp = client.post(
            "/api/v1/documents/create",
            json={
                "format": "word",
                "content": "# Test",
                "conversation_id": "conv-fn",
                "filename": "my-report.docx",
            },
        )
        assert create_resp.status_code == 200, create_resp.text
        doc_id = create_resp.json()["document"]["id"]

        dl_resp = client.get(f"/api/v1/documents/download/{doc_id}")
        assert dl_resp.status_code == 200
        # FileResponse sets filename in Content-Disposition
        assert "my-report.docx" in dl_resp.headers.get("content-disposition", "")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# F2: Template upload → create with template → download
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestF2TemplateWorkflow:
    """F2: Upload template → Create with template → Download → Verify variables.

    After Bug 2 fix, template filling works with the standard WordRenderer
    registration — DocumentService lazy-loads TemplateRenderer internally.
    """

    @staticmethod
    def _upload_template(client: TestClient, template_path: Path) -> str:
        """Upload ``template_path`` and return the ``stored_name`` from the response.

        The upload status is asserted here so that every test using a template
        reports a failed upload with the response body instead of failing
        later with a ``KeyError``.
        """
        with open(template_path, "rb") as f:
            upload_resp = client.post(
                "/api/v1/documents/upload-template",
                files={"file": (template_path.name, f, "application/octet-stream")},
            )
        assert upload_resp.status_code == 200, upload_resp.text
        return upload_resp.json()["stored_name"]

    def test_e2e_upload_template_create_download(
        self, client: TestClient, tmp_path: Path
    ) -> None:
        """Complete template workflow: upload → fill → download → verify."""
        # Step 1: Create a .docx template with Jinja2 placeholders
        template_doc = DocxDocument()
        template_doc.add_heading("Invoice {{invoice_number}}", level=1)
        template_doc.add_paragraph("Customer: {{customer_name}}")
        template_doc.add_paragraph("Amount: ${{amount}}")
        template_path = tmp_path / "invoice_template.docx"
        template_doc.save(str(template_path))

        # Step 2: Upload the template
        stored_name = self._upload_template(client, template_path)

        # Step 3: Create document using the template
        create_resp = client.post(
            "/api/v1/documents/create",
            json={
                "format": "word",
                "content": "",  # Ignored when template is provided
                "conversation_id": "conv-template",
                "template": stored_name,
                "template_data": {
                    "invoice_number": "INV-2026-001",
                    "customer_name": "Acme Corp",
                    "amount": "1,234.56",
                },
            },
        )
        assert create_resp.status_code == 200, create_resp.text
        doc_id = create_resp.json()["document"]["id"]

        # Step 4: Download and verify variables were replaced
        dl_resp = client.get(f"/api/v1/documents/download/{doc_id}")
        assert dl_resp.status_code == 200

        rendered = DocxDocument(io.BytesIO(dl_resp.content))
        text = "\n".join(p.text for p in rendered.paragraphs)
        assert "INV-2026-001" in text
        assert "Acme Corp" in text
        assert "1,234.56" in text
        # Placeholders should be gone
        assert "{{" not in text
        assert "}}" not in text

    def test_e2e_template_with_loop(
        self, client: TestClient, tmp_path: Path
    ) -> None:
        """Template with {% for %} loop — verify loop expands correctly."""
        template_doc = DocxDocument()
        template_doc.add_heading("Shopping List", level=1)
        # NOTE: docxtpl uses {%p %} for paragraph-level loops, {% %} for inline
        template_doc.add_paragraph("{%p for item in items %}")
        template_doc.add_paragraph("- {{item}}")
        template_doc.add_paragraph("{%p endfor %}")
        template_path = tmp_path / "loop_template.docx"
        template_doc.save(str(template_path))

        stored_name = self._upload_template(client, template_path)

        create_resp = client.post(
            "/api/v1/documents/create",
            json={
                "format": "word",
                "content": "",
                "conversation_id": "conv-loop",
                "template": stored_name,
                "template_data": {
                    "items": ["Apple", "Banana", "Cherry"],
                },
            },
        )
        assert create_resp.status_code == 200, create_resp.text
        doc_id = create_resp.json()["document"]["id"]

        dl_resp = client.get(f"/api/v1/documents/download/{doc_id}")
        assert dl_resp.status_code == 200

        rendered = DocxDocument(io.BytesIO(dl_resp.content))
        text = "\n".join(p.text for p in rendered.paragraphs)
        # Every item passed in template_data must appear in the rendered text
        assert "Apple" in text
        assert "Banana" in text
        assert "Cherry" in text
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# F3: Cross-conversation isolation
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestF3ConversationIsolation:
    """F3: Documents from one conversation don't leak to another."""

    def test_e2e_conversation_isolation(self, client: TestClient) -> None:
        """Documents in conv-A don't appear in conv-B's list, and vice versa.

        Each listing is checked by document id as well as by format, so the
        test pins exactly which document is returned for each conversation.
        """
        # Create in conv-A
        create_a = client.post(
            "/api/v1/documents/create",
            json={
                "format": "word",
                "content": "# Conv A Doc",
                "conversation_id": "conv-A",
            },
        )
        assert create_a.status_code == 200, create_a.text
        id_a = create_a.json()["document"]["id"]

        # Create in conv-B
        create_b = client.post(
            "/api/v1/documents/create",
            json={
                "format": "pdf",
                "content": "# Conv B Doc",
                "conversation_id": "conv-B",
            },
        )
        assert create_b.status_code == 200, create_b.text
        id_b = create_b.json()["document"]["id"]

        # List conv-A — only conv-A's doc
        resp_a = client.get("/api/v1/documents/conversation/conv-A")
        assert resp_a.status_code == 200, resp_a.text
        docs_a = resp_a.json()["documents"]
        assert len(docs_a) == 1
        assert docs_a[0]["id"] == id_a
        assert docs_a[0]["format"] == "word"

        # List conv-B — only conv-B's doc
        resp_b = client.get("/api/v1/documents/conversation/conv-B")
        assert resp_b.status_code == 200, resp_b.text
        docs_b = resp_b.json()["documents"]
        assert len(docs_b) == 1
        assert docs_b[0]["id"] == id_b
        assert docs_b[0]["format"] == "pdf"

    def test_e2e_download_any_document_by_id(self, client: TestClient) -> None:
        """Download works by doc_id regardless of conversation (no ACL in v1)."""
        # Create in conv-X
        create_resp = client.post(
            "/api/v1/documents/create",
            json={
                "format": "word",
                "content": "# Downloadable",
                "conversation_id": "conv-X",
            },
        )
        assert create_resp.status_code == 200, create_resp.text
        doc_id = create_resp.json()["document"]["id"]

        # Download without specifying conversation — works (v1 has no ACL)
        dl_resp = client.get(f"/api/v1/documents/download/{doc_id}")
        assert dl_resp.status_code == 200
        assert len(dl_resp.content) > 0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Data consistency checks
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestDataConsistency:
    """Verify document metadata is consistent with what the API serves.

    These tests go through the HTTP API only; file sizes and names are
    compared against the download response, not read from disk directly.
    """

    def test_metadata_size_matches_file(self, client: TestClient) -> None:
        """Document metadata size equals the size of the downloaded payload."""
        create_resp = client.post(
            "/api/v1/documents/create",
            json={
                "format": "word",
                "content": "# Size Check\n\nContent.",
                "conversation_id": "conv-size",
            },
        )
        assert create_resp.status_code == 200, create_resp.text
        # Parse the body once and read both fields from the same object.
        document = create_resp.json()["document"]
        meta_size = document["size"]
        doc_id = document["id"]

        # Download and check actual size
        dl_resp = client.get(f"/api/v1/documents/download/{doc_id}")
        assert dl_resp.status_code == 200
        assert len(dl_resp.content) == meta_size

    def test_filename_has_correct_extension(self, client: TestClient) -> None:
        """Each format produces the correct file extension."""
        for fmt, ext in [("word", ".docx"), ("excel", ".xlsx"), ("pdf", ".pdf")]:
            resp = client.post(
                "/api/v1/documents/create",
                json={
                    "format": fmt,
                    "content": "# Test",
                    "conversation_id": f"conv-ext-{fmt}",
                },
            )
            assert resp.status_code == 200, f"{fmt}: {resp.text}"
            filename = resp.json()["document"]["filename"]
            assert filename.endswith(ext), f"{fmt} should produce {ext}, got {filename}"

    def test_custom_filename_preserved(self, client: TestClient) -> None:
        """Custom filename is preserved in metadata and download."""
        resp = client.post(
            "/api/v1/documents/create",
            json={
                "format": "pdf",
                "content": "# Custom Name",
                "conversation_id": "conv-custom",
                "filename": "quarterly-report.pdf",
            },
        )
        assert resp.status_code == 200, resp.text
        document = resp.json()["document"]
        assert document["filename"] == "quarterly-report.pdf"

        doc_id = document["id"]
        dl_resp = client.get(f"/api/v1/documents/download/{doc_id}")
        assert dl_resp.status_code == 200
        assert "quarterly-report.pdf" in dl_resp.headers.get("content-disposition", "")
|