"""End-to-end integration tests for document processing (F1, F2, F3). Verifies complete user flows: - F1: Create document → List → Download → Verify content - F2: Upload template → Create with template → Download → Verify variables replaced - F3: Cross-conversation isolation """ from __future__ import annotations import asyncio import io from pathlib import Path import pytest from docx import Document as DocxDocument from fastapi import FastAPI from fastapi.testclient import TestClient from openpyxl import load_workbook from agentkit.documents.db import init_documents_db from agentkit.documents.renderers.excel_renderer import ExcelRenderer from agentkit.documents.renderers.pdf_renderer import PDFRenderer from agentkit.documents.renderers.word_renderer import WordRenderer from agentkit.documents.service import DocumentService from agentkit.server.routes import documents as documents_routes @pytest.fixture def app(tmp_path: Path) -> FastAPI: """Test app with all renderers registered. After Bug 2 fix, TemplateRenderer is lazy-loaded by DocumentService when template_path is provided — no need to register it separately. """ db_path = tmp_path / "test.db" upload_dir = tmp_path / "uploads" asyncio.run(init_documents_db(db_path)) service = DocumentService(upload_dir=upload_dir, db_path=db_path) service.register_renderer("word", WordRenderer()) service.register_renderer("excel", ExcelRenderer()) service.register_renderer("pdf", PDFRenderer()) app = FastAPI() app.state.document_service = service app.state.server_config = None # No auth for E2E tests app.include_router(documents_routes.router, prefix="/api/v1") return app @pytest.fixture def client(app: FastAPI) -> TestClient: return TestClient(app) # --------------------------------------------------------------------------- # F1: Create → List → Download complete flow # --------------------------------------------------------------------------- class TestF1CreateListDownload: """F1: User creates a document, sees it in the list, downloads it.""" def test_e2e_word_create_list_download(self, client: TestClient) -> None: """Word: create → list contains it → download content matches.""" # Step 1: Create create_resp = client.post( "/api/v1/documents/create", json={ "format": "word", "content": "# E2E Report\n\nThis is the report content.", "conversation_id": "conv-e2e-1", }, ) assert create_resp.status_code == 200 doc = create_resp.json()["document"] doc_id = doc["id"] assert doc["format"] == "word" assert doc["filename"].endswith(".docx") assert doc["size"] > 0 # Step 2: List — document appears in conversation list_resp = client.get("/api/v1/documents/conversation/conv-e2e-1") assert list_resp.status_code == 200 docs = list_resp.json()["documents"] assert len(docs) == 1 assert docs[0]["id"] == doc_id assert docs[0]["download_url"] == f"/api/v1/documents/download/{doc_id}" # Step 3: Download — file content is valid dl_resp = client.get(f"/api/v1/documents/download/{doc_id}") assert dl_resp.status_code == 200 assert len(dl_resp.content) == doc["size"] # Step 4: Verify downloaded file is a valid .docx with correct content docx = DocxDocument(io.BytesIO(dl_resp.content)) text = "\n".join(p.text for p in docx.paragraphs) assert "E2E Report" in text assert "This is the report content" in text def test_e2e_excel_create_list_download(self, client: TestClient) -> None: """Excel: create → list → download → verify cell content.""" create_resp = client.post( "/api/v1/documents/create", json={ "format": "excel", "content": '{"Sales": [["Product", "Revenue"], ["Widget", "1000"], ["Gadget", "2000"]]}', "conversation_id": "conv-e2e-2", }, ) assert create_resp.status_code == 200 doc_id = create_resp.json()["document"]["id"] # List list_resp = client.get("/api/v1/documents/conversation/conv-e2e-2") assert list_resp.json()["count"] == 1 # Download and verify dl_resp = client.get(f"/api/v1/documents/download/{doc_id}") assert dl_resp.status_code == 200 wb = load_workbook(io.BytesIO(dl_resp.content)) ws = wb["Sales"] assert ws["A1"].value == "Product" assert ws["B1"].value == "Revenue" assert ws["A2"].value == "Widget" assert ws["B2"].value == "1000" wb.close() def test_e2e_pdf_create_list_download(self, client: TestClient) -> None: """PDF: create → list → download → verify PDF magic bytes.""" create_resp = client.post( "/api/v1/documents/create", json={ "format": "pdf", "content": "# PDF Report\n\nContent here.", "conversation_id": "conv-e2e-3", }, ) assert create_resp.status_code == 200 doc_id = create_resp.json()["document"]["id"] # List list_resp = client.get("/api/v1/documents/conversation/conv-e2e-3") assert list_resp.json()["count"] == 1 # Download and verify PDF magic dl_resp = client.get(f"/api/v1/documents/download/{doc_id}") assert dl_resp.status_code == 200 assert dl_resp.content[:4] == b"%PDF" def test_e2e_multiple_documents_same_conversation(self, client: TestClient) -> None: """Multiple documents in same conversation — list shows all, ordered.""" conv_id = "conv-multi" # Create 3 documents for i, fmt in enumerate(["word", "excel", "pdf"]): resp = client.post( "/api/v1/documents/create", json={ "format": fmt, "content": f"# Doc {i}", "conversation_id": conv_id, }, ) assert resp.status_code == 200 # List — all 3 present list_resp = client.get(f"/api/v1/documents/conversation/{conv_id}") assert list_resp.status_code == 200 data = list_resp.json() assert data["count"] == 3 formats = [d["format"] for d in data["documents"]] assert set(formats) == {"word", "excel", "pdf"} # Each has a unique download URL urls = [d["download_url"] for d in data["documents"]] assert len(set(urls)) == 3 def test_e2e_download_returns_correct_filename(self, client: TestClient) -> None: """Download response includes the original filename in Content-Disposition.""" create_resp = client.post( "/api/v1/documents/create", json={ "format": "word", "content": "# Test", "conversation_id": "conv-fn", "filename": "my-report.docx", }, ) doc_id = create_resp.json()["document"]["id"] dl_resp = client.get(f"/api/v1/documents/download/{doc_id}") assert dl_resp.status_code == 200 # FileResponse sets filename in Content-Disposition assert "my-report.docx" in dl_resp.headers.get("content-disposition", "") # --------------------------------------------------------------------------- # F2: Template upload → create with template → download # --------------------------------------------------------------------------- class TestF2TemplateWorkflow: """F2: Upload template → Create with template → Download → Verify variables. After Bug 2 fix, template filling works with the standard WordRenderer registration — DocumentService lazy-loads TemplateRenderer internally. """ def test_e2e_upload_template_create_download( self, client: TestClient, tmp_path: Path ) -> None: """Complete template workflow: upload → fill → download → verify.""" # Step 1: Create a .docx template with Jinja2 placeholders template_doc = DocxDocument() template_doc.add_heading("Invoice {{invoice_number}}", level=1) template_doc.add_paragraph("Customer: {{customer_name}}") template_doc.add_paragraph("Amount: ${{amount}}") template_path = tmp_path / "invoice_template.docx" template_doc.save(str(template_path)) # Step 2: Upload the template with open(template_path, "rb") as f: upload_resp = client.post( "/api/v1/documents/upload-template", files={"file": ("invoice_template.docx", f, "application/octet-stream")}, ) assert upload_resp.status_code == 200 stored_name = upload_resp.json()["stored_name"] # Step 3: Create document using the template create_resp = client.post( "/api/v1/documents/create", json={ "format": "word", "content": "", # Ignored when template is provided "conversation_id": "conv-template", "template": stored_name, "template_data": { "invoice_number": "INV-2026-001", "customer_name": "Acme Corp", "amount": "1,234.56", }, }, ) assert create_resp.status_code == 200, create_resp.text doc_id = create_resp.json()["document"]["id"] # Step 4: Download and verify variables were replaced dl_resp = client.get(f"/api/v1/documents/download/{doc_id}") assert dl_resp.status_code == 200 docx = DocxDocument(io.BytesIO(dl_resp.content)) text = "\n".join(p.text for p in docx.paragraphs) assert "INV-2026-001" in text assert "Acme Corp" in text assert "1,234.56" in text # Placeholders should be gone assert "{{" not in text assert "}}" not in text def test_e2e_template_with_loop( self, client: TestClient, tmp_path: Path ) -> None: """Template with {% for %} loop — verify loop expands correctly.""" template_doc = DocxDocument() template_doc.add_heading("Shopping List", level=1) # ponytail: docxtpl uses {%p %} for paragraph-level loops, {% %} for inline template_doc.add_paragraph("{%p for item in items %}") template_doc.add_paragraph("- {{item}}") template_doc.add_paragraph("{%p endfor %}") template_path = tmp_path / "loop_template.docx" template_doc.save(str(template_path)) with open(template_path, "rb") as f: upload_resp = client.post( "/api/v1/documents/upload-template", files={"file": ("loop_template.docx", f, "application/octet-stream")}, ) stored_name = upload_resp.json()["stored_name"] create_resp = client.post( "/api/v1/documents/create", json={ "format": "word", "content": "", "conversation_id": "conv-loop", "template": stored_name, "template_data": { "items": ["Apple", "Banana", "Cherry"], }, }, ) assert create_resp.status_code == 200, create_resp.text doc_id = create_resp.json()["document"]["id"] dl_resp = client.get(f"/api/v1/documents/download/{doc_id}") assert dl_resp.status_code == 200 docx = DocxDocument(io.BytesIO(dl_resp.content)) text = "\n".join(p.text for p in docx.paragraphs) assert "Apple" in text assert "Banana" in text assert "Cherry" in text # --------------------------------------------------------------------------- # F3: Cross-conversation isolation # --------------------------------------------------------------------------- class TestF3ConversationIsolation: """F3: Documents from one conversation don't leak to another.""" def test_e2e_conversation_isolation(self, client: TestClient) -> None: """Documents in conv-A don't appear in conv-B's list.""" # Create in conv-A client.post( "/api/v1/documents/create", json={ "format": "word", "content": "# Conv A Doc", "conversation_id": "conv-A", }, ) # Create in conv-B client.post( "/api/v1/documents/create", json={ "format": "pdf", "content": "# Conv B Doc", "conversation_id": "conv-B", }, ) # List conv-A — only conv-A's doc resp_a = client.get("/api/v1/documents/conversation/conv-A") docs_a = resp_a.json()["documents"] assert len(docs_a) == 1 assert docs_a[0]["format"] == "word" # List conv-B — only conv-B's doc resp_b = client.get("/api/v1/documents/conversation/conv-B") docs_b = resp_b.json()["documents"] assert len(docs_b) == 1 assert docs_b[0]["format"] == "pdf" def test_e2e_download_any_document_by_id(self, client: TestClient) -> None: """Download works by doc_id regardless of conversation (no ACL in v1).""" # Create in conv-A create_resp = client.post( "/api/v1/documents/create", json={ "format": "word", "content": "# Downloadable", "conversation_id": "conv-X", }, ) doc_id = create_resp.json()["document"]["id"] # Download without specifying conversation — works (v1 has no ACL) dl_resp = client.get(f"/api/v1/documents/download/{doc_id}") assert dl_resp.status_code == 200 assert len(dl_resp.content) > 0 # --------------------------------------------------------------------------- # Data consistency checks # --------------------------------------------------------------------------- class TestDataConsistency: """Verify metadata matches actual files on disk.""" def test_metadata_size_matches_file(self, client: TestClient) -> None: """Document metadata size equals actual file size on disk.""" create_resp = client.post( "/api/v1/documents/create", json={ "format": "word", "content": "# Size Check\n\nContent.", "conversation_id": "conv-size", }, ) meta_size = create_resp.json()["document"]["size"] doc_id = create_resp.json()["document"]["id"] # Download and check actual size dl_resp = client.get(f"/api/v1/documents/download/{doc_id}") assert len(dl_resp.content) == meta_size def test_filename_has_correct_extension(self, client: TestClient) -> None: """Each format produces the correct file extension.""" for fmt, ext in [("word", ".docx"), ("excel", ".xlsx"), ("pdf", ".pdf")]: resp = client.post( "/api/v1/documents/create", json={ "format": fmt, "content": "# Test", "conversation_id": f"conv-ext-{fmt}", }, ) filename = resp.json()["document"]["filename"] assert filename.endswith(ext), f"{fmt} should produce {ext}, got {filename}" def test_custom_filename_preserved(self, client: TestClient) -> None: """Custom filename is preserved in metadata and download.""" resp = client.post( "/api/v1/documents/create", json={ "format": "pdf", "content": "# Custom Name", "conversation_id": "conv-custom", "filename": "quarterly-report.pdf", }, ) assert resp.json()["document"]["filename"] == "quarterly-report.pdf" doc_id = resp.json()["document"]["id"] dl_resp = client.get(f"/api/v1/documents/download/{doc_id}") assert "quarterly-report.pdf" in dl_resp.headers.get("content-disposition", "")