"""Async tests for ``document_service``: upload saving, processing, segmentation.

The sample payloads below are only short byte prefixes (file signatures), not
complete documents.
NOTE(review): whether ``process_document`` can extract text from such
truncated payloads depends on the service implementation - confirm.
"""

import os
from io import BytesIO

import pytest
from fastapi import UploadFile

from services.document_service import document_service


@pytest.fixture
def sample_pdf():
    """Return an in-memory stream that starts with a ``%PDF-1.4`` header."""
    return BytesIO(b"%PDF-1.4\n%Test PDF content")


@pytest.fixture
def sample_docx():
    """Return an in-memory stream that starts with the ZIP ``PK\\x03\\x04`` signature."""
    return BytesIO(b"PK\x03\x04\x14\x00\x00\x00\x08\x00")


@pytest.fixture
def sample_image():
    """Return an in-memory stream holding the PNG signature and an ``IHDR`` tag."""
    return BytesIO(b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR")


async def _save_upload(filename, payload):
    """Wrap *payload* in an ``UploadFile`` named *filename* and save it.

    Returns whatever ``document_service.save_upload_file`` returns; the tests
    below treat that value as a filesystem path.
    """
    upload = UploadFile(filename=filename, file=payload)
    return await document_service.save_upload_file(upload)


def _remove_if_present(path):
    """Delete *path* if it exists so test runs do not leave files behind."""
    # Guarded so cleanup never masks the real assertion failure, e.g. when
    # the file was never written or was already removed.
    if os.path.exists(path):
        os.remove(path)


@pytest.mark.asyncio
async def test_save_upload_file(sample_pdf):
    """Saving an upload returns a path ending in the file name that exists on disk."""
    file_path = await _save_upload("test.pdf", sample_pdf)
    try:
        assert file_path.endswith("test.pdf")
        assert os.path.exists(file_path)
    finally:
        _remove_if_present(file_path)


@pytest.mark.asyncio
async def test_process_pdf(sample_pdf):
    """Processing a saved ``.pdf`` upload yields a ``text`` key and type ``pdf``."""
    file_path = await _save_upload("test.pdf", sample_pdf)
    try:
        result = await document_service.process_document(file_path)
        assert "text" in result
        assert result["type"] == "pdf"
    finally:
        _remove_if_present(file_path)


@pytest.mark.asyncio
async def test_process_docx(sample_docx):
    """Processing a saved ``.docx`` upload yields a ``text`` key and type ``word``."""
    file_path = await _save_upload("test.docx", sample_docx)
    try:
        result = await document_service.process_document(file_path)
        assert "text" in result
        assert result["type"] == "word"
    finally:
        _remove_if_present(file_path)


@pytest.mark.asyncio
async def test_process_image(sample_image):
    """Processing a saved ``.png`` upload yields a ``text`` key and type ``image``."""
    file_path = await _save_upload("test.png", sample_image)
    try:
        result = await document_service.process_document(file_path)
        assert "text" in result
        assert result["type"] == "image"
    finally:
        _remove_if_present(file_path)


@pytest.mark.asyncio
async def test_segment_document():
    """Segmentation returns at least one segment, none longer than the limit."""
    text = "Line 1\nLine 2\nLine 3\nLine 4\nLine 5"
    segments = await document_service.segment_document(text, max_segment_size=20)
    assert len(segments) > 0
    assert all(len(segment) <= 20 for segment in segments)