# Spaces: Running
import pytest | |
from fastapi import UploadFile | |
from io import BytesIO | |
from services.document_service import document_service | |
import os | |
@pytest.fixture
def sample_pdf() -> BytesIO:
    """Provide an in-memory stream that starts with a PDF header.

    The payload is only the ``%PDF-1.4`` header line followed by a comment
    line; it is a stub, not a complete PDF document.

    Registered as a pytest fixture so tests can request it by parameter name.
    """
    return BytesIO(b"%PDF-1.4\n%Test PDF content")
@pytest.fixture
def sample_docx() -> BytesIO:
    """Provide an in-memory stream that starts with a ZIP signature.

    The ten bytes begin with ``PK\\x03\\x04`` (the ZIP local-file-header
    signature, the container format used by ``.docx``); it is a stub, not a
    complete archive.

    Registered as a pytest fixture so tests can request it by parameter name.
    """
    return BytesIO(b"PK\x03\x04\x14\x00\x00\x00\x08\x00")
@pytest.fixture
def sample_image() -> BytesIO:
    """Provide an in-memory stream that starts with the PNG signature.

    The payload is the 8-byte PNG signature followed by the length field and
    type tag of an ``IHDR`` chunk; it is truncated there and is not a
    decodable image.

    Registered as a pytest fixture so tests can request it by parameter name.
    """
    return BytesIO(b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR")
@pytest.mark.asyncio
async def test_save_upload_file(sample_pdf):
    """Saving an upload returns a path ending in the filename that exists.

    Wraps the ``sample_pdf`` stream in an ``UploadFile`` named ``test.pdf``,
    awaits ``document_service.save_upload_file`` and checks the returned path.
    """
    upload = UploadFile(filename="test.pdf", file=sample_pdf)
    file_path = await document_service.save_upload_file(upload)
    try:
        assert file_path.endswith("test.pdf")
        assert os.path.exists(file_path)
    finally:
        # Do not leave the saved file behind for later tests or runs.
        if os.path.exists(file_path):
            os.remove(file_path)
@pytest.mark.asyncio
async def test_process_pdf(sample_pdf):
    """Processing a saved ``.pdf`` upload yields text and type ``"pdf"``.

    Saves the ``sample_pdf`` stream as ``test.pdf``, awaits
    ``document_service.process_document`` on the saved path, and checks that
    the result has a ``"text"`` key and that ``result["type"]`` is ``"pdf"``.
    """
    upload = UploadFile(filename="test.pdf", file=sample_pdf)
    file_path = await document_service.save_upload_file(upload)
    try:
        result = await document_service.process_document(file_path)
        assert "text" in result
        assert result["type"] == "pdf"
    finally:
        # Guarded removal: nothing visible here says whether processing
        # keeps the file, so only delete it if it is still present.
        if os.path.exists(file_path):
            os.remove(file_path)
@pytest.mark.asyncio
async def test_process_docx(sample_docx):
    """Processing a saved ``.docx`` upload yields text and type ``"word"``.

    Saves the ``sample_docx`` stream as ``test.docx``, awaits
    ``document_service.process_document`` on the saved path, and checks that
    the result has a ``"text"`` key and that ``result["type"]`` is ``"word"``.
    """
    upload = UploadFile(filename="test.docx", file=sample_docx)
    file_path = await document_service.save_upload_file(upload)
    try:
        result = await document_service.process_document(file_path)
        assert "text" in result
        assert result["type"] == "word"
    finally:
        # Guarded removal: nothing visible here says whether processing
        # keeps the file, so only delete it if it is still present.
        if os.path.exists(file_path):
            os.remove(file_path)
@pytest.mark.asyncio
async def test_process_image(sample_image):
    """Processing a saved ``.png`` upload yields text and type ``"image"``.

    Saves the ``sample_image`` stream as ``test.png``, awaits
    ``document_service.process_document`` on the saved path, and checks that
    the result has a ``"text"`` key and that ``result["type"]`` is ``"image"``.
    """
    upload = UploadFile(filename="test.png", file=sample_image)
    file_path = await document_service.save_upload_file(upload)
    try:
        result = await document_service.process_document(file_path)
        assert "text" in result
        assert result["type"] == "image"
    finally:
        # Guarded removal: nothing visible here says whether processing
        # keeps the file, so only delete it if it is still present.
        if os.path.exists(file_path):
            os.remove(file_path)
@pytest.mark.asyncio
async def test_segment_document():
    """Segmenting text yields at least one segment, none over the size cap.

    Awaits ``document_service.segment_document`` on five short lines with
    ``max_segment_size=20`` and checks every returned segment's length.
    """
    max_size = 20  # single source of truth for the call and the assertion
    text = "Line 1\nLine 2\nLine 3\nLine 4\nLine 5"
    segments = await document_service.segment_document(
        text, max_segment_size=max_size
    )
    assert len(segments) > 0
    assert all(len(segment) <= max_size for segment in segments)