"""Plain-text extraction helpers for uploaded DOCX, PDF, and TXT files."""

import logging
from io import BytesIO

import docx
import fitz  # PyMuPDF
from fastapi import HTTPException


def parse_docx(file: BytesIO) -> str:
    """Return the text of every paragraph in a DOCX file.

    Args:
        file: In-memory binary stream holding the DOCX document.

    Returns:
        The paragraph texts concatenated in document order, each one
        followed by a newline (so a non-empty result ends with ``"\\n"``).
    """
    doc = docx.Document(file)
    # Join once instead of growing a string with ``+=`` per paragraph.
    return "".join(f"{para.text}\n" for para in doc.paragraphs)


def parse_pdf(file: BytesIO) -> str:
    """Return the text of every page in a PDF file.

    Args:
        file: In-memory binary stream holding the PDF document.

    Returns:
        The text of all pages concatenated in page order, with no extra
        separator inserted between pages.

    Raises:
        HTTPException: With status 500 if opening the document or
            extracting text fails for any reason. The original exception
            is logged and attached as ``__cause__``.
    """
    try:
        # The context manager closes the document on success and on error;
        # the join is fully evaluated before the ``with`` block exits.
        with fitz.open(stream=file, filetype="pdf") as doc:
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        # Broad catch is deliberate: any parser failure is reported to the
        # client as a generic 500 while the details go to the log.
        logging.exception("Error while processing PDF: %s", e)
        raise HTTPException(
            status_code=500, detail="Error processing PDF file") from e


def parse_txt(file: BytesIO) -> str:
    """Return the contents of a text file decoded as UTF-8.

    Args:
        file: In-memory binary stream holding the text; it is read from
            its current position to the end.

    Returns:
        The decoded text.

    Raises:
        UnicodeDecodeError: If the bytes are not valid UTF-8.
    """
    return file.read().decode("utf-8")