"""Gradio app that converts a PDF (uploaded or fetched by URL) to Markdown via docling."""

import os
import shutil
import tempfile

import gradio as gr
import requests
from docling.document_converter import DocumentConverter

# Created once at module import and shared by every conversion request.
converter = DocumentConverter()


def _resolve_source(pdf_file, pdf_url):
    """Pick the conversion source from the two UI inputs.

    Args:
        pdf_file: Value of the upload component. Either ``None``, a plain
            path string, or an object exposing the local path as ``.name``.
        pdf_url: Text typed in the URL box (may be empty or ``None``).

    Returns:
        A ``(source, preview_path)`` pair. ``preview_path`` is the local file
        path for an upload and ``None`` for a URL (no local file to preview).

    Raises:
        gr.Error: If neither an upload nor a non-blank URL was supplied.
    """
    if pdf_file is not None:
        # Fall back to the value itself when it is already a path string.
        local_path = getattr(pdf_file, "name", pdf_file)
        return local_path, local_path

    url = (pdf_url or "").strip()
    if url:
        return url, None

    raise gr.Error("Please upload a PDF or provide a URL.")


def convert_input(pdf_file, pdf_url):
    """Convert the uploaded PDF (preferred) or the PDF at ``pdf_url`` to Markdown.

    Args:
        pdf_file: Uploaded file from the ``gr.File`` input, or ``None``.
        pdf_url: URL string from the textbox; used only when no file is given.

    Returns:
        A 3-tuple ``(pdf_preview_path, markdown, markdown_path)`` matching the
        click handler's outputs. If conversion or writing the Markdown file
        fails, returns ``(None, "Error: <message>", None)`` instead.

    Raises:
        gr.Error: If neither input was provided.
    """
    # Validate outside the try block so the gr.Error is not swallowed by the
    # generic handler below.
    source, pdf_preview_path = _resolve_source(pdf_file, pdf_url)

    try:
        markdown = converter.convert(source).document.export_to_markdown()

        # delete=False: the file must outlive this function so it can be
        # returned to the download component by path.
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".md", mode="w", encoding="utf-8"
        ) as tmp_md:
            tmp_md.write(markdown)
            markdown_path = tmp_md.name
    except Exception as e:
        # Report failures in the Markdown pane rather than crashing the handler.
        return None, f"Error: {e}", None

    return pdf_preview_path, markdown, markdown_path


with gr.Blocks() as demo:
    gr.Markdown("# 📄 PDF to Markdown with `docling`")
    gr.Markdown(
        "Upload a PDF or enter a URL (e.g., from arXiv). \n"
        "View Markdown and download it."
    )

    with gr.Row():
        pdf_input = gr.File(label="📤 Upload PDF", file_types=[".pdf"])
        url_input = gr.Textbox(
            label="🌐 Or enter PDF URL (e.g., https://arxiv.org/pdf/...)"
        )

    convert_btn = gr.Button("Convert to Markdown")

    with gr.Row():
        pdf_preview = gr.File(label="📘 PDF Preview", interactive=False)
        md_file = gr.File(label="📝 Download Markdown", interactive=False)

    md_output = gr.Markdown(label="📄 Markdown Output")

    convert_btn.click(
        fn=convert_input,
        inputs=[pdf_input, url_input],
        outputs=[pdf_preview, md_output, md_file],
    )


if __name__ == "__main__":
    demo.launch()