# docling_demo / app.py
# ma7583's picture
# Update app.py
# 5155dfc verified
import gradio as gr
from docling.document_converter import DocumentConverter
import tempfile
import os
import shutil
import requests
# Module-level converter instance, created once at import time and reused by
# every call to convert_input instead of being rebuilt per request.
converter = DocumentConverter()
# def convert_input(pdf_file, pdf_url):
# temp_pdf_path = None
# try:
# if pdf_file:
# with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
# tmp.write(pdf_file.read())
# temp_pdf_path = tmp.name
# source = temp_pdf_path
# elif pdf_url:
# source = pdf_url
# else:
# raise gr.Error("Please upload a PDF or provide a URL.")
# doc = converter.convert(source).document
# markdown = doc.export_to_markdown()
# # Save markdown to a file
# with tempfile.NamedTemporaryFile(delete=False, suffix=".md", mode="w", encoding="utf-8") as tmp_md:
# tmp_md.write(markdown)
# markdown_path = tmp_md.name
# return pdf_file.name if pdf_file else None, markdown, markdown_path
# except Exception as e:
# return None, f"Error: {str(e)}", None
# finally:
# if temp_pdf_path and os.path.exists(temp_pdf_path):
# os.remove(temp_pdf_path)
def convert_input(pdf_file, pdf_url):
    """Convert an uploaded PDF, or a PDF at a URL, to Markdown.

    An uploaded file takes precedence over the URL when both are supplied.

    Args:
        pdf_file: Value of the file-upload input, or ``None`` when nothing was
            uploaded. May be an object exposing the local path as ``.name``
            or a plain path string.
        pdf_url: Text of the URL box. Surrounding whitespace is ignored; an
            empty, whitespace-only or ``None`` value counts as "no URL".

    Returns:
        A ``(pdf_preview_path, markdown, markdown_path)`` tuple:
        the local PDF path (``None`` for URL input), the Markdown text, and
        the path of a temporary ``.md`` file holding that text. On any
        failure the tuple is ``(None, "Error: <message>", None)`` so the
        message is shown in the Markdown output instead of raising.
    """
    url = (pdf_url or "").strip()

    if pdf_file is not None:
        # Accept both a file wrapper carrying ``.name`` and a bare path string.
        source = getattr(pdf_file, "name", pdf_file)
        pdf_preview_path = source
    elif url:
        source = url
        pdf_preview_path = None  # No local file to preview
    else:
        # Report missing input directly rather than raising an exception only
        # to have the handler below turn it back into this same tuple.
        return None, "Error: Please upload a PDF or provide a URL.", None

    try:
        markdown = converter.convert(source).document.export_to_markdown()
        # delete=False: the file must outlive this function so the download
        # component can serve it after we return.
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".md", mode="w", encoding="utf-8"
        ) as tmp_md:
            tmp_md.write(markdown)
        markdown_path = tmp_md.name
    except Exception as e:
        # UI boundary: surface any conversion or I/O failure as text.
        return None, f"Error: {e}", None

    return pdf_preview_path, markdown, markdown_path
# Gradio UI: inputs on top, a convert button, then the results.
# The emoji in the labels below were mis-encoded (mojibake) and are restored
# to the characters their UTF-8 byte sequences decode to.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 PDF to Markdown with `docling`")
    gr.Markdown("Upload a PDF or enter a URL (e.g., from arXiv). View Markdown and download it.")

    # Input row: either a local upload or a remote URL.
    with gr.Row():
        pdf_input = gr.File(label="📤 Upload PDF", file_types=[".pdf"])
        url_input = gr.Textbox(label="🌐 Or enter PDF URL (e.g., https://arxiv.org/pdf/...)")

    convert_btn = gr.Button("Convert to Markdown")

    # Result row: read-only file widgets for the source PDF and the .md file.
    with gr.Row():
        pdf_preview = gr.File(label="📘 PDF Preview", interactive=False)
        # NOTE(review): the original emoji here had lost its final byte;
        # 📝 is a best guess -- confirm against the intended label.
        md_file = gr.File(label="📝 Download Markdown", interactive=False)

    md_output = gr.Markdown(label="📄 Markdown Output")

    # Output order must match the tuple returned by convert_input:
    # (pdf_preview_path, markdown, markdown_path).
    convert_btn.click(
        fn=convert_input,
        inputs=[pdf_input, url_input],
        outputs=[pdf_preview, md_output, md_file],
    )

if __name__ == "__main__":
    demo.launch()