File size: 1,053 Bytes
243af33
 
 
 
 
 
 
 
 
 
 
 
07beedf
243af33
 
 
 
 
 
 
 
07beedf
243af33
07beedf
243af33
4f9eb00
243af33
07beedf
4f9eb00
243af33
07beedf
243af33
 
 
 
07beedf
243af33
a613433
243af33
a613433
07beedf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import os
import shutil
import subprocess
import tempfile
import uuid

import gradio as gr

def convert_pdf_to_html(pdf_file):
    """Convert an uploaded PDF to HTML by running the ``pdf2htmlEX`` tool.

    Args:
        pdf_file: The upload to convert. With ``gr.File(type="filepath")``
            this is a path string; ``os.PathLike`` objects and binary
            file-like objects exposing ``read()`` are accepted as well.

    Returns:
        Path of the generated ``input.html`` inside a fresh temporary
        directory. The file is deliberately left on disk: the caller reads
        it only after this function has returned.

    Raises:
        gr.Error: If nothing was uploaded, ``pdf2htmlEX`` cannot be started,
            it exits with a non-zero status, or no HTML file was produced.
        TypeError: If ``pdf_file`` is neither a path nor a readable object.
    """
    if pdf_file is None:
        # The handler is invoked with None when the form is submitted empty.
        raise gr.Error("Please upload a PDF file.")

    # mkdtemp creates a unique, private directory under the system temp dir.
    work_dir = tempfile.mkdtemp(prefix="pdf2html_")
    input_path = os.path.join(work_dir, "input.pdf")
    # pdf2htmlEX names its output after the input file's base name.
    output_path = os.path.join(work_dir, "input.html")

    try:
        # Stage the upload under a fixed name so the output name is known.
        if isinstance(pdf_file, (str, os.PathLike)):
            shutil.copyfile(pdf_file, input_path)
        elif hasattr(pdf_file, "read"):
            with open(input_path, "wb") as staged:
                shutil.copyfileobj(pdf_file, staged)
        else:
            raise TypeError(
                "pdf_file must be a path or a binary file-like object, "
                f"got {type(pdf_file).__name__}"
            )

        try:
            # "--embed cfijo" asks pdf2htmlEX to inline CSS, fonts, images,
            # JavaScript and the outline into the single HTML output.
            result = subprocess.run(
                [
                    "pdf2htmlEX",
                    "--dest-dir", work_dir,
                    "--embed", "cfijo",
                    input_path,
                ],
                capture_output=True,
                text=True,
                check=False,
            )
        except FileNotFoundError as exc:
            raise gr.Error("pdf2htmlEX executable was not found.") from exc

        # The output component is a file, so failures must be raised rather
        # than returned: a returned string would be treated as a file path.
        if result.returncode != 0:
            raise gr.Error(f"Error:\n{result.stderr}")

        if not os.path.exists(output_path):
            raise gr.Error("Conversion failed: output HTML not found.")
    except BaseException:
        # Nothing useful was produced; do not leave the directory behind.
        shutil.rmtree(work_dir, ignore_errors=True)
        raise

    # Success: drop the staged PDF copy but keep the HTML, which must still
    # exist when the caller picks it up. Removal is best-effort only.
    try:
        os.remove(input_path)
    except OSError:
        pass

    return output_path

# Web UI: one PDF upload in, one HTML download out, flagging disabled.
# The upload component hands the handler a filesystem path.
pdf_upload = gr.File(type="filepath", label="Upload PDF")
html_download = gr.File(label="Download HTML")

demo = gr.Interface(
    fn=convert_pdf_to_html,
    inputs=pdf_upload,
    outputs=html_download,
    allow_flagging="never",
)
demo.launch()