"""Gradio app that shows an uploaded PDF and extracts its text with PyMuPDF."""

from base64 import b64encode

import fitz  # PyMuPDF
import gradio as gr
from gradio_pdf import PDF


def read_pdf(file):
    """Return the concatenated text of every page in a PDF.

    Args:
        file: Path of the PDF to read, as delivered by the ``gr.File``
            input below (``type="filepath"``). ``None`` means nothing has
            been uploaded yet.

    Returns:
        The text of all pages joined in page order, or an empty string
        when ``file`` is ``None``.
    """
    if file is None:
        # The button can be clicked before any upload; show an empty result.
        return ""

    # The context manager closes the document even if a page fails to parse.
    with fitz.open(file) as pdf_document:
        return "".join(page.get_text() for page in pdf_document)


def display_pdf(file):
    """Return the uploaded file path unchanged so the PDF component can load it.

    Args:
        file: Path of the uploaded PDF, or ``None`` when nothing is uploaded.

    Returns:
        The same value that was passed in.
    """
    return file


# Page layout: upload + viewer in the left column, extracted text on the right.
with gr.Blocks() as app:
    gr.Markdown('''
Upload a PDF file to extract its text and view it.
''')

    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath" hands the callbacks a path string.
            file_input = gr.File(label="Upload a PDF file", type="filepath")
            with gr.Row():
                display_button = gr.Button("Display PDF", variant="primary")
                extract_button = gr.Button("Extract Text", variant="secondary")
            # Viewer provided by the gradio_pdf package.
            pdf_viewer = PDF(label="PDF Viewer")

        with gr.Column(scale=1):
            text_output = gr.Textbox(
                label="Extracted Text",
                interactive=False,
                placeholder="Extracted text will appear here...",
                lines=49,
            )

    # Wire each button to its handler.
    extract_button.click(read_pdf, inputs=file_input, outputs=text_output)
    display_button.click(display_pdf, inputs=file_input, outputs=pdf_viewer)

app.launch()