jackkuo committed on
Commit
de61289
·
verified ·
1 Parent(s): 5675b50

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -0
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import fitz # PyMuPDF
3
+ from base64 import b64encode
4
+ from gradio_pdf import PDF
5
+
6
+
7
def read_pdf(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: Path of the PDF to read; it is passed straight to ``fitz.open``.

    Returns:
        The text of all pages, concatenated in page order.
    """
    # The context manager closes the document even when a page fails to
    # parse, which the previous explicit close() call did not guarantee.
    with fitz.open(file) as pdf_document:
        # Join once instead of growing a string with += on every page.
        return "".join(page.get_text() for page in pdf_document)
16
+
17
+
18
+ # def display_pdf(file):
19
+ # # 将PDF文件内容编码为base64
20
+ # with open(file.name, "rb") as f:
21
+ # encoded_pdf = b64encode(f.read()).decode('utf-8')
22
+ # # 使用HTML嵌入PDF查看器
23
+ # pdf_html = f'<iframe src="data:application/pdf;base64,{encoded_pdf}" width="100%" height="600px"></iframe>'
24
+ # return pdf_html
25
def display_pdf(file):
    """Return ``file`` unchanged so it can be used as the PDF viewer's value."""
    # Pass-through: the uploaded file path is handed on as-is.
    pdf_path = file
    return pdf_path
27
+
28
+
29
# Build the page with a Blocks layout.
with gr.Blocks() as app:
    # Page heading and a short usage hint.
    gr.Markdown('''<h1 align="center"> PDF Text Extractor </h1>''')
    gr.Markdown('''<p align="center">Upload a PDF file to extract its text and view it.</p>''')

    with gr.Row():
        # First column: upload widget, action buttons and the PDF viewer.
        with gr.Column(scale=1):
            # type="filepath" makes the callbacks receive a path string.
            file_input = gr.File(label="Upload a PDF file", type="filepath")
            with gr.Row():
                display_button = gr.Button("Display PDF", variant="primary")
                extract_button = gr.Button("Extract Text", variant="secondary")
            # The viewer is the PDF component from gradio_pdf
            # (an earlier revision used gr.HTML here).
            pdf_viewer = PDF(label="PDF Viewer")

        # Second column: read-only box holding the extracted text.
        with gr.Column(scale=1):
            text_output = gr.Textbox(
                label="Extracted Text",
                interactive=False,
                placeholder="Extracted text will appear here...",
                lines=49,
            )

    # Wire each button to its handler.
    extract_button.click(read_pdf, inputs=file_input, outputs=text_output)
    display_button.click(display_pdf, inputs=file_input, outputs=pdf_viewer)

app.launch()