xiaoyao9184 committed on
Commit
6512245
·
verified ·
1 Parent(s): 43544e4

Synced repo using 'sync_with_huggingface' GitHub Action

Browse files
Files changed (2) hide show
  1. gradio_app.py +12 -5
  2. requirements.txt +1 -1
gradio_app.py CHANGED
@@ -119,11 +119,13 @@ with gr.Blocks(title="Marker") as demo:
119
  page_range_txt = gr.Textbox(label="Page range to parse, comma separated like 0,5-10,20", value=f"")
120
  output_format_dd = gr.Dropdown(label="Output format", choices=["markdown", "json", "html"], value="markdown")
121
 
 
122
  force_ocr_ckb = gr.Checkbox(label="Force OCR", value=True, info="Force OCR on all pages")
123
  show_blocks_ckb = gr.Checkbox(label="Show Blocks", info="Display detected blocks, only when output is JSON", value=False, interactive=False)
124
  debug_ckb = gr.Checkbox(label="Debug", value=False, info="Show debug information")
125
- use_llm_ckb = gr.Checkbox(label="Use LLM", value=False, info="Use LLM for higher quality processing")
126
  strip_existing_ocr_ckb = gr.Checkbox(label="Strip existing OCR", value=False, info="Strip existing OCR text from the PDF and re-OCR.")
 
 
127
  run_marker_btn = gr.Button("Run Marker", interactive=False)
128
  with gr.Column():
129
  result_md = gr.Markdown(label="Result markdown", visible=False)
@@ -191,7 +193,7 @@ with gr.Blocks(title="Marker") as demo:
191
  )
192
 
193
  # Run Marker
194
- def run_marker_img(filename, page_range, force_ocr, output_format, show_blocks, debug, use_llm, strip_existing_ocr):
195
  """
196
  Run marker on the given PDF file and return processed results in multiple formats.
197
 
@@ -209,7 +211,10 @@ with gr.Blocks(title="Marker") as demo:
209
  Defaults to False.
210
  strip_existing_ocr (bool, optional): If True, strip embedded OCR text and re-run OCR.
211
  Defaults to False.
212
-
 
 
 
213
  Returns:
214
  tuple:
215
  - markdown_result (str): Markdown output string.
@@ -226,7 +231,9 @@ with gr.Blocks(title="Marker") as demo:
226
  "debug": debug,
227
  "output_dir": settings.DEBUG_DATA_FOLDER if debug else None,
228
  "use_llm": use_llm,
229
- "strip_existing_ocr": strip_existing_ocr
 
 
230
  }
231
  config_parser = ConfigParser(cli_options)
232
  rendered = convert_pdf(
@@ -310,7 +317,7 @@ with gr.Blocks(title="Marker") as demo:
310
 
311
  run_marker_btn.click(
312
  fn=run_marker_img,
313
- inputs=[in_file, page_range_txt, force_ocr_ckb, output_format_dd, show_blocks_ckb, debug_ckb, use_llm_ckb, strip_existing_ocr_ckb],
314
  outputs=[result_md, result_json, result_html, debug_img_pdf, debug_img_layout, in_img]
315
  )
316
 
 
119
  page_range_txt = gr.Textbox(label="Page range to parse, comma separated like 0,5-10,20", value=f"")
120
  output_format_dd = gr.Dropdown(label="Output format", choices=["markdown", "json", "html"], value="markdown")
121
 
122
+ use_llm_ckb = gr.Checkbox(label="Use LLM", value=False, info="Use LLM for higher quality processing")
123
  force_ocr_ckb = gr.Checkbox(label="Force OCR", value=True, info="Force OCR on all pages")
124
  show_blocks_ckb = gr.Checkbox(label="Show Blocks", info="Display detected blocks, only when output is JSON", value=False, interactive=False)
125
  debug_ckb = gr.Checkbox(label="Debug", value=False, info="Show debug information")
 
126
  strip_existing_ocr_ckb = gr.Checkbox(label="Strip existing OCR", value=False, info="Strip existing OCR text from the PDF and re-OCR.")
127
+ format_lines_ckb = gr.Checkbox(label="Format lines", value=False, info="Format lines in the document with OCR model")
128
+ disable_ocr_math_ckb = gr.Checkbox(label="Disable math", value=False, info="Disable math in OCR output - no inline math")
129
  run_marker_btn = gr.Button("Run Marker", interactive=False)
130
  with gr.Column():
131
  result_md = gr.Markdown(label="Result markdown", visible=False)
 
193
  )
194
 
195
  # Run Marker
196
+ def run_marker_img(filename, page_range, force_ocr, output_format, show_blocks, debug, use_llm, strip_existing_ocr, format_lines, disable_ocr_math):
197
  """
198
  Run marker on the given PDF file and return processed results in multiple formats.
199
 
 
211
  Defaults to False.
212
  strip_existing_ocr (bool, optional): If True, strip embedded OCR text and re-run OCR.
213
  Defaults to False.
214
+ format_lines (bool, optional): If True, format lines in the document with OCR model.
215
+ Defaults to False.
216
+ disable_ocr_math (bool, optional): If True, disable math in OCR output - no inline math.
217
+ Defaults to False.
218
  Returns:
219
  tuple:
220
  - markdown_result (str): Markdown output string.
 
231
  "debug": debug,
232
  "output_dir": settings.DEBUG_DATA_FOLDER if debug else None,
233
  "use_llm": use_llm,
234
+ "strip_existing_ocr": strip_existing_ocr,
235
+ "format_lines": format_lines,
236
+ "disable_ocr_math": disable_ocr_math,
237
  }
238
  config_parser = ConfigParser(cli_options)
239
  rendered = convert_pdf(
 
317
 
318
  run_marker_btn.click(
319
  fn=run_marker_img,
320
+ inputs=[in_file, page_range_txt, force_ocr_ckb, output_format_dd, show_blocks_ckb, debug_ckb, use_llm_ckb, strip_existing_ocr_ckb, format_lines_ckb, disable_ocr_math_ckb],
321
  outputs=[result_md, result_json, result_html, debug_img_pdf, debug_img_layout, in_img]
322
  )
323
 
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  torch==2.5.1
2
- marker-pdf[full]==1.7.0
3
  gradio[mcp]==5.28.0
4
  huggingface-hub==0.28.1
5
 
 
1
  torch==2.5.1
2
+ marker-pdf[full]==1.7.1
3
  gradio[mcp]==5.28.0
4
  huggingface-hub==0.28.1
5