zamal committed on
Commit
94f2e74
·
verified ·
1 Parent(s): 20a5a76

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -17
app.py CHANGED
@@ -16,6 +16,8 @@ import pymupdf
16
  from PIL import Image
17
  from pypdf import PdfReader
18
  from dotenv import load_dotenv
 
 
19
  from welcome_text import WELCOME_INTRO
20
 
21
  from doctr.io import DocumentFile
@@ -357,7 +359,7 @@ MODEL_OPTIONS = [
357
  ]
358
 
359
  with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
360
- # We only need a single State to track whether extraction has happened
361
  session_state = gr.State({})
362
 
363
  # ─── Welcome Screen ─────────────────────────────────────────────
@@ -372,8 +374,8 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
372
  with gr.Column(visible=False) as app_col:
373
  gr.Markdown("## 📚 Multimodal Chat-PDF Playground")
374
 
375
- with gr.Tabs():
376
- # Tab 1: Upload & Extract
377
  with gr.TabItem("1. Upload & Extract"):
378
  docs = gr.File(
379
  file_count="multiple",
@@ -391,11 +393,8 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
391
  label="OCR"
392
  )
393
  ocr_dd = gr.Dropdown(
394
- choices=[
395
- "db_resnet50 + crnn_mobilenet_v3_large",
396
- "db_resnet50 + crnn_resnet31"
397
- ],
398
- value="db_resnet50 + crnn_mobilenet_v3_large",
399
  label="OCR Model"
400
  )
401
  vlm_dd = gr.Dropdown(
@@ -419,6 +418,8 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
419
  )
420
  preview_html = gr.HTML()
421
 
 
 
422
  extract_btn.click(
423
  fn=extract_data_from_pdfs,
424
  inputs=[
@@ -430,15 +431,19 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
430
  vlm_dd
431
  ],
432
  outputs=[
433
- session_state, # updates processed flag
434
- preview_text, # shows sample text
435
- preview_img, # shows sample images
436
- preview_html # shows “Done!” message
437
  ]
 
 
 
 
438
  )
439
 
440
- # Tab 2: Chat
441
- with gr.TabItem("2. Chat"):
442
  with gr.Row():
443
  with gr.Column(scale=3):
444
  chat = gr.Chatbot(type="messages", label="Chat")
@@ -455,13 +460,13 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
455
  )
456
  num_ctx = gr.Slider(1, 20, value=3, label="Text Contexts")
457
  img_ctx = gr.Slider(1, 10, value=2, label="Image Contexts")
458
- temp = gr.Slider(0.1, 1.0, step=0.1, value=0.4, label="Temperature")
459
  max_tok = gr.Slider(10, 1000, step=10, value=200, label="Max Tokens")
460
 
461
  send.click(
462
  fn=conversation,
463
  inputs=[
464
- session_state, # drives conversation
465
  msg,
466
  num_ctx,
467
  img_ctx,
@@ -472,7 +477,7 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
472
  ],
473
  outputs=[
474
  chat,
475
- gr.Dataframe(), # returns the retrieved docs
476
  gr.Gallery(label="Relevant Images", rows=2, value=[])
477
  ]
478
  )
 
16
  from PIL import Image
17
  from pypdf import PdfReader
18
  from dotenv import load_dotenv
19
+ import shutil
20
+ from chromadb.config import Settings
21
  from welcome_text import WELCOME_INTRO
22
 
23
  from doctr.io import DocumentFile
 
359
  ]
360
 
361
  with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
362
+ # State to track extraction
363
  session_state = gr.State({})
364
 
365
  # ─── Welcome Screen ─────────────────────────────────────────────
 
374
  with gr.Column(visible=False) as app_col:
375
  gr.Markdown("## 📚 Multimodal Chat-PDF Playground")
376
 
377
+ with gr.Tabs() as tabs:
378
+ # ── Tab 1: Upload & Extract ───────────────────────────────
379
  with gr.TabItem("1. Upload & Extract"):
380
  docs = gr.File(
381
  file_count="multiple",
 
393
  label="OCR"
394
  )
395
  ocr_dd = gr.Dropdown(
396
+ choices=list(OCR_CHOICES.keys()),
397
+ value=list(OCR_CHOICES.keys())[0],
 
 
 
398
  label="OCR Model"
399
  )
400
  vlm_dd = gr.Dropdown(
 
418
  )
419
  preview_html = gr.HTML()
420
 
421
+ # 1) Run extraction
422
+ # 2) When done, reveal the Chat tab
423
  extract_btn.click(
424
  fn=extract_data_from_pdfs,
425
  inputs=[
 
431
  vlm_dd
432
  ],
433
  outputs=[
434
+ session_state,
435
+ preview_text,
436
+ preview_img,
437
+ preview_html
438
  ]
439
+ ).then(
440
+ fn=lambda: gr.update(visible=True),
441
+ inputs=[],
442
+ outputs=[tabs.select("2. Chat")]
443
  )
444
 
445
+ # ── Tab 2: Chat (starts hidden) ────────────────────────────
446
+ with gr.TabItem("2. Chat", visible=False) as chat_tab:
447
  with gr.Row():
448
  with gr.Column(scale=3):
449
  chat = gr.Chatbot(type="messages", label="Chat")
 
460
  )
461
  num_ctx = gr.Slider(1, 20, value=3, label="Text Contexts")
462
  img_ctx = gr.Slider(1, 10, value=2, label="Image Contexts")
463
+ temp = gr.Slider(0.1, 1.0, step=0.1, value=0.4, label="Temperature")
464
  max_tok = gr.Slider(10, 1000, step=10, value=200, label="Max Tokens")
465
 
466
  send.click(
467
  fn=conversation,
468
  inputs=[
469
+ session_state,
470
  msg,
471
  num_ctx,
472
  img_ctx,
 
477
  ],
478
  outputs=[
479
  chat,
480
+ gr.Dataframe(),
481
  gr.Gallery(label="Relevant Images", rows=2, value=[])
482
  ]
483
  )