Spaces:

zamal
/

Multimodal-Chat-Playground

Running on Zero

App Files Files Community

zamal committed on May 30

Commit

94f2e74

verified ·

1 Parent(s): 20a5a76

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -17

app.py CHANGED Viewed

@@ -16,6 +16,8 @@ import pymupdf
 from PIL import Image
 from pypdf import PdfReader
 from dotenv import load_dotenv
 from welcome_text import WELCOME_INTRO
 from doctr.io import DocumentFile
@@ -357,7 +359,7 @@ MODEL_OPTIONS = [
 ]
 with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
-    # We only need a single State to track whether extraction has happened
     session_state = gr.State({})
     # ─── Welcome Screen ─────────────────────────────────────────────
@@ -372,8 +374,8 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
     with gr.Column(visible=False) as app_col:
         gr.Markdown("## 📚 Multimodal Chat-PDF Playground")
-        with gr.Tabs():
-            # Tab 1: Upload & Extract
             with gr.TabItem("1. Upload & Extract"):
                 docs = gr.File(
                     file_count="multiple",
@@ -391,11 +393,8 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
                     label="OCR"
                 )
                 ocr_dd = gr.Dropdown(
-                    choices=[
-                        "db_resnet50 + crnn_mobilenet_v3_large",
-                        "db_resnet50 + crnn_resnet31"
-                    ],
-                    value="db_resnet50 + crnn_mobilenet_v3_large",
                     label="OCR Model"
                 )
                 vlm_dd = gr.Dropdown(
@@ -419,6 +418,8 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
                 )
                 preview_html = gr.HTML()
                 extract_btn.click(
                     fn=extract_data_from_pdfs,
                     inputs=[
@@ -430,15 +431,19 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
                         vlm_dd
                     ],
                     outputs=[
-                        session_state,   # updates processed flag
-                        preview_text,    # shows sample text
-                        preview_img,     # shows sample images
-                        preview_html     # shows “Done!” message
                     ]
                 )
-            # Tab 2: Chat
-            with gr.TabItem("2. Chat"):
                 with gr.Row():
                     with gr.Column(scale=3):
                         chat = gr.Chatbot(type="messages", label="Chat")
@@ -455,13 +460,13 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
                         )
                         num_ctx = gr.Slider(1, 20, value=3, label="Text Contexts")
                         img_ctx = gr.Slider(1, 10, value=2, label="Image Contexts")
-                        temp = gr.Slider(0.1, 1.0, step=0.1, value=0.4, label="Temperature")
                         max_tok = gr.Slider(10, 1000, step=10, value=200, label="Max Tokens")
                 send.click(
                     fn=conversation,
                     inputs=[
-                        session_state,  # drives conversation
                         msg,
                         num_ctx,
                         img_ctx,
@@ -472,7 +477,7 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
                     ],
                     outputs=[
                         chat,
-                        gr.Dataframe(),  # returns the retrieved docs
                         gr.Gallery(label="Relevant Images", rows=2, value=[])
                     ]
                 )

 from PIL import Image
 from pypdf import PdfReader
 from dotenv import load_dotenv
+import shutil
+from chromadb.config import Settings
 from welcome_text import WELCOME_INTRO
 from doctr.io import DocumentFile
 ]
 with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
+    # State to track extraction
     session_state = gr.State({})
     # ─── Welcome Screen ─────────────────────────────────────────────
     with gr.Column(visible=False) as app_col:
         gr.Markdown("## 📚 Multimodal Chat-PDF Playground")
+        with gr.Tabs() as tabs:
+            # ── Tab 1: Upload & Extract ───────────────────────────────
             with gr.TabItem("1. Upload & Extract"):
                 docs = gr.File(
                     file_count="multiple",
                     label="OCR"
                 )
                 ocr_dd = gr.Dropdown(
+                    choices=list(OCR_CHOICES.keys()),
+                    value=list(OCR_CHOICES.keys())[0],
                     label="OCR Model"
                 )
                 vlm_dd = gr.Dropdown(
                 )
                 preview_html = gr.HTML()
+                # 1) Run extraction
+                # 2) When done, reveal the Chat tab
                 extract_btn.click(
                     fn=extract_data_from_pdfs,
                     inputs=[
                         vlm_dd
                     ],
                     outputs=[
+                        session_state,
+                        preview_text,
+                        preview_img,
+                        preview_html
                     ]
+                ).then(
+                    fn=lambda: gr.update(visible=True),
+                    inputs=[],
+                    outputs=[tabs.select("2. Chat")]
                 )
+            # ── Tab 2: Chat (starts hidden) ────────────────────────────
+            with gr.TabItem("2. Chat", visible=False) as chat_tab:
                 with gr.Row():
                     with gr.Column(scale=3):
                         chat = gr.Chatbot(type="messages", label="Chat")
                         )
                         num_ctx = gr.Slider(1, 20, value=3, label="Text Contexts")
                         img_ctx = gr.Slider(1, 10, value=2, label="Image Contexts")
+                        temp    = gr.Slider(0.1, 1.0, step=0.1, value=0.4, label="Temperature")
                         max_tok = gr.Slider(10, 1000, step=10, value=200, label="Max Tokens")
                 send.click(
                     fn=conversation,
                     inputs=[
+                        session_state,
                         msg,
                         num_ctx,
                         img_ctx,
                     ],
                     outputs=[
                         chat,
+                        gr.Dataframe(),
                         gr.Gallery(label="Relevant Images", rows=2, value=[])
                     ]
                 )