Spaces:

prithivMLmods
/

Doc-VLMs-OCR

Running on Zero

App Files Files Community

prithivMLmods committed 18 days ago

Commit

29d805e

verified ·

1 Parent(s): 45fbf47

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -22

app.py CHANGED Viewed

@@ -285,9 +285,9 @@ def generate_video(model_name: str, text: str, video_path: str,
 # Define examples for image and video inference
 image_examples = [
-    ["Convert this page to docling", "images/1.png", "SmolDocling-256M-preview"],
-    ["OCR the image", "images/2.jpg", "Nanonets-OCR-s"],
-    ["Convert this page to docling", "images/3.png", "SmolDocling-256M-preview"],
 ]
 video_examples = [
@@ -307,7 +307,7 @@ css = """
 # Create the Gradio Interface
 with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
-    gr.Markdown("# **[core OCR](https://huggingface.co/collections/prithivMLmods/core-and-docscope-ocr-models-6816d7f1bde3f911c6c852bc)**")
     with gr.Row():
         with gr.Column():
             with gr.Tabs():
@@ -315,38 +315,32 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
                     image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
                     image_upload = gr.Image(type="pil", label="Image")
                     image_submit = gr.Button("Submit", elem_classes="submit-btn")
                 with gr.TabItem("Video Inference"):
                     video_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
                     video_upload = gr.Video(label="Video")
                     video_submit = gr.Button("Submit", elem_classes="submit-btn")
             with gr.Accordion("Advanced options", open=False):
                 max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
                 temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6)
                 top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
                 top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
                 repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2)
         with gr.Column():
-            output = gr.Textbox(label="Output", interactive=False, lines=15, scale=2)
             model_choice = gr.Radio(
-                choices=["Nanonets-OCR-s", "MonkeyOCR-Recognition", "SmolDocling-256M-preview"],
                 label="Select Model",
                 value="Nanonets-OCR-s"
             )
-            with gr.Tabs():
-                 with gr.TabItem("Image Examples"):
-                      gr.Examples(
-                          examples=image_examples,
-                          inputs=[image_query, image_upload, model_choice],
-                          label="Click on an example to run"
-                      )
-                 with gr.TabItem("Video Examples"):
-                      gr.Examples(
-                          examples=video_examples,
-                          inputs=[video_query, video_upload, model_choice],
-                          label="Click on an example to run"
-                      )
     image_submit.click(
         fn=generate_image,
         inputs=[model_choice, image_query, image_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
@@ -359,4 +353,4 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
     )
 if __name__ == "__main__":
-    demo.queue(max_size=30).launch(share=True, ssr_mode=False, show_error=True)

 # Define examples for image and video inference
 image_examples = [
+    ["Convert this page to docling", "images/1.png"],
+    ["OCR the image", "images/2.jpg"],
+    ["Convert this page to docling", "images/3.png"],
 ]
 video_examples = [
 # Create the Gradio Interface
 with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
+    gr.Markdown("# **[OCR Net 4x](https://huggingface.co/collections/prithivMLmods/multimodal-implementations-67c9982ea04b39f0608badb0)**")
     with gr.Row():
         with gr.Column():
             with gr.Tabs():
                     image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
                     image_upload = gr.Image(type="pil", label="Image")
                     image_submit = gr.Button("Submit", elem_classes="submit-btn")
+                    gr.Examples(
+                        examples=image_examples,
+                        inputs=[image_query, image_upload, model_choice]
+                    )
                 with gr.TabItem("Video Inference"):
                     video_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
                     video_upload = gr.Video(label="Video")
                     video_submit = gr.Button("Submit", elem_classes="submit-btn")
+                    gr.Examples(
+                        examples=video_examples,
+                        inputs=[video_query, video_upload]
+                    )
             with gr.Accordion("Advanced options", open=False):
                 max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
                 temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6)
                 top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
                 top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
                 repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2)
         with gr.Column():
+            output = gr.Textbox(label="Output", interactive=False, lines=3, scale=2)
             model_choice = gr.Radio(
+                choices=["Nanonets-OCR-s", "SmolDocling-256M-preview", "MonkeyOCR-Recognition"],
                 label="Select Model",
                 value="Nanonets-OCR-s"
             )
     image_submit.click(
         fn=generate_image,
         inputs=[model_choice, image_query, image_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
     )
 if __name__ == "__main__":
+    demo.queue(max_size=30).launch(share=True, mcp_server=True, ssr_mode=False, show_error=True)