GunaKoppula committed
Commit f2e6a02 Β· verified Β· 1 Parent(s): 0a8b3fd

Update app.py

Files changed (1)
app.py +44 -46
app.py CHANGED
@@ -71,47 +71,43 @@ theme = gr.themes.Default(primary_hue="blue").set(
 
 with gr.Blocks(theme='upsatwal/mlsc_tiet') as demo:
 
-    with gr.Row():
-
-        gr.Markdown("## MulitModal Phi2 Model Pretraining and Finetuning from Scratch")
-
-    # with gr.Row() as title_row:
-    #     with gr.Column():
-    #         # Create an empty column on the left for spacing
-    #         pass
-
-    #     with gr.Column():
-    #         # Add Markdown with centered text
-    #         gr.Markdown("## MulitModal Phi2 Model Pretraining and Finetuning from Scratch")
-    #         gr.Markdown("This is a multimodal implementation of [Phi2](https://huggingface.co/microsoft/phi-2) model. Please find the source code and training details [here](https://github.com/RaviNaik/ERA-CAPSTONE/MultiModalPhi2).")
-
-
-    # # with gr.Column():
-    # #     # Create an empty column on the right for spacing
-    # #     pass
-
-    with gr.Row():
-
-        gr.Markdown(
-            """This is a multimodal implementation of [Phi2](https://huggingface.co/microsoft/phi-2) model. Please find the source code and training details [here](https://github.com/RaviNaik/ERA-CAPSTONE/MultiModalPhi2).
-
-            ### Details:
-            1. LLM Backbone: [Phi2](https://huggingface.co/microsoft/phi-2)
-            2. Vision Tower: [clip-vit-large-patch14-336](https://huggingface.co/openai/clip-vit-large-patch14-336)
-            3. Audio Model: [Whisper Tiny](https://huggingface.co/openai/whisper-tiny)
-            4. Pretraining Dataset: [LAION-CC-SBU dataset with BLIP captions(200k samples)](https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain)
-            5. Finetuning Dataset: [Instruct 150k dataset based on COCO](https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K)
-            6. Finetuned Model: [GunaKoppula/Llava-Phi2](https://huggingface.co/GunaKoppula/Llava-Phi2)
-            """
-        )
-
-    # with gr.Row(scale=8):
-    #     with gr.Box():
-    #         with gr.Row():
-    #             chatbot = gr.Chatbot(
-    #                 avatar_images=("πŸ§‘", "πŸ€–"),
-    #                 height=550,
-    #             )
+    gr.Markdown("## MulitModal Phi2 Model Pretraining and Finetuning from Scratch")
+
+    # with gr.Row() as title_row:
+    #     with gr.Column():
+    #         # Create an empty column on the left for spacing
+    #         pass
+
+    #     with gr.Column():
+    #         # Add Markdown with centered text
+    #         gr.Markdown("## MulitModal Phi2 Model Pretraining and Finetuning from Scratch")
+    #         gr.Markdown("This is a multimodal implementation of [Phi2](https://huggingface.co/microsoft/phi-2) model. Please find the source code and training details [here](https://github.com/RaviNaik/ERA-CAPSTONE/MultiModalPhi2).")
+
+
+    # # with gr.Column():
+    # #     # Create an empty column on the right for spacing
+    # #     pass
+
+    gr.Markdown(
+        """This is a multimodal implementation of [Phi2](https://huggingface.co/microsoft/phi-2) model. Please find the source code and training details [here](https://github.com/RaviNaik/ERA-CAPSTONE/MultiModalPhi2).
+
+        ### Details:
+        1. LLM Backbone: [Phi2](https://huggingface.co/microsoft/phi-2)
+        2. Vision Tower: [clip-vit-large-patch14-336](https://huggingface.co/openai/clip-vit-large-patch14-336)
+        3. Audio Model: [Whisper Tiny](https://huggingface.co/openai/whisper-tiny)
+        4. Pretraining Dataset: [LAION-CC-SBU dataset with BLIP captions(200k samples)](https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain)
+        5. Finetuning Dataset: [Instruct 150k dataset based on COCO](https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K)
+        6. Finetuned Model: [GunaKoppula/Llava-Phi2](https://huggingface.co/GunaKoppula/Llava-Phi2)
+        """
+    )
+
+    # with gr.Row(scale=8):
+    #     with gr.Box():
+    #         with gr.Row():
+    #             chatbot = gr.Chatbot(
+    #                 avatar_images=("πŸ§‘", "πŸ€–"),
+    #                 height=550,
+    #             )
 
     with gr.Row():
         chatbot = gr.Chatbot(
@@ -143,6 +139,8 @@ with gr.Blocks(theme='upsatwal/mlsc_tiet') as demo:
         # audio_upload = gr.Audio(source="upload", type="filepath")
         # audio_mic = gr.Audio(source="microphone", type="filepath", format="mp3")
         audio_upload = gr.Audio(type="filepath")
+        audio_mic = gr.Microphone(source="microphone", type="filepath", format="mp3")
+
 
         # with gr.Column():
         # Adding a Button
@@ -153,21 +151,21 @@ with gr.Blocks(theme='upsatwal/mlsc_tiet') as demo:
 
     submit.click(
         add_content,
-        # inputs=[chatbot, prompt, image, audio_upload, audio_mic],
-        inputs=[chatbot, prompt, image, audio_upload],
+        inputs=[chatbot, prompt, image, audio_upload, audio_mic],
+        # inputs=[chatbot, prompt, image, audio_upload],
         outputs=[chatbot],
     ).success(
         run,
-        # inputs=[chatbot, prompt, image, audio_upload, audio_mic],
-        inputs=[chatbot, prompt, image, audio_upload],
-        # outputs=[chatbot, prompt, image, audio_upload, audio_mic],
-        outputs=[chatbot, prompt, image, audio_upload],
+        inputs=[chatbot, prompt, image, audio_upload, audio_mic],
+        # inputs=[chatbot, prompt, image, audio_upload],
+        outputs=[chatbot, prompt, image, audio_upload, audio_mic],
+        # outputs=[chatbot, prompt, image, audio_upload],
     )
 
     clear.click(
         clear_data,
-        # outputs=[prompt, image, audio_upload, audio_mic, chatbot],
-        outputs=[prompt, image, audio_upload, chatbot],
+        outputs=[prompt, image, audio_upload, audio_mic, chatbot],
+        # outputs=[prompt, image, audio_upload, chatbot],
     )
 
 demo.launch()
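
Note on the change: this commit re-enables the microphone input that had been commented out and threads it through the submit and clear event chains. Below is a minimal, self-contained sketch of the wiring this diff produces. The handler bodies are hypothetical stand-ins, since the real add_content, run, and clear_data are defined elsewhere in app.py and do not appear in this diff; the sketch assumes the Gradio 4.x API, where gr.Microphone is a template for gr.Audio(sources=["microphone"]) and needs no source= keyword (source= was a Gradio 3.x gr.Audio argument).

import gradio as gr

def add_content(history, prompt, image, audio_upload, audio_mic):
    # Hypothetical stand-in: append the user's turn to the chat history.
    return history + [(prompt, None)]

def run(history, prompt, image, audio_upload, audio_mic):
    # Hypothetical stand-in: fill in a model reply, then clear every input.
    history[-1] = (history[-1][0], "model reply goes here")
    return history, "", None, None, None

def clear_data():
    # Reset all input components plus the chat history.
    return "", None, None, None, []

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=550)
    prompt = gr.Textbox(label="Prompt")
    image = gr.Image(type="filepath")
    audio_upload = gr.Audio(type="filepath")
    # Microphone template: the mic source is implied, so no source= kwarg.
    audio_mic = gr.Microphone(type="filepath", format="mp3")
    submit = gr.Button("Submit")
    clear = gr.Button("Clear")

    # .success() chains events: run fires only if add_content raised no error.
    submit.click(
        add_content,
        inputs=[chatbot, prompt, image, audio_upload, audio_mic],
        outputs=[chatbot],
    ).success(
        run,
        inputs=[chatbot, prompt, image, audio_upload, audio_mic],
        outputs=[chatbot, prompt, image, audio_upload, audio_mic],
    )

    clear.click(
        clear_data,
        outputs=[prompt, image, audio_upload, audio_mic, chatbot],
    )

demo.launch()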
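Note on the Details list: the three pretrained backbones it names can be pulled from the Hub with the transformers library roughly as below. This is an orientation sketch only, not the repo's actual loading code; the LLaVA-Phi2 glue (projection layers, training loop) lives in the linked ERA-CAPSTONE repository, outside this diff.

import torch
from transformers import (
    AutoModelForCausalLM,             # Phi2 LLM backbone
    AutoTokenizer,
    CLIPVisionModel,                  # vision tower
    CLIPImageProcessor,
    WhisperForConditionalGeneration,  # audio model
    WhisperProcessor,
)

# 1. LLM backbone: Phi2
llm = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")

# 2. Vision tower: CLIP ViT-L/14 at 336px
vision_tower = CLIPVisionModel.from_pretrained("openai/clip-vit-large-patch14-336")
image_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14-336")

# 3. Audio model: Whisper Tiny
whisper = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")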