GunaKoppula committed
Commit f2e6a02 Β· verified Β· 1 Parent(s): 0a8b3fd

Update app.py

Files changed (1)
app.py +44 -46
app.py CHANGED
@@ -71,47 +71,43 @@ theme = gr.themes.Default(primary_hue="blue").set(
 
 with gr.Blocks(theme='upsatwal/mlsc_tiet') as demo:
 
-    with gr.Row():
-
-        gr.Markdown("## MulitModal Phi2 Model Pretraining and Finetuning from Scratch")
-
-    # with gr.Row() as title_row:
-    #     with gr.Column():
-    #         # Create an empty column on the left for spacing
-    #         pass
-
-    #     with gr.Column():
-    #         # Add Markdown with centered text
-    #         gr.Markdown("## MulitModal Phi2 Model Pretraining and Finetuning from Scratch")
-    #         gr.Markdown("This is a multimodal implementation of [Phi2](https://huggingface.co/microsoft/phi-2) model. Please find the source code and training details [here](https://github.com/RaviNaik/ERA-CAPSTONE/MultiModalPhi2).")
-
-
-    # # with gr.Column():
-    # #     # Create an empty column on the right for spacing
-    # #     pass
-
-    with gr.Row():
-
-        gr.Markdown(
-            """This is a multimodal implementation of [Phi2](https://huggingface.co/microsoft/phi-2) model. Please find the source code and training details [here](https://github.com/RaviNaik/ERA-CAPSTONE/MultiModalPhi2).
-
-            ### Details:
-            1. LLM Backbone: [Phi2](https://huggingface.co/microsoft/phi-2)
-            2. Vision Tower: [clip-vit-large-patch14-336](https://huggingface.co/openai/clip-vit-large-patch14-336)
-            3. Audio Model: [Whisper Tiny](https://huggingface.co/openai/whisper-tiny)
-            4. Pretraining Dataset: [LAION-CC-SBU dataset with BLIP captions(200k samples)](https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain)
-            5. Finetuning Dataset: [Instruct 150k dataset based on COCO](https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K)
-            6. Finetuned Model: [GunaKoppula/Llava-Phi2](https://huggingface.co/GunaKoppula/Llava-Phi2)
-            """
-        )
-
-    # with gr.Row(scale=8):
-    #     with gr.Box():
-    #         with gr.Row():
-    #             chatbot = gr.Chatbot(
-    #                 avatar_images=("πŸ§‘", "πŸ€–"),
-    #                 height=550,
-    #             )
+    gr.Markdown("## MulitModal Phi2 Model Pretraining and Finetuning from Scratch")
+
+    # with gr.Row() as title_row:
+    #     with gr.Column():
+    #         # Create an empty column on the left for spacing
+    #         pass
+
+    #     with gr.Column():
+    #         # Add Markdown with centered text
+    #         gr.Markdown("## MulitModal Phi2 Model Pretraining and Finetuning from Scratch")
+    #         gr.Markdown("This is a multimodal implementation of [Phi2](https://huggingface.co/microsoft/phi-2) model. Please find the source code and training details [here](https://github.com/RaviNaik/ERA-CAPSTONE/MultiModalPhi2).")
+
+
+    # # with gr.Column():
+    # #     # Create an empty column on the right for spacing
+    # #     pass
+
+    gr.Markdown(
+        """This is a multimodal implementation of [Phi2](https://huggingface.co/microsoft/phi-2) model. Please find the source code and training details [here](https://github.com/RaviNaik/ERA-CAPSTONE/MultiModalPhi2).
+
+        ### Details:
+        1. LLM Backbone: [Phi2](https://huggingface.co/microsoft/phi-2)
+        2. Vision Tower: [clip-vit-large-patch14-336](https://huggingface.co/openai/clip-vit-large-patch14-336)
+        3. Audio Model: [Whisper Tiny](https://huggingface.co/openai/whisper-tiny)
+        4. Pretraining Dataset: [LAION-CC-SBU dataset with BLIP captions(200k samples)](https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain)
+        5. Finetuning Dataset: [Instruct 150k dataset based on COCO](https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K)
+        6. Finetuned Model: [GunaKoppula/Llava-Phi2](https://huggingface.co/GunaKoppula/Llava-Phi2)
+        """
+    )
+
+    # with gr.Row(scale=8):
+    #     with gr.Box():
+    #         with gr.Row():
+    #             chatbot = gr.Chatbot(
+    #                 avatar_images=("πŸ§‘", "πŸ€–"),
+    #                 height=550,
+    #             )
 
     with gr.Row():
         chatbot = gr.Chatbot(
@@ -143,6 +139,8 @@ with gr.Blocks(theme='upsatwal/mlsc_tiet') as demo:
         # audio_upload = gr.Audio(source="upload", type="filepath")
         # audio_mic = gr.Audio(source="microphone", type="filepath", format="mp3")
         audio_upload = gr.Audio(type="filepath")
+        audio_mic = gr.Microphone(source="microphone", type="filepath", format="mp3")
+
 
         # with gr.Column():
         # Adding a Button
@@ -153,21 +151,21 @@ with gr.Blocks(theme='upsatwal/mlsc_tiet') as demo:
 
     submit.click(
         add_content,
-        # inputs=[chatbot, prompt, image, audio_upload, audio_mic],
-        inputs=[chatbot, prompt, image, audio_upload],
+        inputs=[chatbot, prompt, image, audio_upload, audio_mic],
+        # inputs=[chatbot, prompt, image, audio_upload],
         outputs=[chatbot],
     ).success(
         run,
-        # inputs=[chatbot, prompt, image, audio_upload, audio_mic],
-        inputs=[chatbot, prompt, image, audio_upload],
-        # outputs=[chatbot, prompt, image, audio_upload, audio_mic],
-        outputs=[chatbot, prompt, image, audio_upload],
+        inputs=[chatbot, prompt, image, audio_upload, audio_mic],
+        # inputs=[chatbot, prompt, image, audio_upload],
+        outputs=[chatbot, prompt, image, audio_upload, audio_mic],
+        # outputs=[chatbot, prompt, image, audio_upload],
     )
 
     clear.click(
         clear_data,
-        # outputs=[prompt, image, audio_upload, audio_mic, chatbot],
-        outputs=[prompt, image, audio_upload, chatbot],
+        outputs=[prompt, image, audio_upload, audio_mic, chatbot],
+        # outputs=[prompt, image, audio_upload, chatbot],
     )
 
 demo.launch()
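
Note on the change: this commit re-enables the microphone input that had been commented out and threads it through the submit and clear event chains. Below is a minimal, self-contained sketch of the wiring this diff produces. The handler bodies are hypothetical stand-ins, since the real add_content, run, and clear_data are defined elsewhere in app.py and do not appear in this diff; the sketch assumes the Gradio 4.x API, where gr.Microphone is a template for gr.Audio(sources=["microphone"]) and needs no source= keyword (source= was a Gradio 3.x gr.Audio argument).

import gradio as gr

def add_content(history, prompt, image, audio_upload, audio_mic):
    # Hypothetical stand-in: append the user's turn to the chat history.
    return history + [(prompt, None)]

def run(history, prompt, image, audio_upload, audio_mic):
    # Hypothetical stand-in: fill in a model reply, then clear every input.
    history[-1] = (history[-1][0], "model reply goes here")
    return history, "", None, None, None

def clear_data():
    # Reset all input components plus the chat history.
    return "", None, None, None, []

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=550)
    prompt = gr.Textbox(label="Prompt")
    image = gr.Image(type="filepath")
    audio_upload = gr.Audio(type="filepath")
    # Microphone template: the mic source is implied, so no source= kwarg.
    audio_mic = gr.Microphone(type="filepath", format="mp3")
    submit = gr.Button("Submit")
    clear = gr.Button("Clear")

    # .success() chains events: run fires only if add_content raised no error.
    submit.click(
        add_content,
        inputs=[chatbot, prompt, image, audio_upload, audio_mic],
        outputs=[chatbot],
    ).success(
        run,
        inputs=[chatbot, prompt, image, audio_upload, audio_mic],
        outputs=[chatbot, prompt, image, audio_upload, audio_mic],
    )

    clear.click(
        clear_data,
        outputs=[prompt, image, audio_upload, audio_mic, chatbot],
    )

demo.launch()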
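Note on the Details list: the three pretrained backbones it names can be pulled from the Hub with the transformers library roughly as below. This is an orientation sketch only, not the repo's actual loading code; the LLaVA-Phi2 glue (projection layers, training loop) lives in the linked ERA-CAPSTONE repository, outside this diff.

import torch
from transformers import (
    AutoModelForCausalLM,             # Phi2 LLM backbone
    AutoTokenizer,
    CLIPVisionModel,                  # vision tower
    CLIPImageProcessor,
    WhisperForConditionalGeneration,  # audio model
    WhisperProcessor,
)

# 1. LLM backbone: Phi2
llm = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")

# 2. Vision tower: CLIP ViT-L/14 at 336px
vision_tower = CLIPVisionModel.from_pretrained("openai/clip-vit-large-patch14-336")
image_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14-336")

# 3. Audio model: Whisper Tiny
whisper = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")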