Navyabhat committed on
Commit
65f2058
Β·
verified Β·
1 Parent(s): 61da4c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -8
app.py CHANGED
@@ -1,3 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from PIL import Image
3
  from inference.main import MultiModalPhi2
@@ -96,20 +204,39 @@ with gr.Blocks() as demo:
96
  txt = gr.Textbox(
97
  scale=4,
98
  show_label=False,
99
- placeholder="Enter text and press enter, or upload an image",
100
  container=False,
101
  )
102
- btn = gr.UploadButton("πŸ“", file_types=["image", "video", "audio"])
 
 
 
 
 
103
 
104
- txt_msg = txt.submit(add_content, [chatbot, txt, None, None], [chatbot, txt, None, None], queue=False).then(
105
- run, [chatbot, txt, None, None], [chatbot, txt, None, None], api_name="bot_response"
106
  )
107
- txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
108
- file_msg = btn.upload(add_content, [chatbot, None, None, btn], [chatbot, None, None, None], queue=False).then(
109
- run, [chatbot, None, None, btn], [chatbot, None, None, None]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  )
111
 
112
  # chatbot.like(print_like_dislike, None, None)
113
 
114
- demo.queue()
115
  demo.launch()
 
1
+ import gradio as gr
2
+ from PIL import Image
3
+ from inference.main import MultiModalPhi2
4
+
5
+ messages = []
6
+
7
+ multimodal_phi2 = MultiModalPhi2(
8
+ modelname_or_path="Navyabhat/Llava-Phi2",
9
+ temperature=0.2,
10
+ max_new_tokens=1024,
11
+ device="cpu",
12
+ )
13
+
14
+
15
+ def add_content(chatbot, input_data, input_type) -> gr.Chatbot:
16
+ textflag, imageflag, audioflag = False, False, False
17
+ if input_type == "text":
18
+ chatbot.append((text, None))
19
+ textflag = True
20
+ if input_type == "image":
21
+ chatbot.append(((image,), None))
22
+ imageflag = True
23
+ if input_type == "audio":
24
+ chatbot.append(((audio_mic,), None))
25
+ audioflag = True
26
+ # else:
27
+ # if audio_upload is not None:
28
+ # chatbot.append(((audio_upload,), None))
29
+ # audioflag = True
30
+ if not any([textflag, imageflag, audioflag]):
31
+ # Raise an error if neither text nor file is provided
32
+ raise gr.Error("Enter a valid text, image or audio")
33
+ return chatbot
34
+
35
+
36
+ def clear_data():
37
+ return {prompt: None, image: None, audio_upload: None, audio_mic: None, chatbot: []}
38
+
39
+
40
+ def run(history, text, image, audio_upload, audio_mic):
41
+ if text in [None, ""]:
42
+ text = None
43
+
44
+ if audio_upload is not None:
45
+ audio = audio_upload
46
+ elif audio_mic is not None:
47
+ audio = audio_mic
48
+ else:
49
+ audio = None
50
+
51
+ print("text", text)
52
+ print("image", image)
53
+ print("audio", audio)
54
+
55
+ if image is not None:
56
+ image = Image.open(image)
57
+ outputs = multimodal_phi2(text, audio, image)
58
+ # outputs = ""
59
+
60
+ history.append((None, outputs.title()))
61
+ return history, None, None, None, None
62
+
63
+
64
+ with gr.Blocks() as demo:
65
+ chatbot = gr.Chatbot(
66
+ [],
67
+ elem_id="chatbot",
68
+ bubble_full_width=False,
69
+ avatar_images=(None, (os.path.join(os.path.dirname(__file__), "avatar.png"))),
70
+ )
71
+
72
+ with gr.Row():
73
+ txt = gr.Textbox(
74
+ scale=4,
75
+ show_label=False,
76
+ placeholder="Enter text and press enter",
77
+ container=False,
78
+ )
79
+ img_audio = gr.UploadButton("πŸ“", file_types=["image", "audio"], label="Upload Image or Audio")
80
+
81
+ txt_msg = txt.submit(add_content, [chatbot, txt], [chatbot, txt, "text"], queue=False).then(
82
+ bot, chatbot, chatbot, api_name="bot_response"
83
+ )
84
+ img_audio_msg = img_audio.upload(add_input, [chatbot, img_audio], [chatbot, "image"], queue=False).then(
85
+ bot, chatbot, chatbot
86
+ )
87
+
88
+ # chatbot.like(print_like_dislike, None, None)
89
+
90
+
91
+ submit.click(
92
+ add_content,
93
+ inputs=[chatbot, prompt, image, audio_upload, audio_mic],
94
+ outputs=[chatbot],
95
+ ).success(
96
+ run,
97
+ inputs=[chatbot, prompt, image, audio_upload, audio_mic],
98
+ outputs=[chatbot, prompt, image, audio_upload, audio_mic],
99
+ )
100
+
101
+ clear.click(
102
+ clear_data,
103
+ outputs=[prompt, image, audio_upload, audio_mic, chatbot],
104
+ )
105
+
106
+ demo.launch()
107
+
108
+
109
  import gradio as gr
110
  from PIL import Image
111
  from inference.main import MultiModalPhi2
 
204
  txt = gr.Textbox(
205
  scale=4,
206
  show_label=False,
207
+ placeholder="Enter text and press enter",
208
  container=False,
209
  )
210
+ img_audio = gr.UploadButton("πŸ“", file_types=["image", "audio"], label="Upload Image or Audio")
211
+
212
+ with gr.Row():
213
+ # Adding a Button
214
+ submit = gr.Button()
215
+ clear = gr.Button(value="Clear")
216
 
217
+ txt_msg = txt.submit(add_input, [chatbot, txt], [chatbot, txt, "text"], queue=False).then(
218
+ bot, chatbot, chatbot, api_name="bot_response"
219
  )
220
+ img_audio_msg = img_audio.upload(add_input, [chatbot, img_audio], [chatbot, "image"], queue=False).then(
221
+ bot, chatbot, chatbot
222
+ )
223
+
224
+ # submit.click(
225
+ # add_content,
226
+ # inputs=[chatbot, txt, image, audio_upload, audio_mic],
227
+ # outputs=[chatbot],
228
+ # ).success(
229
+ # run,
230
+ # inputs=[chatbot, txt, image, audio_upload, audio_mic],
231
+ # outputs=[chatbot, txt, image, audio_upload, audio_mic],
232
+ # )
233
+
234
+ clear.click(
235
+ clear_data,
236
+ outputs=[prompt, image, audio_upload, audio_mic, chatbot],
237
  )
238
 
239
  # chatbot.like(print_like_dislike, None, None)
240
 
241
+ # demo.queue()
242
  demo.launch()