import os
import time

import gradio as gr
from PIL import Image

from inference.main import MultiModalPhi2
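
# Load the LLaVA-Phi2 multimodal model once at startup; device="cpu" keeps
# the demo runnable without a GPU, at the cost of inference latency.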
multimodal_phi2 = MultiModalPhi2(
    modelname_or_path="Navyabhat/Llava-Phi2",
    temperature=0.2,
    max_new_tokens=1024,
    device="cpu",
)
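
# Append whichever inputs the user supplied (text, image, audio) to the chat
# history; files go in as one-element tuples so gr.Chatbot renders them as media.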
def add_content(chatbot, text, image, audio_upload, audio_mic) -> gr.Chatbot:
    textflag, imageflag, audioflag = False, False, False
    if text not in ["", None]:
        chatbot.append((text, None))
        textflag = True
    if image is not None:
        chatbot.append(((image,), None))
        imageflag = True
    if audio_mic is not None:
        chatbot.append(((audio_mic,), None))
        audioflag = True
    elif audio_upload is not None:
        chatbot.append(((audio_upload,), None))
        audioflag = True
    if not any([textflag, imageflag, audioflag]):
        # Raise an error if neither text nor a file was provided
        raise gr.Error("Enter a valid text, image or audio")
    return chatbot
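
# Reset the textbox and the chat history (wired to the Clear button below).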
def clear_data():
    return None, []
def run(history, text, image, audio_upload, audio_mic):
    if text in [None, ""]:
        text = None
    if audio_upload is not None:
        audio = audio_upload
    elif audio_mic is not None:
        audio = audio_mic
    else:
        audio = None
    print("text", text)
    print("image", image)
    print("audio", audio)
    if image is not None:
        image = Image.open(image)
    outputs = multimodal_phi2(text, audio, image)
    history.append((None, outputs.title()))
    return history, None, None, None, None
# def print_like_dislike(x: gr.LikeData):
# print(x.index, x.value, x.liked)
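
# Append a text turn to the history and lock the textbox until the bot replies.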
def add_text(history, text):
    history = history + [(text, None)]
    return history, gr.Textbox(value="", interactive=False)
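
# Append an uploaded file (image or audio) to the history as a file message.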
def add_file(history, file):
    history = history + [((file.name,), None)]
    return history
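
# Placeholder bot: streams a canned reply character by character into the
# last chat turn. The real model path is run() above.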
def bot(history):
    response = "**That's cool!**"
    history[-1][1] = ""
    for character in response:
        history[-1][1] += character
        time.sleep(0.05)
        yield history
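
# Build the UI: a chatbot pane, a textbox, an upload button, and
# Submit / Clear controls.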
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        bubble_full_width=False,
        avatar_images=(None, os.path.join(os.path.dirname(__file__), "avatar.png")),
    )

    with gr.Row():
        txt = gr.Textbox(
            scale=4,
            show_label=False,
            placeholder="Enter text and press enter",
            container=False,
        )
        img_audio = gr.UploadButton("📁", file_types=["image", "audio"])
    with gr.Row():
        # Buttons to submit the current input and to clear the conversation.
        submit = gr.Button(value="Submit")
        clear = gr.Button(value="Clear")
    txt_msg = txt.submit(
        add_text, [chatbot, txt], [chatbot, txt], queue=False
    ).then(bot, chatbot, chatbot, api_name="bot_response").then(
        lambda: gr.Textbox(interactive=True), None, [txt], queue=False
    )
    img_audio_msg = img_audio.upload(
        add_file, [chatbot, img_audio], [chatbot], queue=False
    ).then(bot, chatbot, chatbot)

    # The full multimodal pipeline (add_content + run) needs dedicated
    # image / audio_upload / audio_mic components; until those exist in
    # the layout, the Submit button mirrors the textbox flow.
    submit.click(
        add_text, [chatbot, txt], [chatbot, txt], queue=False
    ).then(bot, chatbot, chatbot).then(
        lambda: gr.Textbox(interactive=True), None, [txt], queue=False
    )
    # submit.click(
    #     add_content,
    #     inputs=[chatbot, txt, image, audio_upload, audio_mic],
    #     outputs=[chatbot],
    # ).success(
    #     run,
    #     inputs=[chatbot, txt, image, audio_upload, audio_mic],
    #     outputs=[chatbot, txt, image, audio_upload, audio_mic],
    # )
    clear.click(clear_data, outputs=[txt, chatbot], queue=False)
    # chatbot.like(print_like_dislike, None, None)

demo.queue()  # the generator-based bot() streams tokens, which requires the queue
demo.launch()