Spaces:
Runtime error
Runtime error
File size: 4,633 Bytes
efe75b3 8008efd dafd76d efe75b3 fb4c1f6 efe75b3 dafd76d e5701a3 9c868d5 f2e6a02 9c868d5 efe75b3 f2e6a02 9c868d5 0a8b3fd 47e7f67 b8a7ca9 0d6943c 2722cbc efe75b3 2722cbc 1cf7891 410fca0 2722cbc 1cf7891 2722cbc e2f50d5 f2e6a02 2722cbc cbe6641 2722cbc a85cefc 8cd024c efe75b3 f2e6a02 efe75b3 f2e6a02 efe75b3 f2e6a02 efe75b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import gradio as gr
from PIL import Image
from inference.main import MultiModalPhi2
# gr.themes.builder()
# Module-level list; nothing in the visible code reads or appends to it.
messages = []

# Single model wrapper created at import time and shared by every request
# handled by `run`. Inference is pinned to the CPU.
multimodal_phi2 = MultiModalPhi2(
    device="cpu",
    modelname_or_path="GunaKoppula/Llava-Phi2",
    max_new_tokens=1024,
    temperature=0.2,
)
def add_content(chatbot, text, image, audio_upload, audio_mic) -> gr.Chatbot:
    """Append the submitted inputs to the chat history as user turns.

    Each provided input becomes its own ``(user_message, None)`` pair, added
    in the order text, image, audio. Image and audio values are wrapped in a
    one-element tuple before being appended. When both audio inputs are
    present, only the microphone value is added.

    Args:
        chatbot: Current chat history list; it is extended in place.
        text: Prompt text; ``""`` and ``None`` count as "not provided".
        image: Image value, or ``None``.
        audio_upload: Uploaded audio value, or ``None``.
        audio_mic: Microphone audio value, or ``None``.

    Returns:
        The same ``chatbot`` list with the new entries appended.

    Raises:
        gr.Error: If no text, image or audio was provided.
    """
    user_entries = []

    if text not in ["", None]:
        user_entries.append(text)

    if image is not None:
        user_entries.append((image,))

    # The microphone recording takes priority over an uploaded file.
    chosen_audio = audio_mic if audio_mic is not None else audio_upload
    if chosen_audio is not None:
        user_entries.append((chosen_audio,))

    if not user_entries:
        # Nothing usable was submitted; the history is left untouched.
        raise gr.Error("Enter a valid text, image or audio")

    chatbot.extend((entry, None) for entry in user_entries)
    return chatbot
def clear_data():
    """Return reset values for every input component and the chat history.

    The result is a dict keyed by the module-level components: the prompt,
    image and both audio inputs map to ``None`` and the chatbot maps to an
    empty list.
    """
    reset_values = dict.fromkeys((prompt, image, audio_upload, audio_mic))
    reset_values[chatbot] = []
    return reset_values
def run(history, text, image, audio_upload, audio_mic):
    """Run the model on the submitted inputs and append its reply to the chat.

    Args:
        history: Chat history list; the reply is appended in place as a
            ``(None, reply)`` pair.
        text: Prompt text; an empty string is normalised to ``None``.
        image: Path of the image to open with PIL, or ``None``.
        audio_upload: Uploaded audio value, or ``None``.
        audio_mic: Microphone audio value, or ``None``.

    Returns:
        A 5-tuple ``(history, None, None, None, None)``. In the click wiring
        the four ``None`` values are mapped onto the prompt, image and both
        audio inputs.
    """
    if text in [None, ""]:
        text = None

    # Prefer the microphone recording over an uploaded file. `add_content`
    # puts the microphone clip into the chat when both are present, so the
    # model must be given that same clip rather than the upload.
    audio = audio_mic if audio_mic is not None else audio_upload

    print("text", text)
    print("image", image)
    print("audio", audio)

    if image is not None:
        image = Image.open(image)

    outputs = multimodal_phi2(text, audio, image)
    # NOTE(review): `.title()` capitalises every word of the reply (and assumes
    # the model returns a str) -- confirm this formatting is intended.
    history.append((None, outputs.title()))
    return history, None, None, None, None
# UI definition: title, chat window, input widgets, buttons and event wiring.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been lost -- confirm the layout matches the intended design.
with gr.Blocks(theme='upsatwal/mlsc_tiet') as demo:
    with gr.Row() as title_row:
        with gr.Column():
            # Empty column on the left, used only for spacing.
            pass
        with gr.Column():
            gr.Markdown("## MultiModal Phi2 Model Pretraining and Finetuning from Scratch")
            gr.Markdown("This is a multimodal implementation of [Phi2](https://huggingface.co/microsoft/phi-2) model. Please find the source code and training details [here](https://github.com/RaviNaik/ERA-CAPSTONE/MultiModalPhi2).")

    with gr.Row():
        chatbot = gr.Chatbot(
            # Written as escapes so the emoji survive any re-encoding of this
            # file. NOTE(review): restored from mis-encoded text; presumably
            # the person and robot emoji -- confirm. Gradio documents
            # `avatar_images` as image file paths or URLs, so verify that
            # emoji strings render as intended.
            avatar_images=("\U0001F9D1", "\U0001F916"),
            height=550,
        )

    with gr.Row():
        with gr.Column(scale=4):
            # Input area: prompt, image and audio stacked inside one box.
            with gr.Box():
                with gr.Column():
                    with gr.Row():
                        # Free-text prompt.
                        prompt = gr.Textbox(
                            placeholder="Enter Prompt", lines=2, label="Query", value=None
                        )
                    with gr.Row():
                        # Image input, handed to the callbacks as a file path.
                        image = gr.Image(type="filepath", value=None)
                    with gr.Row():
                        # Two audio inputs: file upload and microphone.
                        audio_upload = gr.Audio(type="filepath")
                        audio_mic = gr.Microphone(source="microphone", type="filepath", format="mp3")

        with gr.Row():
            submit = gr.Button()
        with gr.Row():
            clear = gr.Button(value="Clear")

    # First echo the user's inputs into the chat; only if that succeeds
    # (i.e. add_content did not raise) run the model and reset the inputs.
    submit.click(
        add_content,
        inputs=[chatbot, prompt, image, audio_upload, audio_mic],
        outputs=[chatbot],
    ).success(
        run,
        inputs=[chatbot, prompt, image, audio_upload, audio_mic],
        outputs=[chatbot, prompt, image, audio_upload, audio_mic],
    )

    # Reset every input widget and empty the chat window.
    clear.click(
        clear_data,
        outputs=[prompt, image, audio_upload, audio_mic, chatbot],
    )

demo.launch()
|