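"""Gradio demo for the Navyabhat/Llava-Phi2 multimodal model.

The app accepts text, image, and audio input in a chat interface and shows the
model's response.
"""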
from __future__ import annotations

from typing import Iterable

import gradio as gr
from PIL import Image
from gradio.themes.base import Base
from gradio.themes.utils import colors, fonts, sizes

from inference.main import MultiModalPhi2

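# Custom Gradio theme ("Seafoam") used to style the demo UI.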
class Seafoam(Base):
    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.emerald,
        secondary_hue: colors.Color | str = colors.blue,
        neutral_hue: colors.Color | str = colors.gray,
        spacing_size: sizes.Size | str = sizes.spacing_md,
        radius_size: sizes.Size | str = sizes.radius_md,
        text_size: sizes.Size | str = sizes.text_lg,
        font: fonts.Font
        | str
        | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Quicksand"),
            "ui-sans-serif",
            "sans-serif",
        ),
        font_mono: fonts.Font
        | str
        | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"),
            "ui-monospace",
            "monospace",
        ),
    ):
        super().__init__(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            spacing_size=spacing_size,
            radius_size=radius_size,
            text_size=text_size,
            font=font,
            font_mono=font_mono,
        )
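# Instantiate the theme and load the multimodal Phi-2 model for CPU inference.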
seafoam = Seafoam()

multimodal_phi2 = MultiModalPhi2(
    modelname_or_path="Navyabhat/Llava-Phi2",
    temperature=0.2,
    max_new_tokens=1024,
    device="cpu",
)
def add_content(chatbot, text, image, audio_upload, audio_mic) -> list:
    """Append the user's text, image, and/or audio to the chat history."""
    textflag, imageflag, audioflag = False, False, False
    if text not in ["", None]:
        chatbot.append((text, None))
        textflag = True
    if image is not None:
        chatbot.append(((image,), None))
        imageflag = True
    if audio_mic is not None:
        chatbot.append(((audio_mic,), None))
        audioflag = True
    elif audio_upload is not None:
        chatbot.append(((audio_upload,), None))
        audioflag = True
    if not any([textflag, imageflag, audioflag]):
        # Raise an error if no text, image, or audio was provided.
        raise gr.Error("Enter a valid text, image or audio")
    return chatbot
def clear_data():
    """Reset all inputs and clear the chat history."""
    return {prompt: None, image: None, audio_upload: None, audio_mic: None, chatbot: []}
def run(history, text, image, audio_upload, audio_mic):
    """Run multimodal inference on the collected inputs and append the reply."""
    if text in [None, ""]:
        text = None
    # Prefer an uploaded clip; otherwise fall back to the microphone recording.
    if audio_upload is not None:
        audio = audio_upload
    elif audio_mic is not None:
        audio = audio_mic
    else:
        audio = None
    print("text", text)
    print("image", image)
    print("audio", audio)
    if image is not None:
        image = Image.open(image)
    outputs = multimodal_phi2(text, audio, image)
    history.append((None, outputs.title()))
    return history, None, None, None, None
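# Build the Gradio UI: chat window, multimodal inputs, and Submit / Clear buttons.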
with gr.Blocks(theme=seafoam) as demo:
    gr.Markdown("## MultiModal Phi2 Model Pretraining and Finetuning from Scratch")
    with gr.Row():
        chatbot = gr.Chatbot(
            avatar_images=("🧑", "🤖"),
            height=350,
        )
    with gr.Row():
        # Text query plus optional image and audio (uploaded or recorded from the mic).
        prompt = gr.Textbox(
            placeholder="Enter text, or upload an image or audio",
            lines=2,
            label="Query",
            value=None,
            scale=4,
        )
        image = gr.Image(type="filepath", value=None, label="Upload Image")
        audio_upload = gr.Audio(source="upload", type="filepath", label="Upload audio")
        audio_mic = gr.Audio(source="microphone", type="filepath", format="mp3")
    with gr.Row():
        submit = gr.Button(value="Submit", variant="primary")
        clear = gr.Button(value="Clear")

    # On Submit: add the user's inputs to the chat, then run inference on success.
    submit.click(
        add_content,
        inputs=[chatbot, prompt, image, audio_upload, audio_mic],
        outputs=[chatbot],
    ).success(
        run,
        inputs=[chatbot, prompt, image, audio_upload, audio_mic],
        outputs=[chatbot, prompt, image, audio_upload, audio_mic],
    )
    # Clear resets every input and empties the chat history.
    clear.click(
        clear_data,
        outputs=[prompt, image, audio_upload, audio_mic, chatbot],
    )

demo.launch()