"""Gradio chat demo for the MultiModalPhi2 (Llava-Phi2) model.

The UI consists of a chatbot pane, a textbox and an upload button.  Each
submitted text or uploaded image/audio file is appended to the chat history
and then forwarded to the model, whose reply is appended as a bot message.
"""
import mimetypes
import os

import gradio as gr
from PIL import Image

from inference.main import MultiModalPhi2

# Single user-facing message for every kind of rejected input.
INVALID_INPUT_MESSAGE = "Enter a valid text, image or audio"

# NOTE(review): not referenced anywhere in this file; kept in case other
# modules import it.
messages = []

# The model is loaded once at import time and shared by every request.
multimodal_phi2 = MultiModalPhi2(
    modelname_or_path="Navyabhat/Llava-Phi2",
    temperature=0.2,
    max_new_tokens=1024,
    device="cpu",
)


def add_content(chatbot, input_data, input_type) -> list:
    """Append one user message to the chat history.

    Args:
        chatbot: Chat history, a list of ``(user_message, bot_message)``
            pairs.  It is mutated in place and also returned.
        input_data: The message text when ``input_type`` is ``"text"``,
            otherwise the path of the image/audio file.
        input_type: One of ``"text"``, ``"image"`` or ``"audio"``.

    Returns:
        The updated chat history.

    Raises:
        gr.Error: If ``input_data`` is empty or ``input_type`` is not one of
            the supported kinds.
    """
    if input_data in (None, ""):
        raise gr.Error(INVALID_INPUT_MESSAGE)

    if input_type == "text":
        chatbot.append((input_data, None))
    elif input_type in ("image", "audio"):
        # A one-element tuple is how the tuple-style Chatbot history marks a
        # message as a media file rather than plain text.
        chatbot.append(((input_data,), None))
    else:
        raise gr.Error(INVALID_INPUT_MESSAGE)
    return chatbot


def clear_data():
    """Return component updates that reset the textbox and the chat history.

    The result is keyed by the module-level ``txt`` and ``chatbot``
    components, so it is only usable as an event handler whose outputs
    include both.  It is not currently wired to any event in this file.
    """
    return {txt: None, chatbot: []}


def run(history, text, image, audio_upload, audio_mic):
    """Query the model with the given inputs and append its reply.

    Args:
        history: Chat history list; mutated in place and returned.
        text: Prompt text; ``None`` or ``""`` means "no text".
        image: Path (or file object) of an image to open with PIL, or
            ``None``.
        audio_upload: Uploaded audio input, or ``None``.  Takes precedence
            over ``audio_mic``.
        audio_mic: Recorded audio input, or ``None``.

    Returns:
        A 5-tuple ``(history, None, None, None, None)``; the four ``None``
        values are placeholders for clearing four input components.
    """
    if text in [None, ""]:
        text = None

    if audio_upload is not None:
        audio = audio_upload
    elif audio_mic is not None:
        audio = audio_mic
    else:
        audio = None

    print("text", text)
    print("image", image)
    print("audio", audio)

    if image is not None:
        image = Image.open(image)
    outputs = multimodal_phi2(text, audio, image)

    history.append((None, outputs.title()))
    return history, None, None, None, None


def _media_kind(path):
    """Return ``"image"`` or ``"audio"`` for *path*, or ``None`` if neither."""
    if path is None:
        return None
    mime_type, _ = mimetypes.guess_type(str(path))
    if mime_type is None:
        return None
    major = mime_type.partition("/")[0]
    return major if major in ("image", "audio") else None


def _submit_text(history, text):
    """Add a text message to the history and clear the textbox."""
    return add_content(history, text, "text"), ""


def _submit_file(history, file):
    """Add an uploaded image or audio file to the history."""
    # Depending on the Gradio version the upload is delivered either as a
    # temp-file wrapper exposing ``.name`` or as a plain path string.
    path = getattr(file, "name", file)
    kind = _media_kind(path)
    if kind is None:
        raise gr.Error(INVALID_INPUT_MESSAGE)
    return add_content(history, path, kind)


def bot(history):
    """Answer the most recent user message in *history*.

    Adapter between the Chatbot component and :func:`run`: it extracts the
    latest user message, routes it to the matching ``run`` argument and
    returns only the updated history.
    """
    if not history:
        return history

    user_message = history[-1][0]
    if user_message is None:
        # The last entry is already a bot reply.  This happens when the
        # preceding "add" step was rejected, so there is nothing new to
        # answer.
        return history

    text = image = audio = None
    if isinstance(user_message, (tuple, list)):
        path = user_message[0]
        kind = _media_kind(path)
        if kind == "image":
            image = path
        elif kind == "audio":
            # NOTE(review): assumes the model accepts an audio file path;
            # confirm against MultiModalPhi2.
            audio = path
        else:
            raise gr.Error(INVALID_INPUT_MESSAGE)
    else:
        text = user_message

    return run(history, text, image, audio, None)[0]


with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        bubble_full_width=False,
        avatar_images=(
            None,
            os.path.join(os.path.dirname(__file__), "avatar.png"),
        ),
    )

    with gr.Row():
        txt = gr.Textbox(
            scale=4,
            show_label=False,
            placeholder="Enter text and press enter",
            container=False,
        )
        # The first positional argument is the button label; the button
        # accepts an image or an audio file.
        img_audio = gr.UploadButton("📁", file_types=["image", "audio"])

    txt_msg = txt.submit(
        _submit_text, [chatbot, txt], [chatbot, txt], queue=False
    ).then(bot, chatbot, chatbot, api_name="bot_response")
    img_audio_msg = img_audio.upload(
        _submit_file, [chatbot, img_audio], [chatbot], queue=False
    ).then(bot, chatbot, chatbot)

demo.launch()