import os
import time

import gradio as gr
from PIL import Image

from inference.main import MultiModalPhi2

messages = []

# Load the Llava-Phi2 multimodal model (CPU inference).
multimodal_phi2 = MultiModalPhi2(
    modelname_or_path="RaviNaik/Llava-Phi2",
    temperature=0.2,
    max_new_tokens=1024,
    device="cpu",
)


def add_content(chatbot, text, image, audio_upload, audio_mic) -> gr.Chatbot:
    """Append the user's text/image/audio inputs to the chat history."""
    textflag, imageflag, audioflag = False, False, False
    if text not in ["", None]:
        chatbot.append((text, None))
        textflag = True
    if image is not None:
        chatbot.append(((image,), None))
        imageflag = True
    # Prefer the microphone recording over an uploaded audio file.
    if audio_mic is not None:
        chatbot.append(((audio_mic,), None))
        audioflag = True
    elif audio_upload is not None:
        chatbot.append(((audio_upload,), None))
        audioflag = True
    if not any([textflag, imageflag, audioflag]):
        # Raise an error if neither text nor a file is provided.
        raise gr.Error("Enter a valid text, image or audio")
    return chatbot


def clear_data():
    # Helper to reset all inputs and the chat history (not wired to a button yet).
    return {"text": None, "image": None, "audio_upload": None, "audio_mic": None, "chatbot": []}


def run(history, text, image, audio_upload, audio_mic):
    """Run the model on the collected inputs and append its reply to the chat."""
    if text in [None, ""]:
        text = None
    if audio_upload is not None:
        audio = audio_upload
    elif audio_mic is not None:
        audio = audio_mic
    else:
        audio = None
    print("text", text)
    print("image", image)
    print("audio", audio)
    if image is not None:
        image = Image.open(image)
    outputs = multimodal_phi2(text, audio, image)
    history.append((None, outputs.title()))
    # Clear the text, image and audio inputs after the response is added.
    return history, None, None, None, None


def print_like_dislike(x: gr.LikeData):
    print(x.index, x.value, x.liked)


def add_text(history, text):
    history = history + [(text, None)]
    return history, gr.Textbox(value="", interactive=False)


def add_file(history, file):
    history = history + [((file.name,), None)]
    return history


def bot(history):
    # Demo responder that streams a canned reply character by character.
    response = "**That's cool!**"
    history[-1][1] = ""
    for character in response:
        history[-1][1] += character
        time.sleep(0.05)
        yield history


with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        bubble_full_width=False,
        avatar_images=(None, os.path.join(os.path.dirname(__file__), "avatar.png")),
    )

    with gr.Row():
        txt = gr.Textbox(
            scale=4,
            show_label=False,
            placeholder="Enter text and press enter, or upload an image",
            container=False,
        )
        btn = gr.UploadButton("📁", file_types=["image", "video", "audio"])

    # Media inputs consumed by add_content/run (Gradio 4.x component signatures assumed).
    with gr.Row():
        image = gr.Image(type="filepath", label="Image")
        audio_upload = gr.Audio(sources=["upload"], type="filepath", label="Audio (upload)")
        audio_mic = gr.Audio(sources=["microphone"], type="filepath", label="Audio (microphone)")

    # On submit: add the user's inputs to the chat, then run the model and clear the inputs.
    txt_msg = txt.submit(
        add_content,
        [chatbot, txt, image, audio_upload, audio_mic],
        [chatbot],
        queue=False,
    ).then(
        run,
        [chatbot, txt, image, audio_upload, audio_mic],
        [chatbot, txt, image, audio_upload, audio_mic],
        api_name="bot_response",
    )
    txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)

    # The upload button only appends the chosen file to the chat; the media inputs
    # above are what feed the model.
    file_msg = btn.upload(add_file, [chatbot, btn], [chatbot], queue=False)

    chatbot.like(print_like_dislike, None, None)

demo.queue()
demo.launch()
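# Example (assumption, not part of the original app): a quick smoke test of the
# MultiModalPhi2 wrapper without the UI, mirroring the call made inside `run`.
# "sample.jpg" is a hypothetical placeholder path; uncomment and adjust to try it.
#
# reply = multimodal_phi2("Describe the image.", None, Image.open("sample.jpg"))
# print(reply)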