# app.py — last change "Update app.py" (494d93f, verified) by Navyabhat; 8.35 kB.
import gradio as gr
from PIL import Image
from inference.main import MultiModalPhi2
# NOTE(review): `messages` is not referenced anywhere else in the visible code.
messages = []
# Model wrapper built once at import time; run() calls this object directly
# for every request.
multimodal_phi2 = MultiModalPhi2(
    modelname_or_path="Navyabhat/Llava-Phi2",
    temperature=0.2,
    max_new_tokens=1024,
    device="cpu",
)
def add_content(chatbot, text, image, audio_upload, audio_mic) -> list:
    """Append the user's submitted inputs to the chat history.

    Each provided input becomes its own ``(user_message, None)`` entry:
    text is stored as-is, while an image or audio value is wrapped in a
    one-element tuple.

    Args:
        chatbot: Chat history list; it is extended in place.
        text: Prompt text; ``""`` and ``None`` both count as "not provided".
        image: Image value, or ``None``.
        audio_upload: Uploaded audio value, or ``None``.
        audio_mic: Microphone audio value, or ``None``.

    Returns:
        The same ``chatbot`` list that was passed in.

    Raises:
        gr.Error: If no text, image or audio was provided.
    """
    # Use the same precedence as run(): an uploaded clip wins over a
    # microphone recording, so the chat shows the audio the model receives.
    audio = audio_upload if audio_upload is not None else audio_mic

    added = False
    if text not in ("", None):
        chatbot.append((text, None))
        added = True
    for media in (image, audio):
        if media is not None:
            chatbot.append(((media,), None))
            added = True

    if not added:
        # Nothing usable was submitted.
        raise gr.Error("Enter a valid text, image or audio")
    return chatbot
def clear_data():
    """Return the mapping of components to their cleared values.

    The four input components map to ``None`` and the chatbot maps to an
    empty list.

    NOTE(review): ``prompt``, ``image``, ``audio_upload``, ``audio_mic`` and
    ``chatbot`` are looked up as module-level names. In the visible file they
    are only assigned inside the commented-out layout, so this raises
    ``NameError`` unless they are defined elsewhere.
    """
    reset_values = dict.fromkeys((prompt, image, audio_upload, audio_mic))
    reset_values[chatbot] = []
    return reset_values
def run(history, text, image, audio_upload, audio_mic):
    """Run the model on the submitted inputs and append its reply to the chat.

    Args:
        history: Chat history list; the reply is appended in place as
            ``(None, reply)``.
        text: Prompt text; ``""`` is normalised to ``None``.
        image: Value passed to ``Image.open`` when not ``None``
            (presumably a file path — confirm against the UI wiring).
        audio_upload: Uploaded audio value; takes precedence over the
            microphone recording.
        audio_mic: Microphone audio value, used only when no upload is given.

    Returns:
        A 5-tuple ``(history, None, None, None, None)``. The four ``None``
        values presumably reset the prompt, image and audio inputs — confirm
        against the event wiring.
    """
    if text in (None, ""):
        text = None

    if audio_upload is not None:
        audio = audio_upload
    elif audio_mic is not None:
        audio = audio_mic
    else:
        audio = None

    print("text", text)
    print("image", image)
    print("audio", audio)

    if image is None:
        outputs = multimodal_phi2(text, audio, None)
    else:
        # Keep the image file open only while the model consumes it, so the
        # underlying file handle is released after every request.
        with Image.open(image) as pil_image:
            outputs = multimodal_phi2(text, audio, pil_image)

    # NOTE(review): .title() capitalises every word of the model reply;
    # confirm this is the intended presentation.
    history.append((None, outputs.title()))
    return history, None, None, None, None
# # Custom styling
# interface_style = {
# "box": {
# "backgroundColor": "#f9f9f9",
# "padding": "20px",
# "borderRadius": "10px",
# "boxShadow": "0 0 10px rgba(0, 0, 0, 0.1)",
# },
# "button": {
# "backgroundColor": "#4caf50",
# "color": "#fff",
# "padding": "10px",
# "border": "none",
# "borderRadius": "5px",
# "cursor": "pointer",
# },
# "textbox": {
# "width": "100%",
# "padding": "10px",
# "marginBottom": "10px",
# "boxSizing": "border-box",
# },
# "image": {
# "width": "100%",
# "marginBottom": "10px",
# },
# "audio": {
# "width": "100%",
# "marginBottom": "10px",
# },
# "chatbox": {
# "height": "550px",
# "backgroundColor": "#f0f0f0",
# "borderRadius": "5px",
# "padding": "10px",
# "overflowY": "auto",
# },
# }
# with gr.Blocks() as demo:
# gr.Markdown("## MultiModal Phi2 Model Pretraining and Finetuning from Scratch")
# with gr.Row():
# with gr.Column(scale=4):
# with gr.Box(style=interface_style["box"]):
# with gr.Row():
# prompt = gr.Textbox(
# placeholder="Enter Prompt",
# lines=2,
# label="Query",
# value=None,
# style=interface_style["textbox"],
# )
# with gr.Row():
# image = gr.Image(
# type="filepath", value=None, style=interface_style["image"]
# )
# with gr.Row():
# audio_upload = gr.Audio(
# source="upload", type="filepath", style=interface_style["audio"]
# )
# audio_mic = gr.Audio(
# source="microphone",
# type="filepath",
# format="mp3",
# style=interface_style["audio"],
# )
# with gr.Column(scale=8):
# with gr.Box(style=interface_style["box"]):
# with gr.Row():
# chatbot = gr.Chatbot(
# avatar_images=("🧑", "🤖"),
# height=550,
# style=interface_style["chatbox"],
# )
# with gr.Row():
# submit = gr.Button(style=interface_style["button"])
# clear = gr.Button(value="Clear", style=interface_style["button"])
# submit.click(
# add_content,
# inputs=[chatbot, prompt, image, audio_upload, audio_mic],
# outputs=[chatbot],
# ).success(
# run,
# inputs=[chatbot, prompt, image, audio_upload, audio_mic],
# outputs=[chatbot, prompt, image, audio_upload, audio_mic],
# )
# clear.click(
# clear_data,
# outputs=[prompt, image, audio_upload, audio_mic, chatbot],
# )
# demo.launch()
# Static page markup rendered through gr.HTML. The embedded script is a
# placeholder: Submit appends a canned reply and never calls the model.
# The typed text is HTML-escaped before insertion so it is displayed
# literally instead of being parsed as markup.
custom_html = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<style>
body {
font-family: 'Arial', sans-serif;
background-color: #f4f4f4;
margin: 0;
padding: 0;
}
#container {
max-width: 800px;
margin: 20px auto;
padding: 20px;
background-color: #fff;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
border-radius: 10px;
display: flex;
flex-direction: column;
align-items: center;
}
label {
font-weight: bold;
margin-bottom: 5px;
display: block;
}
textarea, input[type="file"] {
width: 100%;
padding: 10px;
margin-bottom: 10px;
box-sizing: border-box;
}
button {
background-color: #4caf50;
color: #fff;
padding: 10px;
border: none;
border-radius: 5px;
cursor: pointer;
}
button:hover {
background-color: #45a049;
}
#chat-container {
max-height: 550px;
overflow-y: auto;
border: 1px solid #ddd;
padding: 10px;
border-radius: 5px;
background-color: #f0f0f0;
margin-top: 10px;
}
</style>
</head>
<body>
<div id="container">
<h2 style="text-align: center;">MultiModal Phi2 Model Pretraining and Finetuning from Scratch</h2>
<div style="display: flex; width: 100%;">
<div style="flex: 1; margin-right: 10px;">
<label for="inputText">Text Input:</label>
<textarea id="inputText" name="inputText" placeholder="Enter Prompt" rows="4"></textarea>
<label for="inputImage">Image Input:</label>
<input type="file" id="inputImage" name="inputImage" accept="image/*">
<label for="inputAudio">Audio Input:</label>
<input type="file" id="inputAudio" name="inputAudio" accept="audio/*">
</div>
<div style="flex: 2;">
<div id="chat-container"></div>
</div>
</div>
<button id="submitBtn">Submit</button>
<button id="clearBtn" style="background-color: #f44336;">Clear</button>
</div>
<script>
document.getElementById('submitBtn').addEventListener('click', function() {
// Add your logic to process and update the chat
// Example: Update the chat with a placeholder response
var inputText = document.getElementById('inputText').value;
var chatContainer = document.getElementById('chat-container');
// Escape the typed text so it is shown literally instead of parsed as markup
var escaper = document.createElement('div');
escaper.textContent = inputText;
var response = "<strong>User:</strong> " + escaper.innerHTML + "<br><strong>Model:</strong> This is a placeholder response.<br>";
chatContainer.innerHTML += response;
});
document.getElementById('clearBtn').addEventListener('click', function() {
// Clear input and chat
document.getElementById('inputText').value = '';
document.getElementById('inputImage').value = '';
document.getElementById('inputAudio').value = '';
document.getElementById('chat-container').innerHTML = '';
});
</script>
</body>
</html>
"""
# Assemble the app: one Blocks container whose only child is the static
# HTML page, then start the server.
# NOTE(review): add_content, run and clear_data are not attached to any
# event here; their only wiring is in the commented-out layout.
demo = gr.Blocks()
with demo:
    gr.HTML(custom_html)
demo.launch()