import gradio as gr
from PIL import Image

from inference.main import MultiModalPhi2

messages = []

# Load the Llava-Phi2 wrapper once at startup; device="cpu" keeps the
# Space runnable without a GPU.
multimodal_phi2 = MultiModalPhi2(
    modelname_or_path="Navyabhat/Llava-Phi2",
    temperature=0.2,
    max_new_tokens=1024,
    device="cpu",
)
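

# A direct-call sketch for quick testing outside the UI (assumptions: the
# wrapper is callable with (text, audio, image) positionally, as `run`
# below does, and returns a string).
def describe_image(path: str, question: str = "Describe the image") -> str:
    # Example: describe_image("sample.jpg")  # "sample.jpg" is hypothetical
    return multimodal_phi2(question, None, Image.open(path))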
def add_content(chatbot, text, image, audio_upload, audio_mic) -> list:
    """Append the user's text, image, and audio inputs to the chat history."""
    textflag, imageflag, audioflag = False, False, False
    if text not in ["", None]:
        chatbot.append((text, None))
        textflag = True
    if image is not None:
        chatbot.append(((image,), None))
        imageflag = True
    # Prefer the microphone recording; fall back to an uploaded file.
    if audio_mic is not None:
        chatbot.append(((audio_mic,), None))
        audioflag = True
    elif audio_upload is not None:
        chatbot.append(((audio_upload,), None))
        audioflag = True
    if not any([textflag, imageflag, audioflag]):
        # Raise an error if neither text nor a file is provided
        raise gr.Error("Enter a valid text, image or audio")
    return chatbot
def clear_data():
    """Reset every input and the chat history (order matches the outputs list)."""
    return None, None, None, None, []
def run(history, text, image, audio_upload, audio_mic):
    """Send the collected inputs to the model and append its reply."""
    if text in [None, ""]:
        text = None
    # Prefer the uploaded file; fall back to the microphone recording.
    if audio_upload is not None:
        audio = audio_upload
    elif audio_mic is not None:
        audio = audio_mic
    else:
        audio = None

    print("text", text)
    print("image", image)
    print("audio", audio)

    if image is not None:
        image = Image.open(image)

    outputs = multimodal_phi2(text, audio, image)
    # outputs = ""

    # Append the model reply (title-cased) and clear all inputs.
    history.append((None, outputs.title()))
    return history, None, None, None, None
# # Custom styling
# interface_style = {
#     "box": {
#         "backgroundColor": "#f9f9f9",
#         "padding": "20px",
#         "borderRadius": "10px",
#         "boxShadow": "0 0 10px rgba(0, 0, 0, 0.1)",
#     },
#     "button": {
#         "backgroundColor": "#4caf50",
#         "color": "#fff",
#         "padding": "10px",
#         "border": "none",
#         "borderRadius": "5px",
#         "cursor": "pointer",
#     },
#     "textbox": {
#         "width": "100%",
#         "padding": "10px",
#         "marginBottom": "10px",
#         "boxSizing": "border-box",
#     },
#     "image": {
#         "width": "100%",
#         "marginBottom": "10px",
#     },
#     "audio": {
#         "width": "100%",
#         "marginBottom": "10px",
#     },
#     "chatbox": {
#         "height": "550px",
#         "backgroundColor": "#f0f0f0",
#         "borderRadius": "5px",
#         "padding": "10px",
#         "overflowY": "auto",
#     },
# }

# with gr.Blocks() as demo:
#     gr.Markdown("## MultiModal Phi2 Model Pretraining and Finetuning from Scratch")
#     with gr.Row():
#         with gr.Column(scale=4):
#             with gr.Box(style=interface_style["box"]):
#                 with gr.Row():
#                     prompt = gr.Textbox(
#                         placeholder="Enter Prompt",
#                         lines=2,
#                         label="Query",
#                         value=None,
#                         style=interface_style["textbox"],
#                     )
#                 with gr.Row():
#                     image = gr.Image(
#                         type="filepath", value=None, style=interface_style["image"]
#                     )
#                 with gr.Row():
#                     audio_upload = gr.Audio(
#                         source="upload", type="filepath", style=interface_style["audio"]
#                     )
#                     audio_mic = gr.Audio(
#                         source="microphone",
#                         type="filepath",
#                         format="mp3",
#                         style=interface_style["audio"],
#                     )
#         with gr.Column(scale=8):
#             with gr.Box(style=interface_style["box"]):
#                 with gr.Row():
#                     chatbot = gr.Chatbot(
#                         avatar_images=("🧑", "🤖"),
#                         height=550,
#                         style=interface_style["chatbox"],
#                     )
#                 with gr.Row():
#                     submit = gr.Button(style=interface_style["button"])
#                     clear = gr.Button(value="Clear", style=interface_style["button"])
#
#     submit.click(
#         add_content,
#         inputs=[chatbot, prompt, image, audio_upload, audio_mic],
#         outputs=[chatbot],
#     ).success(
#         run,
#         inputs=[chatbot, prompt, image, audio_upload, audio_mic],
#         outputs=[chatbot, prompt, image, audio_upload, audio_mic],
#     )
#     clear.click(
#         clear_data,
#         outputs=[prompt, image, audio_upload, audio_mic, chatbot],
#     )
#
# demo.launch()
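

# A sketch of the same Blocks layout on a newer Gradio API, wired to the
# handlers above but left uncalled so the HTML demo below stays active.
# Assumptions: Gradio 4.x, where gr.Box and per-component style= were
# removed (gr.Group is the closest replacement) and gr.Audio takes
# sources=[...] instead of source=.
def build_blocks_demo() -> gr.Blocks:
    with gr.Blocks() as blocks_demo:
        gr.Markdown("## MultiModal Phi2 Model Pretraining and Finetuning from Scratch")
        with gr.Row():
            with gr.Column(scale=4):
                with gr.Group():
                    prompt = gr.Textbox(placeholder="Enter Prompt", lines=2, label="Query")
                    image = gr.Image(type="filepath")
                    audio_upload = gr.Audio(sources=["upload"], type="filepath")
                    audio_mic = gr.Audio(sources=["microphone"], type="filepath", format="mp3")
            with gr.Column(scale=8):
                chatbot = gr.Chatbot(height=550)
                with gr.Row():
                    submit = gr.Button("Submit")
                    clear = gr.Button("Clear")
        # Validate/append the inputs first, then run the model on success.
        submit.click(
            add_content,
            inputs=[chatbot, prompt, image, audio_upload, audio_mic],
            outputs=[chatbot],
        ).success(
            run,
            inputs=[chatbot, prompt, image, audio_upload, audio_mic],
            outputs=[chatbot, prompt, image, audio_upload, audio_mic],
        )
        clear.click(
            clear_data,
            outputs=[prompt, image, audio_upload, audio_mic, chatbot],
        )
    return blocks_demo
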
custom_html = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <style>
        body {
            font-family: 'Arial', sans-serif;
            background-color: #f4f4f4;
            margin: 0;
            padding: 0;
        }
        #container {
            max-width: 800px;
            margin: 20px auto;
            padding: 20px;
            background-color: #fff;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
            border-radius: 10px;
            display: flex;
            flex-direction: column;
            align-items: center;
        }
        label {
            font-weight: bold;
            margin-bottom: 5px;
            display: block;
        }
        textarea, input[type="file"] {
            width: 100%;
            padding: 10px;
            margin-bottom: 10px;
            box-sizing: border-box;
        }
        button {
            background-color: #4caf50;
            color: #fff;
            padding: 10px;
            border: none;
            border-radius: 5px;
            cursor: pointer;
        }
        button:hover {
            background-color: #45a049;
        }
        #chat-container {
            max-height: 550px;
            overflow-y: auto;
            border: 1px solid #ddd;
            padding: 10px;
            border-radius: 5px;
            background-color: #f0f0f0;
            margin-top: 10px;
        }
    </style>
</head>
<body>
    <div id="container">
        <h2 style="text-align: center;">MultiModal Phi2 Model Pretraining and Finetuning from Scratch</h2>
        <div style="display: flex; width: 100%;">
            <div style="flex: 1; margin-right: 10px;">
                <label for="inputText">Text Input:</label>
                <textarea id="inputText" name="inputText" placeholder="Enter Prompt" rows="4"></textarea>
                <label for="inputImage">Image Input:</label>
                <input type="file" id="inputImage" name="inputImage" accept="image/*">
                <label for="inputAudio">Audio Input:</label>
                <input type="file" id="inputAudio" name="inputAudio" accept="audio/*">
            </div>
            <div style="flex: 2;">
                <div id="chat-container"></div>
            </div>
        </div>
        <button id="submitBtn">Submit</button>
        <button id="clearBtn" style="background-color: #f44336;">Clear</button>
    </div>
    <script>
        document.getElementById('submitBtn').addEventListener('click', function() {
            // Add your logic to process and update the chat
            // Example: update the chat with a placeholder response
            var inputText = document.getElementById('inputText').value;
            var chatContainer = document.getElementById('chat-container');
            var response = "<strong>User:</strong> " + inputText + "<br><strong>Model:</strong> This is a placeholder response.<br>";
            chatContainer.innerHTML += response;
        });
        document.getElementById('clearBtn').addEventListener('click', function() {
            // Clear input and chat
            document.getElementById('inputText').value = '';
            document.getElementById('inputImage').value = '';
            document.getElementById('inputAudio').value = '';
            document.getElementById('chat-container').innerHTML = '';
        });
    </script>
</body>
</html>
"""
with gr.Blocks() as demo:
    gr.HTML(custom_html)

demo.launch()