Spaces:

allantacuelwvsu
/

multimodal_chatbot

Running

App Files Files Community

multimodal_chatbot / app.py

allantacuelwvsu

quality of life

cedbd37 2 months ago

raw

history blame contribute delete

7.37 kB

	import streamlit as st
	import requests
	import os
	import time

	# Load Hugging Face API key from the environment (None when the variable is unset)
	HF_API_KEY = os.getenv("HF_API_KEY")

	# Define API URLs
	IMG2TEXT_API = "https://api-inference.huggingface.co/models/nlpconnect/vit-gpt2-image-captioning"
	CHAT_API = "https://api-inference.huggingface.co/models/facebook/blenderbot-3B"

	# Only attach an Authorization header when a key is actually configured.
	# Formatting a missing key would otherwise send the literal "Bearer None".
	HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"} if HF_API_KEY else {}

	# Page heading
	st.title("Multimodal Chatbot")

	# First run of a session: seed the history with the assistant's greeting
	if "messages" not in st.session_state:
	    welcome_text = "Hello! I'm a chatbot. You can upload an image or ask me anything to get started!"
	    st.session_state.messages = [{"role": "assistant", "content": welcome_text}]

	# Replay every stored message in its own chat bubble
	for entry in st.session_state.messages:
	    speaker = entry["role"]
	    with st.chat_message(speaker):
	        st.write(entry["content"])

	# No caption is known until an uploaded image has been processed
	image_caption = None

	# Image upload widget, limited to the extensions listed here
	accepted_extensions = ["jpg", "png", "jpeg"]
	uploaded_file = st.file_uploader("Upload an image...", type=accepted_extensions)

	# Free-text chat box
	user_input = st.chat_input("Ask about this image or anything...")

	# Process image if uploaded
	def _generated_text(payload):
	    """Return the ``generated_text`` value from an inference response, or None.

	    Accepts the two shapes this script handles: a non-empty list whose first
	    element is a dict, or a bare dict. Any other shape, a missing key, or an
	    empty value yields None so the caller can treat it as "no usable text".
	    """
	    if isinstance(payload, list) and payload:
	        payload = payload[0]
	    if isinstance(payload, dict):
	        return payload.get("generated_text") or None
	    return None

	_IMAGE_REQUEST_TIMEOUT = 60  # seconds; keeps a stalled request from hanging the rerun
	_IMAGE_FALLBACK_REPLY = "I received your image. What would you like to ask about it?"

	image_caption = None
	upload_id = None
	needs_captioning = False

	if uploaded_file:
	    # Check image type
	    if uploaded_file.type not in ("image/jpeg", "image/png"):
	        st.error("⚠️ Please upload a valid JPG or PNG image.")
	    else:
	        # The uploader keeps returning the same file on every rerun, so only
	        # caption an upload that has not already produced a stored caption.
	        # Without this guard each chat message re-sends the image and
	        # appends a duplicate "[Image Uploaded]" entry.
	        upload_id = (uploaded_file.name, uploaded_file.size)
	        needs_captioning = not (
	            st.session_state.get("captioned_upload") == upload_id
	            and st.session_state.get("image_caption")
	        )

	if needs_captioning:
	    # getvalue() returns the whole buffer regardless of the read position
	    img_bytes = uploaded_file.getvalue()
	    st.session_state.messages.append({"role": "user", "content": "[Image Uploaded]"})
	    with st.chat_message("user"):
	        st.image(img_bytes, caption="Uploaded Image", use_column_width=True)
	        # TODO: surface the generated caption to the user once it is available.

	    # Send image to Hugging Face image-to-text API with retries
	    max_retries = 3
	    for attempt in range(1, max_retries + 1):
	        try:
	            response = requests.post(
	                IMG2TEXT_API,
	                headers={**HEADERS, "Content-Type": "application/octet-stream"},
	                data=img_bytes,  # Send raw image data
	                timeout=_IMAGE_REQUEST_TIMEOUT,
	            )
	        except requests.RequestException as exc:
	            st.error(f"⚠️ Image API Error: {exc}")
	            break

	        if response.status_code == 503:
	            # Model is still loading: wait and retry, but do not sleep after the last attempt
	            if attempt < max_retries:
	                st.warning(f"⏳ Model warming up... Retrying in 5 seconds. Attempt {attempt}/{max_retries}")
	                time.sleep(5)
	            continue

	        if response.status_code != 200:
	            st.error(f"⚠️ Image API Error: {response.status_code} - {response.text}")
	            break

	        try:
	            image_caption = _generated_text(response.json())
	        except ValueError as exc:
	            # Body was not valid JSON
	            st.error(f"⚠️ Error: Unable to generate caption. Details: {exc}")
	        else:
	            if image_caption is None:
	                st.error("⚠️ Unexpected response format from image captioning API.")
	        break
	    else:
	        # Every attempt returned 503
	        st.error("⚠️ Image captioning model is still warming up. Please try again shortly.")

	    if image_caption:
	        st.session_state.image_caption = image_caption
	        st.session_state.captioned_upload = upload_id
	        bot_context = (
	            f"Consider this image: {image_caption}. Please provide a relevant and engaging response to the image."
	        )

	        # Send context to chatbot; any failure falls back to a generic acknowledgement
	        try:
	            bot_response = requests.post(
	                CHAT_API,
	                headers=HEADERS,
	                json={"inputs": bot_context},
	                timeout=_IMAGE_REQUEST_TIMEOUT,
	            )
	            reply_text = _generated_text(bot_response.json()) if bot_response.status_code == 200 else None
	        except (requests.RequestException, ValueError):
	            reply_text = None
	        bot_reply = reply_text or _IMAGE_FALLBACK_REPLY

	        # Append chatbot's generated response
	        st.session_state.messages.append({"role": "assistant", "content": bot_reply})
	        with st.chat_message("assistant"):
	            st.write(bot_reply)

	# Process user input if provided
	if user_input:
	    combined_input = user_input

	    # Merge image caption with user query if an image was captioned earlier in the session
	    stored_caption = st.session_state.get("image_caption")
	    if stored_caption:
	        combined_input = f"Image context: {stored_caption}. {user_input}"

	    # Append user message
	    st.session_state.messages.append({"role": "user", "content": user_input})
	    with st.chat_message("user"):
	        st.write(user_input)

	    # Send combined input to chatbot with retries
	    payload = {"inputs": combined_input}
	    max_retries = 3
	    chat_timeout = 60  # seconds; keeps a stalled request from hanging the rerun

	    # Pre-bind the reply so it is defined even when every attempt returns 503
	    bot_reply = "⚠️ The chatbot model is still warming up. Please try again in a moment."
	    for attempt in range(1, max_retries + 1):
	        try:
	            response = requests.post(CHAT_API, headers=HEADERS, json=payload, timeout=chat_timeout)
	        except requests.RequestException as exc:
	            bot_reply = f"⚠️ Chatbot Error: {exc}"
	            break

	        if response.status_code == 503:
	            # Model is still loading: wait and retry, but do not sleep after the last attempt
	            if attempt < max_retries:
	                st.warning(f"⏳ Model warming up... Retrying in 5 seconds. Attempt {attempt}/{max_retries}")
	                time.sleep(5)
	            continue

	        if response.status_code != 200:
	            bot_reply = f"⚠️ Chatbot Error {response.status_code}: {response.text}"
	            break

	        try:
	            res_json = response.json()
	        except ValueError:
	            # Body was not valid JSON
	            bot_reply = "⚠️ Error: Unable to generate response."
	            break

	        # Some models wrap the result dict in a list; unwrap the first element
	        if isinstance(res_json, list) and res_json:
	            res_json = res_json[0]

	        if isinstance(res_json, dict) and "generated_text" in res_json:
	            bot_reply = res_json["generated_text"]
	        else:
	            st.error("⚠️ Unexpected response format from chatbot API.")
	            bot_reply = "⚠️ Unable to generate a response."
	        break

	    # Append bot response
	    st.session_state.messages.append({"role": "assistant", "content": bot_reply})
	    with st.chat_message("assistant"):
	        st.write(bot_reply)

	# Clear button to reset chat
	if st.button("Clear Chat"):
	    st.session_state.messages = []
	    # Drop the stored caption too, so a cleared chat does not keep
	    # prepending the previous image's context to new questions.
	    if "image_caption" in st.session_state:
	        del st.session_state["image_caption"]
	    # Prefer st.rerun; fall back to the older experimental name on
	    # Streamlit versions that do not provide it.
	    rerun = getattr(st, "rerun", None) or st.experimental_rerun
	    rerun()