import streamlit as st
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch

# Load model and processor
@st.cache_resource
def load_model():
    model_name = "Nishthaaa/image_captioning"
    processor = BlipProcessor.from_pretrained(model_name)
    model = BlipForConditionalGeneration.from_pretrained(model_name)
    return processor, model

processor, model = load_model()

# Streamlit UI
st.title("Cartoon Caption Generator 🖼️📜")
st.write("Upload a cartoon image and get a funny caption!")

uploaded_file = st.file_uploader("Upload a Cartoon Image", type=["jpg", "png", "jpeg"])

if uploaded_file is not None:
    image = Image.open(uploaded_file).convert("RGB")
    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    # Preprocess and generate caption
    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        generated_ids = model.generate(**inputs)
    caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    st.subheader("Generated Caption:")
    st.write(f"💬 {caption}")
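
# Note: a minimal sketch of how to run this app locally, assuming the script is
# saved as app.py (the filename is an assumption, not specified above) and the
# dependencies are installed:
#
#   pip install streamlit transformers torch pillow
#   streamlit run app.py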