import tensorflow as tf
import numpy as np
from PIL import Image
import gradio as gr

# Load the saved Keras model once, at import time, so that every call to
# generate_caption() reuses the same in-memory model.
# The path is relative, so "model_9.h5" is resolved against the current
# working directory of the process.
# NOTE(review): the original comment describes this as a CNN+LSTM captioning
# model; the architecture is not visible from this file — confirm.
model = tf.keras.models.load_model("model_9.h5")

# Define the preprocessing function for the image
def preprocess_image(image, target_size=(224, 224)):
    """Convert a PIL image into a normalized, batched array for the model.

    Args:
        image: A PIL image (any object exposing ``convert`` and ``resize``
            with PIL semantics).
        target_size: ``(width, height)`` handed to ``resize``. Defaults to
            ``(224, 224)``; change it to match the loaded model's input size.

    Returns:
        A ``float32`` NumPy array with a leading batch axis of length 1.
        For an 8-bit image this is shaped ``(1, height, width, 3)`` with
        values scaled into ``[0, 1]``.

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    if image is None:
        raise ValueError("preprocess_image() requires an image, got None")

    # Force three channels: RGBA, palette or grayscale inputs would otherwise
    # yield arrays with a different channel count (or no channel axis at all).
    resized = image.convert("RGB").resize(target_size)

    # Ask for float32 up front; dividing an integer array by 255.0 would
    # silently promote to float64 and double the memory footprint.
    pixels = np.asarray(resized, dtype=np.float32) / 255.0

    return np.expand_dims(pixels, axis=0)  # Add batch dimension

# Define the function that generates a caption from the image
def generate_caption(image):
    """Run the loaded model on an uploaded image and return its output.

    Args:
        image: The PIL image supplied by the Gradio image input, or ``None``
            when the form is submitted without an image.

    Returns:
        Whatever ``model.predict`` returns for the preprocessed image. No
        token-to-word decoding is performed here.

    Raises:
        gr.Error: If no image was provided, so the UI shows a readable
            message instead of an ``AttributeError`` traceback.
    """
    if image is None:
        raise gr.Error("Please upload an image before requesting a caption.")

    batch = preprocess_image(image)

    # NOTE(review): this returns the raw prediction. If the model needs a
    # sequence start token or a step-by-step decoding loop, that logic (and
    # the mapping from token ids back to words) still has to be added here.
    return model.predict(batch)

# Build the web UI: a single image input (delivered to the callback as a PIL
# image) wired to generate_caption, with the result shown as text.
iface = gr.Interface(
    generate_caption,
    gr.Image(type="pil"),
    "text",
    title="Image Captioning Model",
    description="Upload an image, and the model will generate a caption describing it.",
)

# share=True asks Gradio to also create a publicly shareable link in addition
# to the local server.
iface.launch(share=True)