import gradio as gr
from transformers import MarianMTModel, MarianTokenizer, BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import torch

# 1. Load Tamil -> English translation model (weights are downloaded on first
# run and cached by transformers).
translation_model_name = "Helsinki-NLP/opus-mt-ta-en"
tokenizer = MarianTokenizer.from_pretrained(translation_model_name)
translation_model = MarianMTModel.from_pretrained(translation_model_name)

# 2. Load BLIP model + processor for image captioning.
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")


# 3. Translation function
def translate_tamil_to_english(tamil_text: str) -> str:
    """Translate Tamil text to English with the MarianMT model.

    Args:
        tamil_text: Raw Tamil input from the UI; may be empty or None.

    Returns:
        The English translation, or the sentinel string "No input given"
        when the input is empty/blank.
    """
    # Guard against None (Gradio can pass None for a cleared textbox) as
    # well as blank/whitespace-only input.
    if not tamil_text or not tamil_text.strip():
        return "No input given"
    inputs = tokenizer(tamil_text, return_tensors="pt", padding=True, truncation=True)
    # Inference only: no_grad avoids building an autograd graph.
    with torch.no_grad():
        translated = translation_model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)


# 4. Generate dummy image from text
def generate_image_from_text(text_prompt: str) -> Image.Image:
    """Return a placeholder 512x512 solid light-blue image.

    NOTE(review): text_prompt is currently ignored — this is a stand-in
    for a real text-to-image model; wire one in to make the prompt matter.
    """
    return Image.new("RGB", (512, 512), color=(200, 230, 255))


# 5. Describe the image
def describe_image(image) -> str:
    """Generate an English caption for a PIL image using BLIP."""
    inputs = caption_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        out = caption_model.generate(**inputs)
    return caption_processor.decode(out[0], skip_special_tokens=True)


# 6. Combined pipeline
def full_pipeline(tamil_text):
    """Run the whole chain: Tamil -> English -> (dummy) image -> caption.

    Returns a 3-tuple (english_text, generated_image, caption) in the
    order the Gradio outputs list expects.
    """
    english_text = translate_tamil_to_english(tamil_text)
    generated_image = generate_image_from_text(english_text)
    caption = describe_image(generated_image)
    return english_text, generated_image, caption
# 7. Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🌍 Tamil ➝ English ➝ Image ➝ Description App")
    with gr.Row():
        tamil_input = gr.Textbox(
            label="Enter Tamil Text",
            lines=2,
            placeholder="உதாரணம்: ஒரு பூங்காவில் ஒரு பசுமை மரம் உள்ளது",
        )
        translate_btn = gr.Button("Translate, Generate Image, and Describe")
    with gr.Row():
        english_output = gr.Textbox(label="Translated English")
        caption_output = gr.Textbox(label="Image Description")
    image_output = gr.Image(label="Generated Image")

    # Output components must match full_pipeline's return order:
    # (english_text, generated_image, caption).
    translate_btn.click(
        fn=full_pipeline,
        inputs=tamil_input,
        outputs=[english_output, image_output, caption_output],
    )

# Launch only when run as a script, so importing this module for reuse or
# testing does not start the web server.
if __name__ == "__main__":
    demo.launch()