import gradio as gr
from transformers import MarianMTModel, MarianTokenizer, BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import torch

# 1. Load Tamil -> English translation model (weights are downloaded on first
# run and cached by transformers).
translation_model_name = "Helsinki-NLP/opus-mt-ta-en"
tokenizer = MarianTokenizer.from_pretrained(translation_model_name)
translation_model = MarianMTModel.from_pretrained(translation_model_name)

# 2. Load BLIP model + processor for image captioning.
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")


# 3. Translation function
def translate_tamil_to_english(tamil_text: str) -> str:
    """Translate Tamil text to English with the MarianMT model.

    Args:
        tamil_text: Raw Tamil input from the UI; may be empty or None.

    Returns:
        The English translation, or the sentinel string "No input given"
        when the input is empty/blank.
    """
    # Guard against None (Gradio can pass None for a cleared textbox) as
    # well as blank/whitespace-only input.
    if not tamil_text or not tamil_text.strip():
        return "No input given"
    inputs = tokenizer(tamil_text, return_tensors="pt", padding=True, truncation=True)
    # Inference only: no_grad avoids building an autograd graph.
    with torch.no_grad():
        translated = translation_model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)


# 4. Generate dummy image from text
def generate_image_from_text(text_prompt: str) -> Image.Image:
    """Return a placeholder 512x512 solid light-blue image.

    NOTE(review): text_prompt is currently ignored — this is a stand-in
    for a real text-to-image model; wire one in to make the prompt matter.
    """
    return Image.new("RGB", (512, 512), color=(200, 230, 255))


# 5. Describe the image
def describe_image(image) -> str:
    """Generate an English caption for a PIL image using BLIP."""
    inputs = caption_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        out = caption_model.generate(**inputs)
    return caption_processor.decode(out[0], skip_special_tokens=True)


# 6. Combined pipeline
def full_pipeline(tamil_text):
    """Run the whole chain: Tamil -> English -> (dummy) image -> caption.

    Returns a 3-tuple (english_text, generated_image, caption) in the
    order the Gradio outputs list expects.
    """
    english_text = translate_tamil_to_english(tamil_text)
    generated_image = generate_image_from_text(english_text)
    caption = describe_image(generated_image)
    return english_text, generated_image, caption
# 7. Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🌍 Tamil ➝ English ➝ Image ➝ Description App")
    with gr.Row():
        tamil_input = gr.Textbox(
            label="Enter Tamil Text",
            lines=2,
            placeholder="உதாரணம்: ஒரு பூங்காவில் ஒரு பசுமை மரம் உள்ளது",
        )
        translate_btn = gr.Button("Translate, Generate Image, and Describe")
    with gr.Row():
        english_output = gr.Textbox(label="Translated English")
        caption_output = gr.Textbox(label="Image Description")
    image_output = gr.Image(label="Generated Image")

    # Output components must match full_pipeline's return order:
    # (english_text, generated_image, caption).
    translate_btn.click(
        fn=full_pipeline,
        inputs=tamil_input,
        outputs=[english_output, image_output, caption_output],
    )

# Launch only when run as a script, so importing this module for reuse or
# testing does not start the web server.
if __name__ == "__main__":
    demo.launch()