# app.py — Hugging Face Space: Tamil ➝ English ➝ generated image ➝ description demo
# (non-code page chrome from the hosting site removed so the file parses as Python)
import gradio as gr
from transformers import MarianMTModel, MarianTokenizer, BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import torch
# 1. Load Tamil to English translation model
# Downloaded from the Hugging Face hub on first run, then served from the local cache.
# NOTE(review): loading happens at import time — app startup blocks until both models are fetched.
translation_model_name = "Helsinki-NLP/opus-mt-ta-en"
tokenizer = MarianTokenizer.from_pretrained(translation_model_name)
translation_model = MarianMTModel.from_pretrained(translation_model_name)
# 2. Load BLIP model for image captioning
# Processor (pre/post-processing) and model come from the same checkpoint; used by describe_image().
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
# 3. Translation function
def translate_tamil_to_english(tamil_text):
    """Translate Tamil text into English with the MarianMT ta->en model.

    Returns the decoded English string, or the sentinel "No input given"
    when the input is empty/whitespace-only.
    """
    # Guard clause: nothing to translate.
    if not tamil_text.strip():
        return "No input given"
    encoded = tokenizer(tamil_text, return_tensors="pt", padding=True, truncation=True)
    output_ids = translation_model.generate(**encoded)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
# 4. Generate dummy image from text
def generate_image_from_text(text_prompt):
    """Return a 512x512 placeholder image.

    The prompt is accepted for API compatibility but is not used —
    this stub produces a plain light-blue PIL image instead of running
    a real text-to-image model.
    """
    placeholder = Image.new('RGB', (512, 512), color=(200, 230, 255))
    return placeholder
# 5. Describe the image
def describe_image(image):
    """Produce a one-line English caption for *image* using BLIP."""
    encoded = caption_processor(images=image, return_tensors="pt")
    generated_ids = caption_model.generate(**encoded)
    return caption_processor.decode(generated_ids[0], skip_special_tokens=True)
# 6. Combined pipeline
def full_pipeline(tamil_text):
    """Run the full chain: translate ➝ generate image ➝ caption.

    Returns (english_text, image, caption) in that order, matching the
    Gradio outputs wiring below.
    """
    english = translate_tamil_to_english(tamil_text)
    image = generate_image_from_text(english)
    return english, image, describe_image(image)
# 7. Gradio UI
# Layout: one input row (textbox + button), one output row (two textboxes),
# then the generated image below. The button drives the whole pipeline.
with gr.Blocks() as demo:
    gr.Markdown("## 🌍 Tamil ➝ English ➝ Image ➝ Description App")

    with gr.Row():
        tamil_input = gr.Textbox(label="Enter Tamil Text", lines=2, placeholder="உதாரணம்: ஒரு பூங்காவில் ஒரு பசுமை மரம் உள்ளது")
        translate_btn = gr.Button("Translate, Generate Image, and Describe")

    with gr.Row():
        english_output = gr.Textbox(label="Translated English")
        caption_output = gr.Textbox(label="Image Description")

    image_output = gr.Image(label="Generated Image")

    # full_pipeline returns (english, image, caption) — outputs list must match that order.
    translate_btn.click(
        fn=full_pipeline,
        inputs=tamil_input,
        outputs=[english_output, image_output, caption_output],
    )

demo.launch()