pip install gradio transformers torch Pillow
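# Note: the first from_pretrained() calls below also download the MarianMT
# and BLIP model weights from the Hugging Face Hub.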
import gradio as gr
from transformers import MarianMTModel, MarianTokenizer, BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import torch

# 1. Load Tamil to English translation model
translation_model_name = "Helsinki-NLP/opus-mt-ta-en"
tokenizer = MarianTokenizer.from_pretrained(translation_model_name)
translation_model = MarianMTModel.from_pretrained(translation_model_name)

# 2. Load BLIP model for image captioning
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
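# Optional sketch (assumption, not part of the original script): move the
# models to a GPU when one is available.
# device = "cuda" if torch.cuda.is_available() else "cpu"
# translation_model.to(device)
# caption_model.to(device)
# (tokenized inputs would then also need .to(device) before generate())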

# 3. Translation function
def translate_tamil_to_english(tamil_text):
    if not tamil_text.strip():
        return "No input given"
    inputs = tokenizer(tamil_text, return_tensors="pt", padding=True, truncation=True)
    translated = translation_model.generate(**inputs)
    english_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return english_text
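# Quick check (hypothetical input/output): translate_tamil_to_english("வணக்கம்")
# should produce an English greeting such as "Hello".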

# 4. Generate dummy image from text
def generate_image_from_text(text_prompt):
    # Placeholder: return a plain solid-color image with PIL (text_prompt is not used)
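    # A real text-to-image model could replace this stub (sketch, assuming the
    # diffusers package is installed), e.g.:
    #   from diffusers import StableDiffusionPipeline
    #   pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
    #   return pipe(text_prompt).images[0]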
    img = Image.new('RGB', (512, 512), color=(200, 230, 255))
    return img

# 5. Describe the image
def describe_image(image):
    inputs = caption_processor(images=image, return_tensors="pt")
    out = caption_model.generate(**inputs)
    caption = caption_processor.decode(out[0], skip_special_tokens=True)
    return caption

# 6. Combined pipeline
def full_pipeline(tamil_text):
    english_text = translate_tamil_to_english(tamil_text)
    generated_image = generate_image_from_text(english_text)
    caption = describe_image(generated_image)
    return english_text, generated_image, caption

# 7. Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🌍 Tamil ➝ English ➝ Image ➝ Description App")

    with gr.Row():
        # Placeholder text (Tamil): "Example: There is a green tree in a park"
        tamil_input = gr.Textbox(label="Enter Tamil Text", lines=2, placeholder="உதாரணம்: ஒரு பூங்காவில் ஒரு பசுமை மரம் உள்ளது")

    translate_btn = gr.Button("Translate, Generate Image, and Describe")

    with gr.Row():
        english_output = gr.Textbox(label="Translated English")
        caption_output = gr.Textbox(label="Image Description")

    image_output = gr.Image(label="Generated Image")

    translate_btn.click(
        fn=full_pipeline,
        inputs=tamil_input,
        outputs=[english_output, image_output, caption_output]
    )

demo.launch()
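
# Usage note: running this script starts a local Gradio server, by default at
# http://127.0.0.1:7860; demo.launch(share=True) would also create a temporary
# public link.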