Update app.py
Browse files
app.py
CHANGED
@@ -1,61 +1,66 @@
|
|
1 |
-
|
|
|
2 |
from transformers import MarianMTModel, MarianTokenizer, BlipProcessor, BlipForConditionalGeneration
|
3 |
from PIL import Image
|
4 |
import torch
|
5 |
|
6 |
-
# Load
|
7 |
-
|
8 |
-
tokenizer = MarianTokenizer.from_pretrained(
|
9 |
-
translation_model = MarianMTModel.from_pretrained(
|
10 |
|
11 |
-
# Load
|
12 |
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
|
13 |
caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
|
14 |
|
|
|
15 |
def translate_tamil_to_english(tamil_text):
|
16 |
-
|
|
|
|
|
17 |
translated = translation_model.generate(**inputs)
|
18 |
english_text = tokenizer.decode(translated[0], skip_special_tokens=True)
|
19 |
return english_text
|
20 |
|
21 |
-
# Generate image
|
22 |
def generate_image_from_text(text_prompt):
|
23 |
-
#
|
24 |
-
img = Image.new('RGB', (512, 512), color=
|
25 |
return img
|
26 |
|
|
|
27 |
def describe_image(image):
    """Produce a text caption for ``image``.

    The image is run through the module-level ``caption_processor`` and
    ``caption_model``; the first generated sequence is decoded with special
    tokens removed and returned.
    """
    model_inputs = caption_processor(images=image, return_tensors="pt")
    generated = caption_model.generate(**model_inputs)
    return caption_processor.decode(generated[0], skip_special_tokens=True)
|
32 |
|
|
|
33 |
def full_pipeline(tamil_text):
|
34 |
english_text = translate_tamil_to_english(tamil_text)
|
35 |
generated_image = generate_image_from_text(english_text)
|
36 |
-
|
37 |
-
return english_text, generated_image,
|
38 |
|
39 |
-
# Gradio
|
40 |
with gr.Blocks() as demo:
|
41 |
-
gr.Markdown("## Tamil
|
42 |
|
43 |
with gr.Row():
|
44 |
-
tamil_input = gr.Textbox(label="Enter Tamil Text", lines=2, placeholder="உதாரணம்: ஒரு
|
45 |
-
|
46 |
-
|
47 |
-
translate_btn = gr.Button("Translate and Generate")
|
48 |
|
49 |
with gr.Row():
|
50 |
-
english_output = gr.Textbox(label="Translated English
|
51 |
-
|
52 |
|
53 |
image_output = gr.Image(label="Generated Image")
|
54 |
|
55 |
translate_btn.click(
|
56 |
fn=full_pipeline,
|
57 |
inputs=tamil_input,
|
58 |
-
outputs=[english_output, image_output,
|
59 |
)
|
60 |
|
61 |
demo.launch()
|
|
|
1 |
+
# Dependencies must be installed outside this file (a shell command is not
# valid Python and would raise SyntaxError on import), e.g.:
#   pip install gradio transformers torch Pillow
import gradio as gr
from transformers import MarianMTModel, MarianTokenizer, BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import torch
|
6 |
|
7 |
+
# 1. Tamil -> English translation: tokenizer and model come from the same
#    checkpoint name.
translation_model_name = "Helsinki-NLP/opus-mt-ta-en"
tokenizer = MarianTokenizer.from_pretrained(translation_model_name)
translation_model = MarianMTModel.from_pretrained(translation_model_name)

# 2. Image captioning: BLIP processor and model, likewise loaded from a
#    single checkpoint name.
caption_model_name = "Salesforce/blip-image-captioning-base"
caption_processor = BlipProcessor.from_pretrained(caption_model_name)
caption_model = BlipForConditionalGeneration.from_pretrained(caption_model_name)
|
15 |
|
16 |
+
# 3. Translation function
|
17 |
def translate_tamil_to_english(tamil_text):
    """Translate ``tamil_text`` to English using the module-level Marian model.

    Args:
        tamil_text: Source text. ``None``, an empty string, or a
            whitespace-only string is treated as missing input.

    Returns:
        The decoded first generated sequence (special tokens removed), or
        the literal string ``"No input given"`` when there is no text.
    """
    # Guard against None before calling .strip(); a caller (e.g. the UI
    # layer) may pass None instead of "" for an empty field.
    if tamil_text is None or not tamil_text.strip():
        return "No input given"

    inputs = tokenizer(tamil_text, return_tensors="pt", padding=True, truncation=True)
    translated = translation_model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
|
24 |
|
25 |
+
# 4. Generate dummy image from text
|
26 |
def generate_image_from_text(text_prompt):
    """Return a placeholder image for ``text_prompt``.

    The prompt is accepted but not used: the result is always a solid
    512x512 RGB image filled with the colour (200, 230, 255).
    """
    canvas_size = (512, 512)
    fill_colour = (200, 230, 255)
    return Image.new('RGB', canvas_size, color=fill_colour)
|
30 |
|
31 |
+
# 5. Describe the image
|
32 |
def describe_image(image):
    """Caption ``image`` with the module-level BLIP processor and model.

    Returns the first generated sequence decoded to text with special
    tokens skipped.
    """
    pixel_inputs = caption_processor(images=image, return_tensors="pt")
    generated_ids = caption_model.generate(**pixel_inputs)
    return caption_processor.decode(generated_ids[0], skip_special_tokens=True)
|
37 |
|
38 |
+
# 6. Combined pipeline
|
39 |
def full_pipeline(tamil_text):
    """Run translation, image generation and captioning in sequence.

    Args:
        tamil_text: Text passed to ``translate_tamil_to_english``.

    Returns:
        A 3-tuple ``(english_text, generated_image, caption)``.
    """
    translation = translate_tamil_to_english(tamil_text)
    picture = generate_image_from_text(translation)
    # The caption is computed from the image that was just generated.
    return translation, picture, describe_image(picture)
|
44 |
|
45 |
+
# 7. Gradio UI
|
46 |
with gr.Blocks() as demo:
    # NOTE(review): widget nesting was reconstructed from a flattened view of
    # this file — confirm which components belong inside each Row.
    gr.Markdown("## 🌍 Tamil ➝ English ➝ Image ➝ Description App")

    # Input area.
    with gr.Row():
        tamil_input = gr.Textbox(
            label="Enter Tamil Text",
            lines=2,
            placeholder="உதாரணம்: ஒரு பூங்காவில் ஒரு பசுமை மரம் உள்ளது",
        )

    translate_btn = gr.Button("Translate, Generate Image, and Describe")

    # Text results.
    with gr.Row():
        english_output = gr.Textbox(label="Translated English")
        caption_output = gr.Textbox(label="Image Description")

    image_output = gr.Image(label="Generated Image")

    # Output components are listed in the same order as the values
    # returned by full_pipeline: text, image, caption.
    translate_btn.click(
        fn=full_pipeline,
        inputs=tamil_input,
        outputs=[english_output, image_output, caption_output],
    )

demo.launch()
|