24Sureshkumar committed
Commit 92c7729 · verified · 1 Parent(s): bb4fda3

Update app.py

Files changed (1)
  1. app.py +47 -47
app.py CHANGED
@@ -1,66 +1,66 @@
 
 
- import gradio as gr
- from transformers import MarianMTModel, MarianTokenizer, BlipProcessor, BlipForConditionalGeneration
  from PIL import Image
  import torch

- # 1. Load Tamil to English translation model
- translation_model_name = "Helsinki-NLP/opus-mt-ta-en"
- tokenizer = MarianTokenizer.from_pretrained(translation_model_name)
- translation_model = MarianMTModel.from_pretrained(translation_model_name)

- # 2. Load BLIP model for image captioning
- caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
- caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

- # 3. Translation function
- def translate_tamil_to_english(tamil_text):
-     if not tamil_text.strip():
-         return "No input given"
-     inputs = tokenizer(tamil_text, return_tensors="pt", padding=True, truncation=True)
-     translated = translation_model.generate(**inputs)
      english_text = tokenizer.decode(translated[0], skip_special_tokens=True)
      return english_text

- # 4. Generate dummy image from text
- def generate_image_from_text(text_prompt):
-     # Use a plain color image with PIL
-     img = Image.new('RGB', (512, 512), color=(200, 230, 255))
      return img

- # 5. Describe the image
  def describe_image(image):
-     inputs = caption_processor(images=image, return_tensors="pt")
-     out = caption_model.generate(**inputs)
-     caption = caption_processor.decode(out[0], skip_special_tokens=True)
-     return caption
-
- # 6. Combined pipeline
- def full_pipeline(tamil_text):
-     english_text = translate_tamil_to_english(tamil_text)
-     generated_image = generate_image_from_text(english_text)
-     caption = describe_image(generated_image)
-     return english_text, generated_image, caption

- # 7. Gradio UI
- with gr.Blocks() as demo:
-     gr.Markdown("## 🌍 Tamil ➝ English ➝ Image ➝ Description App")

-     with gr.Row():
-         tamil_input = gr.Textbox(label="Enter Tamil Text", lines=2, placeholder="உதாரணம்: ஒரு பூங்காவில் ஒரு பசுமை மரம் உள்ளது")

-     translate_btn = gr.Button("Translate, Generate Image, and Describe")

-     with gr.Row():
-         english_output = gr.Textbox(label="Translated English")
-         caption_output = gr.Textbox(label="Image Description")

-     image_output = gr.Image(label="Generated Image")

-     translate_btn.click(
-         fn=full_pipeline,
-         inputs=tamil_input,
-         outputs=[english_output, image_output, caption_output]
-     )

- demo.launch()
+ # Install necessary libraries if not already installed
+ # !pip install transformers diffusers torch torchvision accelerate huggingface_hub matplotlib Pillow

+ from huggingface_hub import login
+ from transformers import MarianTokenizer, MarianMTModel
  from PIL import Image
  import torch
+ from torchvision import transforms
+ from transformers import BlipProcessor, BlipForConditionalGeneration
+ import matplotlib.pyplot as plt

+ # Step 1: Login to Hugging Face
+ login("your_huggingface_token_here")  # 🔐 Replace this with your actual token

+ # Step 2: Tamil to English Translation
+ def translate_tamil_to_english(text):
+     model_name = "Helsinki-NLP/opus-mt-ta-en"
+     tokenizer = MarianTokenizer.from_pretrained(model_name)
+     model = MarianMTModel.from_pretrained(model_name)

+     inputs = tokenizer(text, return_tensors="pt", padding=True)
+     translated = model.generate(**inputs)
      english_text = tokenizer.decode(translated[0], skip_special_tokens=True)
      return english_text

+ # Step 3: Generate Image (using any placeholder image here since BLIP is captioning-only)
+ # You can download a sample image or use a real generation model like Stable Diffusion
+ def get_sample_image():
+     img_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/coco_sample.png"
+     img = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
      return img

+ # Step 4: Describe the Image
  def describe_image(image):
+     processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+     model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

+     inputs = processor(images=image, return_tensors="pt")
+     out = model.generate(**inputs)
+     caption = processor.decode(out[0], skip_special_tokens=True)
+     return caption

+ # === MAIN ===
+ if __name__ == "__main__":
+     import requests

+     # Step A: Input Tamil text
+     tamil_text = "ஒரு சிறிய வீடு கடற்கரைக்கு அருகிலுள்ளது"
+     print("Tamil Input:", tamil_text)

+     # Step B: Translate to English
+     english_translation = translate_tamil_to_english(tamil_text)
+     print("Translated English:", english_translation)

+     # Step C: Get sample image (placeholder for now, since we aren't using text-to-image yet)
+     image = get_sample_image()

+     # Step D: Describe the image
+     caption = describe_image(image)
+     print("Image Caption:", caption)

+     # Optional: Display the image
+     plt.imshow(image)
+     plt.title(caption)
+     plt.axis("off")
+     plt.show()
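
The Step 3 comments above note that the downloaded COCO sample is only a stand-in and that a real text-to-image model such as Stable Diffusion could be used instead (the pip line already lists diffusers). A minimal sketch of that swap, assuming the runwayml/stable-diffusion-v1-5 checkpoint and a CUDA GPU are available, and reusing the old generate_image_from_text name purely for illustration:

# Hypothetical swap for get_sample_image(): generate the image from the translated
# English text with Stable Diffusion (the checkpoint name here is an assumption).
import torch
from diffusers import StableDiffusionPipeline

# Load the text-to-image pipeline once; fp16 weights keep GPU memory usage modest.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

def generate_image_from_text(prompt):
    # One generation run; .images[0] is a PIL.Image, the type describe_image() expects.
    return pipe(prompt).images[0]

# Step C in the __main__ block would then become:
# image = generate_image_from_text(english_translation)

Loading the pipeline once at module level mirrors how the tokenizer and BLIP models were loaded in the previous Gradio version of app.py and avoids reloading weights on every call.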