24Sureshkumar committed on
Commit 87e851b · verified · 1 Parent(s): 016b5dd

Update app.py

Files changed (1)
  1. app.py +28 -84
app.py CHANGED
@@ -1,96 +1,40 @@
- import os
  import gradio as gr
- import torch
- from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer, pipeline
- from diffusers import StableDiffusionPipeline
- from PIL import Image

- # Load translation model/tokenizer (Tamil→English)
- try:
-     translator = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
-     tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
-     tokenizer.src_lang = "ta"
- except Exception as e:
-     print(f"Error loading M2M100 model: {e}")
-     translator = tokenizer = None

- # Load GPT-2 text generation pipeline
- try:
-     text_generator = pipeline("text-generation", model="gpt2")
- except Exception as e:
-     print(f"Error loading GPT-2 model: {e}")
-     text_generator = None

- # Load Stable Diffusion pipeline
- hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
- device = "cuda" if torch.cuda.is_available() else "cpu"
- try:
-     pipe = StableDiffusionPipeline.from_pretrained(
-         "runwayml/stable-diffusion-v1-5",
-         torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-         use_auth_token=hf_token
-     )
-     pipe = pipe.to(device)
-     # Optionally enable efficient attention slicing if on GPU to save memory
-     if device == "cuda":
-         pipe.enable_attention_slicing()
- except Exception as e:
-     print(f"Error loading Stable Diffusion pipeline: {e}")
-     pipe = None

- def tamil_to_image(tamil_text):
-     """
-     Translate Tamil text to English, generate new text with GPT-2,
-     and produce an image with Stable Diffusion.
-     Returns (PIL.Image, info_text).
-     """
-     if not tamil_text or not tamil_text.strip():
-         return None, "Error: Please enter Tamil text as input."

-     # Translation
-     try:
-         tokenizer.src_lang = "ta"
-         encoded = tokenizer(tamil_text, return_tensors="pt")
-         generated_tokens = translator.generate(
-             **encoded, forced_bos_token_id=tokenizer.get_lang_id("en")
-         )
-         translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
-     except Exception as e:
-         return None, f"Translation error: {e}"

-     # Text generation with GPT-2
-     try:
-         gen = text_generator(translation, max_length=50, num_return_sequences=1)
-         gen_text = gen[0]['generated_text'] if isinstance(gen, list) else gen['generated_text']
-     except Exception as e:
-         return None, f"Text generation error: {e}"

-     # Image generation with Stable Diffusion
-     try:
-         # Use the generated text as prompt
-         prompt = gen_text
-         if device == "cuda":
-             image = pipe(prompt, num_inference_steps=50).images[0]
-         else:
-             # On CPU, reduce steps to speed up if needed
-             image = pipe(prompt, num_inference_steps=25).images[0]
-     except Exception as e:
-         return None, f"Image generation error: {e}"

-     info = f"Translated → English: {translation}\nGPT-2 Prompt: {prompt}"
-     return image, info

- # Build Gradio interface
- iface = gr.Interface(
-     fn=tamil_to_image,
-     inputs=gr.Textbox(label="Tamil Input", placeholder="Enter Tamil text here", type="text"),
      outputs=[
-         gr.Image(type="pil", label="Generated Image"),
-         gr.Textbox(label="Output Info", type="text")
      ],
-     title="Tamil Text-to-Image Generator",
-     description="Enter Tamil text; this demo translates it to English, generates a story prompt with GPT-2, then creates an image with Stable Diffusion."
  )

- # Launch the app (in Spaces this will run on startup)
- iface.launch()
 
 
  import gradio as gr
+ from transformers import pipeline

+ # Load models
+ translation_pipeline = pipeline("translation", model="Helsinki-NLP/opus-mt-ta-en")
+ text_generation_pipeline = pipeline("text-generation", model="gpt2")

+ # Simulated image generation (replace with Hugging Face Diffusers or similar if needed)
+ def generate_image(prompt: str) -> str:
+     # You can integrate actual image generation here
+     return f"https://via.placeholder.com/512?text={prompt.replace(' ', '+')}"

+ # Main function
+ def multimodal_pipeline(tamil_text: str):
+     # Step 1: Translate Tamil to English
+     translated = translation_pipeline(tamil_text)[0]["translation_text"]

+     # Step 2: Generate English text
+     generated = text_generation_pipeline(translated, max_length=50, do_sample=True)[0]["generated_text"]

+     # Step 3: Generate Image (simulate)
+     image_url = generate_image(generated)

+     return translated, generated, image_url

+ # Gradio Interface
+ interface = gr.Interface(
+     fn=multimodal_pipeline,
+     inputs=gr.Textbox(label="Enter Tamil Text", placeholder="உங்கள் தமிழ் உரையை இங்கே உள்ளிடவும்"),
      outputs=[
+         gr.Textbox(label="English Translation"),
+         gr.Textbox(label="Generated Prompt"),
+         gr.Image(label="Generated Image"),
      ],
+     title="Tamil to Image Multimodal App",
+     description="This app translates Tamil to English, generates a descriptive sentence, and creates an image based on it."
  )

+ if __name__ == "__main__":
+     interface.launch()
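
Note: the new generate_image is a stub that returns a via.placeholder.com URL instead of a real picture, as its own comment says. To restore actual image generation inside the new structure, the Stable Diffusion setup from the removed version can be folded back in. A minimal sketch, assuming the same runwayml/stable-diffusion-v1-5 checkpoint as before and that diffusers and torch are listed in the Space's requirements:

import torch
from diffusers import StableDiffusionPipeline

# Same checkpoint the removed version loaded; any diffusers
# text-to-image checkpoint works the same way.
device = "cuda" if torch.cuda.is_available() else "cpu"
sd_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)
sd_pipe = sd_pipe.to(device)

def generate_image(prompt: str):
    # Return a PIL.Image rather than a URL string, mirroring the removed
    # version's step counts (fewer inference steps on CPU to keep latency sane).
    steps = 50 if device == "cuda" else 25
    return sd_pipe(prompt, num_inference_steps=steps).images[0]

gr.Image accepts a PIL image as readily as a URL string, so the outputs list of the interface would not need to change.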
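
The __main__ guard added in this commit also means app.py can be imported without starting the web UI, which allows a quick smoke test of the three-step chain. A hypothetical check; the Tamil input is illustrative and means roughly "a cat is sitting on the mat":

# Importing app loads the two pipelines but, because of the
# __name__ == "__main__" guard, does not launch Gradio.
from app import multimodal_pipeline

translated, generated, image_url = multimodal_pipeline(
    "ஒரு பூனை பாயில் அமர்ந்திருக்கிறது"  # illustrative input: "a cat is sitting on the mat"
)
print("Translation:", translated)
print("Prompt:", generated)
print("Image URL:", image_url)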