24Sureshkumar committed on
Commit 87e851b · verified · 1 Parent(s): 016b5dd

Update app.py

Files changed (1)
  1. app.py +28 -84
app.py CHANGED
@@ -1,96 +1,40 @@
- import os
  import gradio as gr
- import torch
- from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer, pipeline
- from diffusers import StableDiffusionPipeline
- from PIL import Image

- # Load translation model/tokenizer (Tamil→English)
- try:
-     translator = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
-     tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
-     tokenizer.src_lang = "ta"
- except Exception as e:
-     print(f"Error loading M2M100 model: {e}")
-     translator = tokenizer = None

- # Load GPT-2 text generation pipeline
- try:
-     text_generator = pipeline("text-generation", model="gpt2")
- except Exception as e:
-     print(f"Error loading GPT-2 model: {e}")
-     text_generator = None

- # Load Stable Diffusion pipeline
- hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
- device = "cuda" if torch.cuda.is_available() else "cpu"
- try:
-     pipe = StableDiffusionPipeline.from_pretrained(
-         "runwayml/stable-diffusion-v1-5",
-         torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-         use_auth_token=hf_token
-     )
-     pipe = pipe.to(device)
-     # Optionally enable efficient attention slicing if on GPU to save memory
-     if device == "cuda":
-         pipe.enable_attention_slicing()
- except Exception as e:
-     print(f"Error loading Stable Diffusion pipeline: {e}")
-     pipe = None

- def tamil_to_image(tamil_text):
-     """
-     Translate Tamil text to English, generate new text with GPT-2,
-     and produce an image with Stable Diffusion.
-     Returns (PIL.Image, info_text).
-     """
-     if not tamil_text or not tamil_text.strip():
-         return None, "Error: Please enter Tamil text as input."

-     # Translation
-     try:
-         tokenizer.src_lang = "ta"
-         encoded = tokenizer(tamil_text, return_tensors="pt")
-         generated_tokens = translator.generate(
-             **encoded, forced_bos_token_id=tokenizer.get_lang_id("en")
-         )
-         translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
-     except Exception as e:
-         return None, f"Translation error: {e}"

-     # Text generation with GPT-2
-     try:
-         gen = text_generator(translation, max_length=50, num_return_sequences=1)
-         gen_text = gen[0]['generated_text'] if isinstance(gen, list) else gen['generated_text']
-     except Exception as e:
-         return None, f"Text generation error: {e}"

-     # Image generation with Stable Diffusion
-     try:
-         # Use the generated text as prompt
-         prompt = gen_text
-         if device == "cuda":
-             image = pipe(prompt, num_inference_steps=50).images[0]
-         else:
-             # On CPU, reduce steps to speed up if needed
-             image = pipe(prompt, num_inference_steps=25).images[0]
-     except Exception as e:
-         return None, f"Image generation error: {e}"

-     info = f"Translated → English: {translation}\nGPT-2 Prompt: {prompt}"
-     return image, info

- # Build Gradio interface
- iface = gr.Interface(
-     fn=tamil_to_image,
-     inputs=gr.Textbox(label="Tamil Input", placeholder="Enter Tamil text here", type="text"),
      outputs=[
-         gr.Image(type="pil", label="Generated Image"),
-         gr.Textbox(label="Output Info", type="text")
      ],
-     title="Tamil Text-to-Image Generator",
-     description="Enter Tamil text; this demo translates it to English, generates a story prompt with GPT-2, then creates an image with Stable Diffusion."
  )

- # Launch the app (in Spaces this will run on startup)
- iface.launch()
 
 
  import gradio as gr
+ from transformers import pipeline

+ # Load models
+ translation_pipeline = pipeline("translation", model="Helsinki-NLP/opus-mt-ta-en")
+ text_generation_pipeline = pipeline("text-generation", model="gpt2")

+ # Simulated image generation (replace with Hugging Face Diffusers or similar if needed)
+ def generate_image(prompt: str) -> str:
+     # You can integrate actual image generation here
+     return f"https://via.placeholder.com/512?text={prompt.replace(' ', '+')}"

+ # Main function
+ def multimodal_pipeline(tamil_text: str):
+     # Step 1: Translate Tamil to English
+     translated = translation_pipeline(tamil_text)[0]["translation_text"]

+     # Step 2: Generate English text
+     generated = text_generation_pipeline(translated, max_length=50, do_sample=True)[0]["generated_text"]

+     # Step 3: Generate Image (simulate)
+     image_url = generate_image(generated)

+     return translated, generated, image_url

+ # Gradio Interface
+ interface = gr.Interface(
+     fn=multimodal_pipeline,
+     inputs=gr.Textbox(label="Enter Tamil Text", placeholder="உங்கள் தமிழ் உரையை இங்கே உள்ளிடவும்"),
      outputs=[
+         gr.Textbox(label="English Translation"),
+         gr.Textbox(label="Generated Prompt"),
+         gr.Image(label="Generated Image"),
      ],
+     title="Tamil to Image Multimodal App",
+     description="This app translates Tamil to English, generates a descriptive sentence, and creates an image based on it."
  )

+ if __name__ == "__main__":
+     interface.launch()
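
Note: the new generate_image is a stub that returns a via.placeholder.com URL instead of a real picture, as its own comment says. To restore actual image generation inside the new structure, the Stable Diffusion setup from the removed version can be folded back in. A minimal sketch, assuming the same runwayml/stable-diffusion-v1-5 checkpoint as before and that diffusers and torch are listed in the Space's requirements:

import torch
from diffusers import StableDiffusionPipeline

# Same checkpoint the removed version loaded; any diffusers
# text-to-image checkpoint works the same way.
device = "cuda" if torch.cuda.is_available() else "cpu"
sd_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)
sd_pipe = sd_pipe.to(device)

def generate_image(prompt: str):
    # Return a PIL.Image rather than a URL string, mirroring the removed
    # version's step counts (fewer inference steps on CPU to keep latency sane).
    steps = 50 if device == "cuda" else 25
    return sd_pipe(prompt, num_inference_steps=steps).images[0]

gr.Image accepts a PIL image as readily as a URL string, so the outputs list of the interface would not need to change.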
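
The __main__ guard added in this commit also means app.py can be imported without starting the web UI, which allows a quick smoke test of the three-step chain. A hypothetical check; the Tamil input is illustrative and means roughly "a cat is sitting on the mat":

# Importing app loads the two pipelines but, because of the
# __name__ == "__main__" guard, does not launch Gradio.
from app import multimodal_pipeline

translated, generated, image_url = multimodal_pipeline(
    "ஒரு பூனை பாயில் அமர்ந்திருக்கிறது"  # illustrative input: "a cat is sitting on the mat"
)
print("Translation:", translated)
print("Prompt:", generated)
print("Image URL:", image_url)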