Spaces:

24Sureshkumar
/

Tam_to_Eng_Translation_and_Image_Generation_Model

Running

App Files Files Community

24Sureshkumar committed on Jul 4

Commit

f67d206

verified ·

1 Parent(s): cbc840b

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -13

app.py CHANGED Viewed

@@ -8,6 +8,8 @@ from PIL import Image
 import tempfile
 import os
 import time
 # Use CUDA if available
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -21,13 +23,16 @@ translator_tokenizer.src_lang = "ta_IN"
 gen_model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)
 gen_tokenizer = AutoTokenizer.from_pretrained("gpt2")
-# Load a lightweight image generation model (for CPU)
 pipe = StableDiffusionPipeline.from_pretrained(
     "OFA-Sys/small-stable-diffusion-v0",
     torch_dtype=torch.float32,
-    use_auth_token=os.getenv("HF_TOKEN")  # Set this in Hugging Face Space secrets
 ).to(device)
-pipe.safety_checker = None  # Optional: disable safety checker for speed
 # Translation Function
 def translate_tamil_to_english(text, reference=None):
@@ -48,7 +53,7 @@ def translate_tamil_to_english(text, reference=None):
     return translated, duration, rouge_l
-# Creative Text Generator
 def generate_creative_text(prompt, max_length=100):
     start = time.time()
     input_ids = gen_tokenizer.encode(prompt, return_tensors="pt").to(device)
@@ -59,26 +64,45 @@ def generate_creative_text(prompt, max_length=100):
     tokens = text.split()
     repetition_rate = sum(t1 == t2 for t1, t2 in zip(tokens, tokens[1:])) / len(tokens)
-    return text, duration, len(tokens), round(repetition_rate, 4)
-# AI Image Generator
 def generate_image(prompt):
     try:
         start = time.time()
         result = pipe(prompt)
         image = result.images[0].resize((256, 256))
         tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
         image.save(tmp_file.name)
-        return tmp_file.name, round(time.time() - start, 2)
     except Exception as e:
-        return None, f"Image generation failed: {str(e)}"
 # Streamlit UI
 st.set_page_config(page_title="Tamil → English + AI Art", layout="centered")
 st.title("🧠 Tamil → English + 🎨 Creative Text + AI Image")
 tamil_input = st.text_area("✍️ Enter Tamil text here", height=150)
-reference_input = st.text_input("📘 Optional: Reference English translation for ROUGE")
 if st.button("🚀 Generate Output"):
     if not tamil_input.strip():
@@ -95,20 +119,24 @@ if st.button("🚀 Generate Output"):
             st.info("ℹ️ ROUGE-L not calculated. Reference not provided.")
         with st.spinner("🎨 Generating image..."):
-            image_path, img_time = generate_image(english_text)
         if image_path:
             st.success(f"🖼️ Image generated in {img_time} seconds")
             st.image(Image.open(image_path), caption="AI-Generated Image", use_column_width=True)
         else:
             st.error(image_path)
         with st.spinner("💡 Generating creative text..."):
-            creative, c_time, tokens, rep_rate = generate_creative_text(english_text)
         st.success(f"✨ Creative text generated in {c_time} seconds")
-        st.markdown(f"**🧠 Creative Output:** `{creative}`")
-        st.markdown(f"📌 Tokens: `{tokens}`, Repetition Rate: `{rep_rate}`")
 st.markdown("---")
 st.caption("Built by Sureshkumar R using MBart, GPT-2 & Stable Diffusion on Hugging Face")

 import tempfile
 import os
 import time
+import clip
+import torchvision.transforms as transforms
 # Use CUDA if available
 device = "cuda" if torch.cuda.is_available() else "cpu"
 gen_model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)
 gen_tokenizer = AutoTokenizer.from_pretrained("gpt2")
+# Load a lightweight image generation model
 pipe = StableDiffusionPipeline.from_pretrained(
     "OFA-Sys/small-stable-diffusion-v0",
     torch_dtype=torch.float32,
+    use_auth_token=os.getenv("HF_TOKEN")  # Set in Hugging Face Space secrets
 ).to(device)
+pipe.safety_checker = None  # Optional: disable for speed
+# Load CLIP model for image-text similarity
+clip_model, clip_preprocess = clip.load("ViT-B/32", device=device)
 # Translation Function
 def translate_tamil_to_english(text, reference=None):
     return translated, duration, rouge_l
+# Creative Text Generator with Perplexity
 def generate_creative_text(prompt, max_length=100):
     start = time.time()
     input_ids = gen_tokenizer.encode(prompt, return_tensors="pt").to(device)
     tokens = text.split()
     repetition_rate = sum(t1 == t2 for t1, t2 in zip(tokens, tokens[1:])) / len(tokens)
+    # Perplexity calculation
+    with torch.no_grad():
+        input_ids = gen_tokenizer.encode(text, return_tensors="pt").to(device)
+        outputs = gen_model(input_ids, labels=input_ids)
+        loss = outputs.loss
+        perplexity = torch.exp(loss).item()
+    return text, duration, len(tokens), round(repetition_rate, 4), round(perplexity, 4)
+# AI Image Generator with CLIP Similarity
 def generate_image(prompt):
     try:
         start = time.time()
         result = pipe(prompt)
         image = result.images[0].resize((256, 256))
         tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
         image.save(tmp_file.name)
+        # CLIP similarity
+        image_input = clip_preprocess(image).unsqueeze(0).to(device)
+        text_input = clip.tokenize([prompt]).to(device)
+        with torch.no_grad():
+            image_features = clip_model.encode_image(image_input)
+            text_features = clip_model.encode_text(text_input)
+            similarity = torch.cosine_similarity(image_features, text_features).item()
+        return tmp_file.name, round(time.time() - start, 2), round(similarity, 4)
     except Exception as e:
+        return None, f"Image generation failed: {str(e)}", None
 # Streamlit UI
 st.set_page_config(page_title="Tamil → English + AI Art", layout="centered")
 st.title("🧠 Tamil → English + 🎨 Creative Text + AI Image")
 tamil_input = st.text_area("✍️ Enter Tamil text here", height=150)
+reference_input = st.text_input("📘 Optional: Reference English translation for ROUGE-L")
 if st.button("🚀 Generate Output"):
     if not tamil_input.strip():
             st.info("ℹ️ ROUGE-L not calculated. Reference not provided.")
         with st.spinner("🎨 Generating image..."):
+            image_path, img_time, clip_score = generate_image(english_text)
         if image_path:
             st.success(f"🖼️ Image generated in {img_time} seconds")
             st.image(Image.open(image_path), caption="AI-Generated Image", use_column_width=True)
+            st.markdown(f"🔍 **CLIP Text-Image Similarity:** `{clip_score}`")
         else:
             st.error(image_path)
         with st.spinner("💡 Generating creative text..."):
+            creative, c_time, tokens, rep_rate, perplexity = generate_creative_text(english_text)
         st.success(f"✨ Creative text generated in {c_time} seconds")
+        st.markdown("**🧠 Creative Output:**")
+        st.text(creative)
+        st.markdown(f"📌 Tokens: `{tokens}`")
+        st.markdown(f"🔁 Repetition Rate: `{rep_rate}`")
+        st.markdown(f"📉 Perplexity: `{perplexity}`")
 st.markdown("---")
 st.caption("Built by Sureshkumar R using MBart, GPT-2 & Stable Diffusion on Hugging Face")