24Sureshkumar committed on
Commit
f67d206
·
verified ·
1 Parent(s): cbc840b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -13
app.py CHANGED
@@ -8,6 +8,8 @@ from PIL import Image
8
  import tempfile
9
  import os
10
  import time
 
 
11
 
12
  # Use CUDA if available
13
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -21,13 +23,16 @@ translator_tokenizer.src_lang = "ta_IN"
21
  gen_model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)
22
  gen_tokenizer = AutoTokenizer.from_pretrained("gpt2")
23
 
24
- # Load a lightweight image generation model (for CPU)
25
  pipe = StableDiffusionPipeline.from_pretrained(
26
  "OFA-Sys/small-stable-diffusion-v0",
27
  torch_dtype=torch.float32,
28
- use_auth_token=os.getenv("HF_TOKEN") # Set this in Hugging Face Space secrets
29
  ).to(device)
30
- pipe.safety_checker = None # Optional: disable safety checker for speed
 
 
 
31
 
32
  # Translation Function
33
  def translate_tamil_to_english(text, reference=None):
@@ -48,7 +53,7 @@ def translate_tamil_to_english(text, reference=None):
48
 
49
  return translated, duration, rouge_l
50
 
51
- # Creative Text Generator
52
  def generate_creative_text(prompt, max_length=100):
53
  start = time.time()
54
  input_ids = gen_tokenizer.encode(prompt, return_tensors="pt").to(device)
@@ -59,26 +64,45 @@ def generate_creative_text(prompt, max_length=100):
59
  tokens = text.split()
60
  repetition_rate = sum(t1 == t2 for t1, t2 in zip(tokens, tokens[1:])) / len(tokens)
61
 
62
- return text, duration, len(tokens), round(repetition_rate, 4)
 
 
 
 
 
 
 
63
 
64
- # AI Image Generator
65
  def generate_image(prompt):
66
  try:
67
  start = time.time()
68
  result = pipe(prompt)
69
  image = result.images[0].resize((256, 256))
 
70
  tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
71
  image.save(tmp_file.name)
72
- return tmp_file.name, round(time.time() - start, 2)
 
 
 
 
 
 
 
 
 
 
 
73
  except Exception as e:
74
- return None, f"Image generation failed: {str(e)}"
75
 
76
  # Streamlit UI
77
  st.set_page_config(page_title="Tamil → English + AI Art", layout="centered")
78
  st.title("🧠 Tamil → English + 🎨 Creative Text + AI Image")
79
 
80
  tamil_input = st.text_area("✍️ Enter Tamil text here", height=150)
81
- reference_input = st.text_input("📘 Optional: Reference English translation for ROUGE")
82
 
83
  if st.button("🚀 Generate Output"):
84
  if not tamil_input.strip():
@@ -95,20 +119,24 @@ if st.button("🚀 Generate Output"):
95
  st.info("ℹ️ ROUGE-L not calculated. Reference not provided.")
96
 
97
  with st.spinner("🎨 Generating image..."):
98
- image_path, img_time = generate_image(english_text)
99
 
100
  if image_path:
101
  st.success(f"🖼️ Image generated in {img_time} seconds")
102
  st.image(Image.open(image_path), caption="AI-Generated Image", use_column_width=True)
 
103
  else:
104
  st.error(image_path)
105
 
106
  with st.spinner("💡 Generating creative text..."):
107
- creative, c_time, tokens, rep_rate = generate_creative_text(english_text)
108
 
109
  st.success(f"✨ Creative text generated in {c_time} seconds")
110
- st.markdown(f"**🧠 Creative Output:** `{creative}`")
111
- st.markdown(f"📌 Tokens: `{tokens}`, Repetition Rate: `{rep_rate}`")
 
 
 
112
 
113
  st.markdown("---")
114
  st.caption("Built by Sureshkumar R using MBart, GPT-2 & Stable Diffusion on Hugging Face")
 
8
  import tempfile
9
  import os
10
  import time
11
+ import clip
12
+ import torchvision.transforms as transforms
13
 
14
  # Use CUDA if available
15
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
23
  gen_model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)
24
  gen_tokenizer = AutoTokenizer.from_pretrained("gpt2")
25
 
26
+ # Load a lightweight image generation model
27
  pipe = StableDiffusionPipeline.from_pretrained(
28
  "OFA-Sys/small-stable-diffusion-v0",
29
  torch_dtype=torch.float32,
30
+ use_auth_token=os.getenv("HF_TOKEN") # Set in Hugging Face Space secrets
31
  ).to(device)
32
+ pipe.safety_checker = None # Optional: disable for speed
33
+
34
+ # Load CLIP model for image-text similarity
35
+ clip_model, clip_preprocess = clip.load("ViT-B/32", device=device)
36
 
37
  # Translation Function
38
  def translate_tamil_to_english(text, reference=None):
 
53
 
54
  return translated, duration, rouge_l
55
 
56
+ # Creative Text Generator with Perplexity
57
  def generate_creative_text(prompt, max_length=100):
58
  start = time.time()
59
  input_ids = gen_tokenizer.encode(prompt, return_tensors="pt").to(device)
 
64
  tokens = text.split()
65
  repetition_rate = sum(t1 == t2 for t1, t2 in zip(tokens, tokens[1:])) / len(tokens)
66
 
67
+ # Perplexity calculation
68
+ with torch.no_grad():
69
+ input_ids = gen_tokenizer.encode(text, return_tensors="pt").to(device)
70
+ outputs = gen_model(input_ids, labels=input_ids)
71
+ loss = outputs.loss
72
+ perplexity = torch.exp(loss).item()
73
+
74
+ return text, duration, len(tokens), round(repetition_rate, 4), round(perplexity, 4)
75
 
76
+ # AI Image Generator with CLIP Similarity
77
  def generate_image(prompt):
78
  try:
79
  start = time.time()
80
  result = pipe(prompt)
81
  image = result.images[0].resize((256, 256))
82
+
83
  tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
84
  image.save(tmp_file.name)
85
+
86
+ # CLIP similarity
87
+ image_input = clip_preprocess(image).unsqueeze(0).to(device)
88
+ text_input = clip.tokenize([prompt]).to(device)
89
+
90
+ with torch.no_grad():
91
+ image_features = clip_model.encode_image(image_input)
92
+ text_features = clip_model.encode_text(text_input)
93
+ similarity = torch.cosine_similarity(image_features, text_features).item()
94
+
95
+ return tmp_file.name, round(time.time() - start, 2), round(similarity, 4)
96
+
97
  except Exception as e:
98
+ return None, f"Image generation failed: {str(e)}", None
99
 
100
  # Streamlit UI
101
  st.set_page_config(page_title="Tamil → English + AI Art", layout="centered")
102
  st.title("🧠 Tamil → English + 🎨 Creative Text + AI Image")
103
 
104
  tamil_input = st.text_area("✍️ Enter Tamil text here", height=150)
105
+ reference_input = st.text_input("📘 Optional: Reference English translation for ROUGE-L")
106
 
107
  if st.button("🚀 Generate Output"):
108
  if not tamil_input.strip():
 
119
  st.info("ℹ️ ROUGE-L not calculated. Reference not provided.")
120
 
121
  with st.spinner("🎨 Generating image..."):
122
+ image_path, img_time, clip_score = generate_image(english_text)
123
 
124
  if image_path:
125
  st.success(f"🖼️ Image generated in {img_time} seconds")
126
  st.image(Image.open(image_path), caption="AI-Generated Image", use_column_width=True)
127
+ st.markdown(f"πŸ” **CLIP Text-Image Similarity:** `{clip_score}`")
128
  else:
129
  st.error(image_path)
130
 
131
  with st.spinner("💡 Generating creative text..."):
132
+ creative, c_time, tokens, rep_rate, perplexity = generate_creative_text(english_text)
133
 
134
  st.success(f"✨ Creative text generated in {c_time} seconds")
135
+ st.markdown("**🧠 Creative Output:**")
136
+ st.text(creative)
137
+ st.markdown(f"📌 Tokens: `{tokens}`")
138
+ st.markdown(f"πŸ” Repetition Rate: `{rep_rate}`")
139
+ st.markdown(f"📉 Perplexity: `{perplexity}`")
140
 
141
  st.markdown("---")
142
  st.caption("Built by Sureshkumar R using MBart, GPT-2 & Stable Diffusion on Hugging Face")