BurhaanZargar committed on
Commit
0cd73e9
·
1 Parent(s): 7304960
Files changed (1) hide show
  1. app.py +46 -31
app.py CHANGED
@@ -7,7 +7,7 @@ import requests
7
  from datetime import datetime
8
  import tempfile
9
  from gtts import gTTS
10
- import os
11
 
12
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
13
 
@@ -45,7 +45,7 @@ def save_to_supabase(input_text, output_text, direction):
45
  response = requests.post(f"{SUPABASE_URL}/rest/v1/{table}", json=payload, headers=headers)
46
  return "✅ Saved successfully!" if response.status_code == 201 else "❌ Failed to save."
47
  except Exception as e:
48
- print("Save error:", e)
49
  return "❌ Save error."
50
 
51
  def get_translation_history(direction):
@@ -62,11 +62,11 @@ def get_translation_history(direction):
62
  return "\n\n".join([f"Input: {r['input_text']} → Output: {r['output_text']}" for r in data])
63
  return "Failed to load history."
64
  except Exception as e:
65
- print("History error:", e)
66
  return "Error loading history."
67
 
68
- # --- Translation ---
69
- def translate(text, direction):
70
  if not text.strip():
71
  return "Enter some text.", None
72
 
@@ -84,9 +84,15 @@ def translate(text, direction):
84
  output = model.generate(**tokens, max_length=256, num_beams=5)
85
  result = tokenizer.batch_decode(output, skip_special_tokens=True)
86
  final = ip.postprocess_batch(result, lang=tgt_lang)[0]
87
- return final, None
 
 
 
 
 
 
88
  except Exception as e:
89
- print("Translation error:", e)
90
  return "⚠️ Translation failed.", None
91
 
92
  # --- TTS for English output ---
@@ -97,28 +103,36 @@ def synthesize_tts(text):
97
  tts.save(f.name)
98
  return f.name
99
  except Exception as e:
100
- print("TTS error:", e)
101
  return None
102
 
103
- # --- STT only for en_to_ks ---
104
- def generate_stt_for_input(audio_path, direction):
105
- if direction != "en_to_ks":
106
- return "⚠️ Audio input is only supported for English to Kashmiri.", "", None
107
-
108
  try:
 
 
 
 
 
 
 
109
  transcription = asr(audio_path)["text"]
 
110
  except Exception as e:
111
- print("STT error:", e)
112
- return "⚠️ Transcription failed.", "", None
113
 
114
- translated, _ = translate(transcription, direction)
115
- return transcription, translated, None
 
 
116
 
117
- # --- Generate TTS for English output ---
118
- def generate_tts_for_output(output_text, direction):
119
- if direction == "ks_to_en" and output_text.strip():
120
- return synthesize_tts(output_text)
121
- return None
 
122
 
123
  # --- Switch UI direction ---
124
  def switch_direction(direction, input_text_val, output_text_val):
@@ -145,23 +159,24 @@ with gr.Blocks() as interface:
145
  history = gr.Textbox(label="Translation History", lines=8, interactive=False)
146
 
147
  with gr.Row():
148
- audio_input = gr.Audio(type="filepath", label="🎙️ Record English audio")
149
  audio_output = gr.Audio(label="🔊 English TTS", interactive=False)
150
 
151
- stt_button = gr.Button("🎤 Transcribe & Translate (EN → KS Only)")
152
- tts_button = gr.Button("🔊 Generate English Speech (KS → EN Only)")
 
153
 
154
  # Events
155
  translate_button.click(
156
  fn=translate,
157
- inputs=[input_text, translation_direction],
158
  outputs=[output_text, audio_output]
159
  )
160
 
161
  tts_button.click(
162
- fn=generate_tts_for_output,
163
- inputs=[output_text, translation_direction],
164
- outputs=audio_output
165
  )
166
 
167
  save_button.click(
@@ -181,9 +196,9 @@ with gr.Blocks() as interface:
181
  )
182
 
183
  stt_button.click(
184
- fn=generate_stt_for_input,
185
  inputs=[audio_input, translation_direction],
186
- outputs=[input_text, output_text, audio_output]
187
  )
188
 
189
  if __name__ == "__main__":
 
7
  from datetime import datetime
8
  import tempfile
9
  from gtts import gTTS
10
+
11
 
12
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
13
 
 
45
  response = requests.post(f"{SUPABASE_URL}/rest/v1/{table}", json=payload, headers=headers)
46
  return "✅ Saved successfully!" if response.status_code == 201 else "❌ Failed to save."
47
  except Exception as e:
48
+ logging.error("Save error: %s", e)
49
  return "❌ Save error."
50
 
51
  def get_translation_history(direction):
 
62
  return "\n\n".join([f"Input: {r['input_text']} → Output: {r['output_text']}" for r in data])
63
  return "Failed to load history."
64
  except Exception as e:
65
+ logging.error("History error: %s", e)
66
  return "Error loading history."
67
 
68
+ # --- Translation with TTS integration ---
69
+ def translate(text, direction, generate_tts=False):
70
  if not text.strip():
71
  return "Enter some text.", None
72
 
 
84
  output = model.generate(**tokens, max_length=256, num_beams=5)
85
  result = tokenizer.batch_decode(output, skip_special_tokens=True)
86
  final = ip.postprocess_batch(result, lang=tgt_lang)[0]
87
+
88
+ # Generate TTS for KS→EN direction if requested
89
+ audio_path = None
90
+ if generate_tts and direction == "ks_to_en":
91
+ audio_path = synthesize_tts(final)
92
+
93
+ return final, audio_path
94
  except Exception as e:
95
+ logging.error("Translation error: %s", e)
96
  return "⚠️ Translation failed.", None
97
 
98
  # --- TTS for English output ---
 
103
  tts.save(f.name)
104
  return f.name
105
  except Exception as e:
106
+ logging.error("TTS error: %s", e)
107
  return None
108
 
109
+ # --- STT for English audio ---
110
+ def transcribe_audio(audio_path):
 
 
 
111
  try:
112
+ if not audio_path:
113
+ return None, "No audio file provided"
114
+
115
+ # Convert to mono if needed
116
+ if isinstance(audio_path, tuple):
117
+ audio_path = audio_path[1] # Gradio returns (sample_rate, audio_data)
118
+
119
  transcription = asr(audio_path)["text"]
120
+ return transcription, None
121
  except Exception as e:
122
+ logging.error("STT error: %s", e)
123
+ return None, f"⚠️ Transcription failed: {str(e)}"
124
 
125
+ # --- Handle audio translation ---
126
+ def handle_audio_translation(audio_path, direction):
127
+ if direction != "en_to_ks":
128
+ return "⚠️ Audio input is only supported for English to Kashmiri.", "", None
129
 
130
+ transcription, error = transcribe_audio(audio_path)
131
+ if error:
132
+ return error, "", None
133
+
134
+ translated, _ = translate(transcription, direction)
135
+ return "", transcription, translated
136
 
137
  # --- Switch UI direction ---
138
  def switch_direction(direction, input_text_val, output_text_val):
 
159
  history = gr.Textbox(label="Translation History", lines=8, interactive=False)
160
 
161
  with gr.Row():
162
+ audio_input = gr.Audio(type="filepath", label="🎙️ Record English audio", sources=["microphone"])
163
  audio_output = gr.Audio(label="🔊 English TTS", interactive=False)
164
 
165
+ with gr.Row():
166
+ stt_button = gr.Button("🎤 Transcribe & Translate (EN → KS)")
167
+ tts_button = gr.Button("🔊 Translate & Speak (KS → EN)")
168
 
169
  # Events
170
  translate_button.click(
171
  fn=translate,
172
+ inputs=[input_text, translation_direction, gr.State(False)],
173
  outputs=[output_text, audio_output]
174
  )
175
 
176
  tts_button.click(
177
+ fn=translate,
178
+ inputs=[input_text, translation_direction, gr.State(True)],
179
+ outputs=[output_text, audio_output]
180
  )
181
 
182
  save_button.click(
 
196
  )
197
 
198
  stt_button.click(
199
+ fn=handle_audio_translation,
200
  inputs=[audio_input, translation_direction],
201
+ outputs=[save_status, input_text, output_text]
202
  )
203
 
204
  if __name__ == "__main__":