BurhaanZargar committed on
Commit
33d56ec
·
1 Parent(s): 4399d89
Files changed (2) hide show
  1. app.py +30 -16
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
@@ -7,10 +8,8 @@ from datetime import datetime
7
  import tempfile
8
  from gtts import gTTS
9
  import os
10
- import logging
11
 
12
- # Configure logging
13
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
14
 
15
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
16
 
@@ -114,31 +113,38 @@ def transcribe_audio(audio_path):
114
  try:
115
  if not audio_path:
116
  return None, "No audio file provided"
117
-
118
- # Convert to mono if needed
119
- if isinstance(audio_path, tuple):
120
- audio_path = audio_path[1] # Gradio returns (sample_rate, audio_data)
121
-
122
- transcription = asr(audio_path)["text"]
 
 
123
  return transcription, None
124
  except Exception as e:
125
  logging.error("STT error: %s", e)
126
  return None, f"⚠️ Transcription failed: {str(e)}"
127
 
 
 
 
 
 
128
  # --- Handle audio translation ---
129
  def handle_audio_translation(audio_path, direction):
130
  if direction != "en_to_ks":
131
- return "⚠️ Audio input is only supported for English to Kashmiri.", "", "", None
132
 
133
  transcription, error = transcribe_audio(audio_path)
134
  if error:
135
- return error, "", "", None
136
 
137
  translated, _ = translate(transcription, direction, generate_tts=False)
138
- return "", transcription, translated, None
139
 
140
  # --- Switch UI direction ---
141
- def switch_direction(direction, input_text_val, output_text_val):
142
  new_direction = "ks_to_en" if direction == "en_to_ks" else "en_to_ks"
143
  input_label = "Kashmiri Text" if new_direction == "ks_to_en" else "English Text"
144
  output_label = "English Translation" if new_direction == "ks_to_en" else "Kashmiri Translation"
@@ -154,6 +160,7 @@ with gr.Blocks() as interface:
154
  </div>
155
  """)
156
  translation_direction = gr.State(value="en_to_ks")
 
157
 
158
  with gr.Row():
159
  input_text = gr.Textbox(label="English Text", placeholder="Enter text here...", lines=2)
@@ -175,6 +182,13 @@ with gr.Blocks() as interface:
175
  stt_button = gr.Button("🎤 Transcribe & Translate (EN → KS)")
176
  tts_button = gr.Button("🔊 Translate & Speak (KS → EN)")
177
 
 
 
 
 
 
 
 
178
  # Events
179
  translate_button.click(
180
  fn=translate,
@@ -200,14 +214,14 @@ with gr.Blocks() as interface:
200
 
201
  switch_button.click(
202
  fn=switch_direction,
203
- inputs=[translation_direction, input_text, output_text],
204
  outputs=[translation_direction, input_text, output_text, audio_output]
205
  )
206
 
207
  stt_button.click(
208
  fn=handle_audio_translation,
209
- inputs=[audio_input, translation_direction],
210
- outputs=[save_status, input_text, output_text, audio_output]
211
  )
212
 
213
  if __name__ == "__main__":
 
1
+ #app.py
2
  import gradio as gr
3
  import torch
4
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
 
8
  import tempfile
9
  from gtts import gTTS
10
  import os
11
+ import shutil
12
 
 
 
13
 
14
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
15
 
 
113
  try:
114
  if not audio_path:
115
  return None, "No audio file provided"
116
+
117
+ # Create a persistent copy of the audio file
118
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
119
+ temp_path = f.name
120
+ shutil.copy(audio_path, temp_path)
121
+
122
+ transcription = asr(temp_path)["text"]
123
+ os.unlink(temp_path) # Clean up temporary file
124
  return transcription, None
125
  except Exception as e:
126
  logging.error("STT error: %s", e)
127
  return None, f"⚠️ Transcription failed: {str(e)}"
128
 
129
+ # --- Store audio file path ---
130
+ def store_audio(audio_path):
131
+ """Store audio path in state and return it to keep it visible"""
132
+ return audio_path
133
+
134
  # --- Handle audio translation ---
135
  def handle_audio_translation(audio_path, direction):
136
  if direction != "en_to_ks":
137
+ return "⚠️ Audio input is only supported for English to Kashmiri.", "", "", audio_path
138
 
139
  transcription, error = transcribe_audio(audio_path)
140
  if error:
141
+ return error, "", "", audio_path
142
 
143
  translated, _ = translate(transcription, direction, generate_tts=False)
144
+ return "", transcription, translated, audio_path
145
 
146
  # --- Switch UI direction ---
147
+ def switch_direction(direction, input_text_val, output_text_val, audio_path):
148
  new_direction = "ks_to_en" if direction == "en_to_ks" else "en_to_ks"
149
  input_label = "Kashmiri Text" if new_direction == "ks_to_en" else "English Text"
150
  output_label = "English Translation" if new_direction == "ks_to_en" else "Kashmiri Translation"
 
160
  </div>
161
  """)
162
  translation_direction = gr.State(value="en_to_ks")
163
+ stored_audio = gr.State()
164
 
165
  with gr.Row():
166
  input_text = gr.Textbox(label="English Text", placeholder="Enter text here...", lines=2)
 
182
  stt_button = gr.Button("🎤 Transcribe & Translate (EN → KS)")
183
  tts_button = gr.Button("🔊 Translate & Speak (KS → EN)")
184
 
185
+ # Store audio when recorded
186
+ audio_input.change(
187
+ fn=store_audio,
188
+ inputs=audio_input,
189
+ outputs=stored_audio
190
+ )
191
+
192
  # Events
193
  translate_button.click(
194
  fn=translate,
 
214
 
215
  switch_button.click(
216
  fn=switch_direction,
217
+ inputs=[translation_direction, input_text, output_text, stored_audio],
218
  outputs=[translation_direction, input_text, output_text, audio_output]
219
  )
220
 
221
  stt_button.click(
222
  fn=handle_audio_translation,
223
+ inputs=[stored_audio, translation_direction],
224
+ outputs=[save_status, input_text, output_text, audio_input]
225
  )
226
 
227
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -8,4 +8,6 @@ gTTS
8
  pydub
9
  ffmpeg-python
10
  soundfile
11
- accelerate
 
 
 
8
  pydub
9
  ffmpeg-python
10
  soundfile
11
+ accelerate
12
+ os
13
+ shutil