BurhaanZargar committed on
Commit
85071eb
·
1 Parent(s): d00c1a7
Files changed (2) hide show
  1. app.py +34 -32
  2. requirements.txt +4 -0
app.py CHANGED
@@ -1,42 +1,35 @@
1
  import gradio as gr
2
- print("Gradio version:", gr.__version__)
3
-
4
  import torch
5
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
6
  from IndicTransToolkit.processor import IndicProcessor
7
- import gradio as gr
8
  import requests
9
  from datetime import datetime
10
  import tempfile
11
  from gtts import gTTS
12
  import os
13
 
14
- # Supabase configuration
15
- SUPABASE_URL = "https://gptmdbhzblfybdnohqnh.supabase.co"
16
- SUPABASE_API_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImdwdG1kYmh6YmxmeWJkbm9ocW5oIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NDc0NjY1NDgsImV4cCI6MjA2MzA0MjU0OH0.CfWArts6Kd_x7Wj0a_nAyGJfrFt8F7Wdy_MdYDj9e7U"
17
- SUPABASE_TABLE = "translations"
18
-
19
  # Device configuration
20
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
21
 
 
 
 
 
22
  # Load translation models
23
  model_en_to_indic = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/indictrans2-en-indic-1B", trust_remote_code=True).to(DEVICE)
24
  tokenizer_en_to_indic = AutoTokenizer.from_pretrained("ai4bharat/indictrans2-en-indic-1B", trust_remote_code=True)
25
  model_indic_to_en = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/indictrans2-indic-en-1B", trust_remote_code=True).to(DEVICE)
26
  tokenizer_indic_to_en = AutoTokenizer.from_pretrained("ai4bharat/indictrans2-indic-en-1B", trust_remote_code=True)
27
- ip = IndicProcessor(inference=True)
28
 
29
- # Whisper STT pipeline (keep as is)
30
  asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
31
 
32
 
33
- # Save to Supabase
34
  def save_to_supabase(input_text, output_text, direction):
35
  if not input_text.strip() or not output_text.strip():
36
  return "Nothing to save."
37
 
38
  table_name = "translations" if direction == "en_to_ks" else "ks_to_en_translations"
39
-
40
  payload = {
41
  "timestamp": datetime.utcnow().isoformat(),
42
  "input_text": input_text,
@@ -61,7 +54,7 @@ def save_to_supabase(input_text, output_text, direction):
61
  print("SAVE EXCEPTION:", e)
62
  return "❌ Save request error."
63
 
64
- # Fetch translation history
65
  def get_translation_history(direction="en_to_ks"):
66
  table_name = "translations" if direction == "en_to_ks" else "ks_to_en_translations"
67
 
@@ -86,7 +79,7 @@ def get_translation_history(direction="en_to_ks"):
86
  print("HISTORY FETCH ERROR:", e)
87
  return "Error loading history."
88
 
89
- # Translation function
90
  def translate(text, direction):
91
  if not text.strip():
92
  return "Please enter some text.", gr.update(), gr.update()
@@ -122,7 +115,7 @@ def translate(text, direction):
122
  print("Translation Error:", e)
123
  return "⚠️ Translation failed.", gr.update(), gr.update()
124
 
125
- # Transcribe English audio
126
  def transcribe_audio(audio_path):
127
  try:
128
  result = asr(audio_path)
@@ -131,7 +124,7 @@ def transcribe_audio(audio_path):
131
  print("STT Error:", e)
132
  return "⚠️ Transcription failed."
133
 
134
- # Synthesize English TTS using gTTS for ks_to_en direction
135
  def synthesize_tts(text, direction):
136
  if direction == "ks_to_en" and text.strip():
137
  try:
@@ -144,7 +137,7 @@ def synthesize_tts(text, direction):
144
  print("TTS Error:", e)
145
  return None
146
 
147
- # Direction switch
148
  def switch_direction(direction, input_text_val, output_text_val):
149
  new_direction = "ks_to_en" if direction == "en_to_ks" else "en_to_ks"
150
  input_label = "Kashmiri Text" if new_direction == "ks_to_en" else "English Text"
@@ -155,7 +148,23 @@ def switch_direction(direction, input_text_val, output_text_val):
155
  gr.update(value=input_text_val, label=output_label)
156
  )
157
 
158
- # Gradio interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  with gr.Blocks() as interface:
160
  gr.HTML("""
161
  <div style="display: flex; justify-content: space-between; align-items: center; padding: 10px;">
@@ -181,10 +190,11 @@ with gr.Blocks() as interface:
181
 
182
  with gr.Row():
183
  audio_input = gr.Audio(type="filepath", label="πŸŽ™οΈ Upload or record English audio")
184
- audio_output = gr.Audio(label="πŸ”Š English Output Audio", type="filepath", interactive=False)
 
185
  stt_translate_button = gr.Button("🎀 Transcribe & Translate")
186
 
187
- # Click events
188
  translate_button.click(
189
  fn=translate,
190
  inputs=[input_text, translation_direction],
@@ -208,18 +218,10 @@ with gr.Blocks() as interface:
208
  )
209
 
210
  stt_translate_button.click(
211
- fn=transcribe_audio,
212
- inputs=audio_input,
213
- outputs=input_text
214
- ).then(
215
- fn=translate,
216
- inputs=[input_text, translation_direction],
217
- outputs=[output_text, input_text, output_text]
218
- ).then(
219
- fn=synthesize_tts,
220
- inputs=[output_text, translation_direction],
221
- outputs=audio_output
222
  )
223
 
224
  if __name__ == "__main__":
225
- interface.queue().launch()
 
1
  import gradio as gr
 
 
2
  import torch
3
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
4
  from IndicTransToolkit.processor import IndicProcessor
 
5
  import requests
6
  from datetime import datetime
7
  import tempfile
8
  from gtts import gTTS
9
  import os
10
 
 
 
 
 
 
11
  # Device configuration
12
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
13
 
14
+ # Supabase configuration
15
+ SUPABASE_URL = "https://gptmdbhzblfybdnohqnh.supabase.co"
16
+ SUPABASE_API_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..."
17
+
18
  # Load translation models
19
  model_en_to_indic = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/indictrans2-en-indic-1B", trust_remote_code=True).to(DEVICE)
20
  tokenizer_en_to_indic = AutoTokenizer.from_pretrained("ai4bharat/indictrans2-en-indic-1B", trust_remote_code=True)
21
  model_indic_to_en = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/indictrans2-indic-en-1B", trust_remote_code=True).to(DEVICE)
22
  tokenizer_indic_to_en = AutoTokenizer.from_pretrained("ai4bharat/indictrans2-indic-en-1B", trust_remote_code=True)
 
23
 
24
+ ip = IndicProcessor(inference=True)
25
  asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
26
 
27
 
 
28
  def save_to_supabase(input_text, output_text, direction):
29
  if not input_text.strip() or not output_text.strip():
30
  return "Nothing to save."
31
 
32
  table_name = "translations" if direction == "en_to_ks" else "ks_to_en_translations"
 
33
  payload = {
34
  "timestamp": datetime.utcnow().isoformat(),
35
  "input_text": input_text,
 
54
  print("SAVE EXCEPTION:", e)
55
  return "❌ Save request error."
56
 
57
+
58
  def get_translation_history(direction="en_to_ks"):
59
  table_name = "translations" if direction == "en_to_ks" else "ks_to_en_translations"
60
 
 
79
  print("HISTORY FETCH ERROR:", e)
80
  return "Error loading history."
81
 
82
+
83
  def translate(text, direction):
84
  if not text.strip():
85
  return "Please enter some text.", gr.update(), gr.update()
 
115
  print("Translation Error:", e)
116
  return "⚠️ Translation failed.", gr.update(), gr.update()
117
 
118
+
119
  def transcribe_audio(audio_path):
120
  try:
121
  result = asr(audio_path)
 
124
  print("STT Error:", e)
125
  return "⚠️ Transcription failed."
126
 
127
+
128
  def synthesize_tts(text, direction):
129
  if direction == "ks_to_en" and text.strip():
130
  try:
 
137
  print("TTS Error:", e)
138
  return None
139
 
140
+
141
  def switch_direction(direction, input_text_val, output_text_val):
142
  new_direction = "ks_to_en" if direction == "en_to_ks" else "en_to_ks"
143
  input_label = "Kashmiri Text" if new_direction == "ks_to_en" else "English Text"
 
148
  gr.update(value=input_text_val, label=output_label)
149
  )
150
 
151
+
152
def handle_audio_translation(audio_path, direction):
    """Transcribe an uploaded/recorded audio file, translate it, and
    (for ks_to_en) synthesize English speech for the translation.

    Args:
        audio_path: filesystem path to the audio clip from gr.Audio(type="filepath").
        direction: "en_to_ks" or "ks_to_en" — selects translation direction.

    Returns:
        A 4-tuple (transcription, translation, transcription, tts_audio_path)
        matching the Gradio outputs [input_text, output_text, input_text, audio_output].
        tts_audio_path is None unless direction == "ks_to_en" and TTS succeeded.
    """
    transcription = transcribe_audio(audio_path)
    # Fix: the original guarded only the en_to_ks branch, so a failed
    # transcription ("⚠️ Transcription failed.") in the ks_to_en branch was
    # fed straight into translate() and TTS. Guard both branches uniformly.
    if transcription.startswith("⚠️"):
        return transcription, "", "", None

    translation, _, _ = translate(transcription, direction)

    if direction == "en_to_ks":
        # No English TTS needed when the output is Kashmiri.
        return transcription, translation, transcription, None

    # ks_to_en: also produce spoken English audio for the translated text.
    # NOTE(review): the ASR model is whisper-small (English-oriented); its
    # accuracy on Kashmiri input is unverified — confirm with the model card.
    tts_audio = synthesize_tts(translation, direction)
    return transcription, translation, transcription, tts_audio
165
+
166
+
167
+ # === Gradio Interface ===
168
  with gr.Blocks() as interface:
169
  gr.HTML("""
170
  <div style="display: flex; justify-content: space-between; align-items: center; padding: 10px;">
 
190
 
191
  with gr.Row():
192
  audio_input = gr.Audio(type="filepath", label="πŸŽ™οΈ Upload or record English audio")
193
+ audio_output = gr.Audio(label="πŸ”Š English Output Audio")
194
+
195
  stt_translate_button = gr.Button("🎀 Transcribe & Translate")
196
 
197
+ # Events
198
  translate_button.click(
199
  fn=translate,
200
  inputs=[input_text, translation_direction],
 
218
  )
219
 
220
  stt_translate_button.click(
221
+ fn=handle_audio_translation,
222
+ inputs=[audio_input, translation_direction],
223
+ outputs=[input_text, output_text, input_text, audio_output]
 
 
 
 
 
 
 
 
224
  )
225
 
226
  if __name__ == "__main__":
227
+ interface.queue().launch(share=True)
requirements.txt CHANGED
@@ -5,3 +5,7 @@ gradio==5.32.0
5
  requests
6
  git+https://github.com/VarunGumma/IndicTransToolkit.git
7
  gTTS
 
 
 
 
 
5
  requests
6
  git+https://github.com/VarunGumma/IndicTransToolkit.git
7
  gTTS
8
+ pydub
9
+ ffmpeg-python
10
+ soundfile
11
+ accelerate