BurhaanZargar committed on
Commit 8c9cc75 · 1 Parent(s): 7f95fc6

Used gTTS for TTS as the previous TTS model didn't work

Files changed (3)
  1. app.py +12 -7
  2. postBuild +1 -2
  3. requirements.txt +2 -2
app.py CHANGED
@@ -5,9 +5,11 @@ import gradio as gr
 import requests
 from datetime import datetime
 import tempfile
+from gtts import gTTS
+import os
 
 # Supabase configuration
-SUPABASE_URL = "https://gptmdbhzblfybdnohqnh.supabase.co"
+SUPABASE_URL = "https://gptmdbhzblfybdnohqnh.supabase.co"
 SUPABASE_API_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImdwdG1kYmh6YmxmeWJkbm9ocW5oIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NDc0NjY1NDgsImV4cCI6MjA2MzA0MjU0OH0.CfWArts6Kd_x7Wj0a_nAyGJfrFt8F7Wdy_MdYDj9e7U"
 SUPABASE_TABLE = "translations"
 
@@ -21,9 +23,9 @@ model_indic_to_en = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/indictrans2
 tokenizer_indic_to_en = AutoTokenizer.from_pretrained("ai4bharat/indictrans2-indic-en-1B", trust_remote_code=True)
 ip = IndicProcessor(inference=True)
 
-# Whisper STT and English TTS pipelines
+# Whisper STT pipeline (keep as is)
 asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
-tts_en = pipeline("text-to-speech", model="espnet/kan-bayashi_ljspeech_vits")
+
 
 # Save to Supabase
 def save_to_supabase(input_text, output_text, direction):
@@ -126,12 +128,15 @@ def transcribe_audio(audio_path):
         print("STT Error:", e)
         return "⚠️ Transcription failed."
 
-# Synthesize English audio if direction is ks_to_en
+# Synthesize English TTS using gTTS for ks_to_en direction
 def synthesize_tts(text, direction):
-    if direction == "ks_to_en":
+    if direction == "ks_to_en" and text.strip():
         try:
-            result = tts_en(text)
-            return (result["sampling_rate"], result["audio"])
+            tts = gTTS(text=text, lang="en")
+            tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
+            tts.save(tmp_file.name)
+            tmp_file.close()
+            return tmp_file.name
         except Exception as e:
             print("TTS Error:", e)
     return None
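
Note on the change: the old ESPnet pipeline returned raw audio as a (sampling_rate, waveform) tuple, whereas gTTS writes an MP3 to disk, so synthesize_tts now returns a file path. Below is a minimal sketch of how such a path can be fed to a Gradio audio output; the gr.Audio(type="filepath") wiring and the demo_tts helper are illustrative assumptions, since the interface code is not part of this diff.

# Minimal sketch (not part of this commit): wiring a gTTS file path into Gradio.
# gr.Audio(type="filepath") and demo_tts are assumptions for illustration only.
import tempfile

import gradio as gr
from gtts import gTTS

def demo_tts(text):
    # gTTS produces an MP3 file on disk rather than a (sampling_rate, array)
    # tuple, so the function returns the file path for Gradio to play.
    tts = gTTS(text=text, lang="en")
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    tts.save(tmp.name)
    tmp.close()
    return tmp.name

demo = gr.Interface(fn=demo_tts, inputs="text", outputs=gr.Audio(type="filepath"))

if __name__ == "__main__":
    demo.launch()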
postBuild CHANGED
@@ -3,5 +3,4 @@ AutoModelForSeq2SeqLM.from_pretrained('ai4bharat/indictrans2-en-indic-1B'); \
 AutoTokenizer.from_pretrained('ai4bharat/indictrans2-en-indic-1B'); \
 AutoModelForSeq2SeqLM.from_pretrained('ai4bharat/indictrans2-indic-en-1B'); \
 AutoTokenizer.from_pretrained('ai4bharat/indictrans2-indic-en-1B'); \
-pipeline('automatic-speech-recognition', model='openai/whisper-small'); \
-pipeline('text-to-speech', model='espnet/kan-bayashi_ljspeech_vits')"
+pipeline('automatic-speech-recognition', model='openai/whisper-small')"
 
requirements.txt CHANGED
@@ -1,7 +1,7 @@
 torch>=1.12
 transformers>=4.30.0
-sentencepiece  # Required for tokenizer in IndicTrans2
-torchaudio  # Required by Whisper and ESPnet TTS
+sentencepiece
 gradio
 requests
 git+https://github.com/VarunGumma/IndicTransToolkit.git
+gTTS