Update app.py
app.py CHANGED
@@ -3,33 +3,28 @@ import librosa
 from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
 from gtts import gTTS
 import gradio as gr
-import spaces
 
-
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print(f"Using device: {device}")
+print("Using CPU for all operations")
 
 # Function to safely load pipeline
 def load_pipeline(model_name, **kwargs):
     try:
-        return pipeline(model=model_name, device=
+        return pipeline(model=model_name, device="cpu", **kwargs)
     except Exception as e:
         print(f"Error loading {model_name} pipeline: {e}")
         return None
 
 # Load Whisper model for speech recognition
-@spaces.GPU
 def load_whisper():
     try:
         processor = WhisperProcessor.from_pretrained("openai/whisper-small")
-        model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
+        model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
         return processor, model
     except Exception as e:
         print(f"Error loading Whisper model: {e}")
         return None, None
 
 # Load sarvam-2b for text generation
-@spaces.GPU
 def load_sarvam():
     return load_pipeline('sarvamai/sarvam-2b-v0.5')
 
@@ -43,7 +38,7 @@ def process_audio_input(audio):
 
     try:
         audio, sr = librosa.load(audio, sr=16000)
-        input_features = whisper_processor(audio, sampling_rate=sr, return_tensors="pt").input_features
+        input_features = whisper_processor(audio, sampling_rate=sr, return_tensors="pt").input_features
         predicted_ids = whisper_model.generate(input_features)
         transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
         return transcription
@@ -81,7 +76,6 @@ def detect_language(text):
         return 'hi' # Default to Hindi for simplicity
     return 'en' # Default to English if no Indic script is detected
 
-@spaces.GPU
 def indic_language_assistant(input_type, audio_input, text_input):
     try:
         if input_type == "audio" and audio_input is not None:
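
The change drops the ZeroGPU hooks (import spaces and the @spaces.GPU decorators) and loads every model on the CPU instead. As a rough illustration, here is a minimal CPU-only sketch of the same transcription path in isolation; the input file name "sample.wav" is a hypothetical placeholder, and only the openai/whisper-small checkpoint already used in app.py is assumed.

# CPU-only sketch of the transcription path used in app.py after this commit.
# Assumptions: transformers, torch, and librosa are installed; "sample.wav" is a
# hypothetical local audio file standing in for the Gradio microphone input.
import librosa
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration

processor = WhisperProcessor.from_pretrained("openai/whisper-small")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")  # stays on CPU, no device move

audio, sr = librosa.load("sample.wav", sr=16000)  # Whisper expects 16 kHz mono audio
inputs = processor(audio, sampling_rate=sr, return_tensors="pt")
with torch.no_grad():  # inference only, so skip gradient tracking
    predicted_ids = model.generate(inputs.input_features)
print(processor.batch_decode(predicted_ids, skip_special_tokens=True)[0])

Nothing else in the calling code has to change for CPU execution; generation simply runs more slowly than it would on a GPU-backed Space.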