Update app.py
app.py CHANGED
@@ -3,33 +3,28 @@ import librosa
 from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
 from gtts import gTTS
 import gradio as gr
-import spaces
 
-
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print(f"Using device: {device}")
+print("Using CPU for all operations")
 
 # Function to safely load pipeline
 def load_pipeline(model_name, **kwargs):
     try:
-        return pipeline(model=model_name, device=
+        return pipeline(model=model_name, device="cpu", **kwargs)
     except Exception as e:
         print(f"Error loading {model_name} pipeline: {e}")
         return None
 
 # Load Whisper model for speech recognition
-@spaces.GPU
 def load_whisper():
     try:
         processor = WhisperProcessor.from_pretrained("openai/whisper-small")
-        model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
+        model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
         return processor, model
     except Exception as e:
         print(f"Error loading Whisper model: {e}")
         return None, None
 
 # Load sarvam-2b for text generation
-@spaces.GPU
 def load_sarvam():
     return load_pipeline('sarvamai/sarvam-2b-v0.5')
 
@@ -43,7 +38,7 @@ def process_audio_input(audio):
 
     try:
         audio, sr = librosa.load(audio, sr=16000)
-        input_features = whisper_processor(audio, sampling_rate=sr, return_tensors="pt").input_features
+        input_features = whisper_processor(audio, sampling_rate=sr, return_tensors="pt").input_features
         predicted_ids = whisper_model.generate(input_features)
         transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
         return transcription
@@ -81,7 +76,6 @@ def detect_language(text):
         return 'hi' # Default to Hindi for simplicity
     return 'en' # Default to English if no Indic script is detected
 
-@spaces.GPU
 def indic_language_assistant(input_type, audio_input, text_input):
     try:
         if input_type == "audio" and audio_input is not None:
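
The change drops the ZeroGPU hooks (import spaces and the @spaces.GPU decorators) and loads every model on the CPU instead. As a rough illustration, here is a minimal CPU-only sketch of the same transcription path in isolation; the input file name "sample.wav" is a hypothetical placeholder, and only the openai/whisper-small checkpoint already used in app.py is assumed.

# CPU-only sketch of the transcription path used in app.py after this commit.
# Assumptions: transformers, torch, and librosa are installed; "sample.wav" is a
# hypothetical local audio file standing in for the Gradio microphone input.
import librosa
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration

processor = WhisperProcessor.from_pretrained("openai/whisper-small")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")  # stays on CPU, no device move

audio, sr = librosa.load("sample.wav", sr=16000)  # Whisper expects 16 kHz mono audio
inputs = processor(audio, sampling_rate=sr, return_tensors="pt")
with torch.no_grad():  # inference only, so skip gradient tracking
    predicted_ids = model.generate(inputs.input_features)
print(processor.batch_decode(predicted_ids, skip_special_tokens=True)[0])

Nothing else in the calling code has to change for CPU execution; generation simply runs more slowly than it would on a GPU-backed Space.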