Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -13,9 +13,10 @@ print("Using GPU for operations when available")
|
|
13 |
def load_pipeline(model_name, **kwargs):
    """Create a pipeline for ``model_name``, returning ``None`` on failure.

    Args:
        model_name: Model identifier handed to ``pipeline`` as ``model``.
        **kwargs: Additional keyword arguments forwarded to ``pipeline``.

    Returns:
        The pipeline object, or ``None`` if any exception was raised while
        selecting the device or constructing the pipeline.
    """
    try:
        # 0 is used as the device when CUDA is available, "cpu" otherwise.
        cuda_ready = torch.cuda.is_available()
        target = 0 if cuda_ready else "cpu"
        loaded = pipeline(model=model_name, device=target, **kwargs)
    except Exception:
        # Best-effort load: failures are reported to the caller as None.
        return None
    return loaded
|
20 |
|
21 |
# Load Whisper model for speech recognition within a GPU-decorated function
|
@@ -23,18 +24,31 @@ def load_pipeline(model_name, **kwargs):
|
|
23 |
def load_whisper():
    """Load the ``openai/whisper-small`` processor and model.

    Returns:
        A ``(processor, model)`` tuple with the model moved to the selected
        device, or ``(None, None)`` if any step raised an exception.
    """
    try:
        # 0 is used as the device when CUDA is available, "cpu" otherwise.
        target = 0 if torch.cuda.is_available() else "cpu"
        whisper_proc = WhisperProcessor.from_pretrained("openai/whisper-small")
        whisper_net = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
        whisper_net = whisper_net.to(target)
    except Exception:
        # Best-effort load: failures are reported to the caller as a None pair.
        return None, None
    return whisper_proc, whisper_net
|
32 |
|
33 |
# Load sarvam-2b for text generation within a GPU-decorated function
|
34 |
@spaces.GPU
def load_sarvam():
    """Load the sarvam-2b text-generation pipeline via ``load_pipeline``.

    Returns:
        Whatever ``load_pipeline`` returns for ``sarvamai/sarvam-2b-v0.5``.
    """
    sarvam = load_pipeline('sarvamai/sarvam-2b-v0.5')
    return sarvam
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
# Process audio input within a GPU-decorated function
|
39 |
@spaces.GPU
|
40 |
def process_audio_input(audio, whisper_processor, whisper_model):
|
@@ -106,16 +120,17 @@ def detect_language(text):
|
|
106 |
@spaces.GPU
|
107 |
def indic_language_assistant(input_type, audio_input, text_input):
|
108 |
try:
|
109 |
-
# Load models within the GPU-decorated function
|
110 |
-
whisper_processor, whisper_model = load_whisper()
|
111 |
-
sarvam_pipe = load_sarvam()
|
112 |
-
|
113 |
if input_type == "audio" and audio_input is not None:
|
|
|
|
|
114 |
transcription = process_audio_input(audio_input, whisper_processor, whisper_model)
|
115 |
elif input_type == "text" and text_input:
|
116 |
transcription = text_input
|
117 |
else:
|
118 |
-
return "Please provide either audio or text input.", "
|
|
|
|
|
|
|
119 |
|
120 |
response = generate_response(transcription, sarvam_pipe)
|
121 |
lang = detect_language(response)
|
@@ -123,9 +138,8 @@ def indic_language_assistant(input_type, audio_input, text_input):
|
|
123 |
|
124 |
return transcription, response, audio_response
|
125 |
except Exception as e:
|
126 |
-
|
127 |
-
return
|
128 |
-
|
129 |
|
130 |
|
131 |
# Updated Custom CSS
|
|
|
13 |
def load_pipeline(model_name, **kwargs):
    """Build a pipeline for ``model_name``, using the GPU when CUDA is available.

    Args:
        model_name: Model identifier passed to ``pipeline`` as ``model``.
        **kwargs: Extra keyword arguments forwarded unchanged to ``pipeline``.

    Returns:
        The constructed pipeline, or ``None`` if anything raised while
        loading. The failure is logged rather than propagated.
    """
    try:
        # 0 is used as the device when CUDA is available, "cpu" otherwise.
        device = 0 if torch.cuda.is_available() else "cpu"
        logger.info(f"Loading {model_name} on device: {device}")
        return pipeline(model=model_name, device=device, **kwargs)
    except Exception as e:
        # Best-effort loader: the failure is signalled by returning None.
        # logger.exception logs at ERROR level with the same message but also
        # records the traceback, which logger.error discarded.
        logger.exception(f"Error loading {model_name} pipeline: {e}")
        return None
|
21 |
|
22 |
# Load Whisper model for speech recognition within a GPU-decorated function
|
|
|
24 |
def load_whisper():
    """Load the ``openai/whisper-small`` processor and model.

    Returns:
        A ``(processor, model)`` tuple with the model moved to the selected
        device, or ``(None, None)`` if any step raised. The failure is logged
        rather than propagated.
    """
    try:
        # 0 is used as the device when CUDA is available, "cpu" otherwise.
        device = 0 if torch.cuda.is_available() else "cpu"
        logger.info(f"Loading Whisper model on device: {device}")
        processor = WhisperProcessor.from_pretrained("openai/whisper-small")
        model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small").to(device)
        return processor, model
    except Exception as e:
        # Best-effort loader: the failure is signalled by a None pair.
        # logger.exception logs at ERROR level with the same message but also
        # records the traceback, which logger.error discarded.
        logger.exception(f"Error loading Whisper model: {e}")
        return None, None
|
34 |
|
35 |
# Load sarvam-2b for text generation within a GPU-decorated function
|
36 |
@spaces.GPU
def load_sarvam():
    """Load the sarvam-2b text-generation pipeline via ``load_pipeline``.

    Returns:
        Whatever ``load_pipeline`` returns for ``sarvamai/sarvam-2b-v0.5``.
    """
    logger.info("Loading sarvam-2b model")
    sarvam = load_pipeline('sarvamai/sarvam-2b-v0.5')
    return sarvam
|
40 |
|
41 |
+
|
42 |
+
# Module-level model handles, populated once when this module is executed.
whisper_processor, whisper_model = load_whisper()
sarvam_pipe = load_sarvam()

# Log any model that did not load; a missing model shows up here as None.
if not (whisper_processor is not None and whisper_model is not None):
    logger.error("Whisper model failed to load")
if sarvam_pipe is None:
    logger.error("Sarvam model failed to load")
|
51 |
+
|
52 |
# Process audio input within a GPU-decorated function
|
53 |
@spaces.GPU
|
54 |
def process_audio_input(audio, whisper_processor, whisper_model):
|
|
|
120 |
@spaces.GPU
|
121 |
def indic_language_assistant(input_type, audio_input, text_input):
|
122 |
try:
|
|
|
|
|
|
|
|
|
123 |
if input_type == "audio" and audio_input is not None:
|
124 |
+
if whisper_processor is None or whisper_model is None:
|
125 |
+
return "Error: Speech recognition model is not available.", "", None
|
126 |
transcription = process_audio_input(audio_input, whisper_processor, whisper_model)
|
127 |
elif input_type == "text" and text_input:
|
128 |
transcription = text_input
|
129 |
else:
|
130 |
+
return "Please provide either audio or text input.", "", None
|
131 |
+
|
132 |
+
if sarvam_pipe is None:
|
133 |
+
return transcription, "Error: Text generation model is not available.", None
|
134 |
|
135 |
response = generate_response(transcription, sarvam_pipe)
|
136 |
lang = detect_language(response)
|
|
|
138 |
|
139 |
return transcription, response, audio_response
|
140 |
except Exception as e:
|
141 |
+
logger.error(f"An error occurred in indic_language_assistant: {str(e)}")
|
142 |
+
return str(e), "An error occurred while processing your request.", None
|
|
|
143 |
|
144 |
|
145 |
# Updated Custom CSS
|