sagar007 committed · Commit 68ce9a1 (verified) · 1 Parent(s): 80d0076

Update app.py

Files changed (1): app.py (+13 -7)
app.py CHANGED
@@ -3,15 +3,19 @@ import torch
 from tqdm import tqdm
 import transformers
 
+# Check for GPU availability
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"Using device: {device}")
+
 # Load the model pipeline
 pipe = transformers.pipeline(
     model='sarvamai/shuka_v1',
     trust_remote_code=True,
-    device='cpu',  # Force CPU usage
-    torch_dtype=torch.float32  # Use float32 instead of bfloat16
+    device=device,
+    torch_dtype=torch.float16 if device.type == 'cuda' else torch.float32
 )
 
-def process_audio_batched(audio_file, system_prompt, user_prompt, batch_size=2, segment_length=5):  # Reduced batch_size and segment_length
+def process_audio_batched(audio_file, system_prompt, user_prompt, batch_size=4, segment_length=10):
     # Load audio
     audio, sr = librosa.load(audio_file, sr=16000)
 
@@ -32,12 +36,14 @@ def process_audio_batched(audio_file, system_prompt, user_prompt, batch_size=2,
             {'role': 'user', 'content': f'<|audio|>{user_prompt}'}
         ]
 
-        batch_results = pipe([{'audio': seg, 'turns': turns, 'sampling_rate': sr} for seg in batch], max_new_tokens=512)
+        # Move batch to GPU if available
+        batch_gpu = [torch.tensor(seg, device=device) for seg in batch]
+
+        batch_results = pipe([{'audio': seg, 'turns': turns, 'sampling_rate': sr} for seg in batch_gpu], max_new_tokens=512)
         full_result.extend([result[0]['generated_text'] for result in batch_results])
 
-        # Clear GPU memory if using GPU
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
+        # Clear GPU memory
+        torch.cuda.empty_cache()
 
     # Combine results
     return ' '.join(full_result)
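For context, a minimal sketch of how the whole function plausibly reads after this commit. Everything outside the two hunks above is inferred, not shown in the diff: the librosa import, the segmentation loop, the system turn inside turns, and the tqdm progress bar are assumptions reconstructed from the visible context lines.

import librosa
import torch
import transformers
from tqdm import tqdm

# Device and pipeline setup, as added by this commit
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

pipe = transformers.pipeline(
    model='sarvamai/shuka_v1',
    trust_remote_code=True,
    device=device,
    torch_dtype=torch.float16 if device.type == 'cuda' else torch.float32
)

def process_audio_batched(audio_file, system_prompt, user_prompt, batch_size=4, segment_length=10):
    # Load audio at the 16 kHz rate the model expects
    audio, sr = librosa.load(audio_file, sr=16000)

    # Split into fixed-length segments (assumed: this loop lies outside the diff context)
    samples_per_segment = segment_length * sr
    segments = [audio[i:i + samples_per_segment]
                for i in range(0, len(audio), samples_per_segment)]

    full_result = []
    for i in tqdm(range(0, len(segments), batch_size)):
        batch = segments[i:i + batch_size]
        # The system turn is assumed; the diff only shows the user turn as context
        turns = [
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': f'<|audio|>{user_prompt}'}
        ]

        # Move batch to GPU if available (as in the commit)
        batch_gpu = [torch.tensor(seg, device=device) for seg in batch]

        batch_results = pipe([{'audio': seg, 'turns': turns, 'sampling_rate': sr} for seg in batch_gpu],
                             max_new_tokens=512)
        full_result.extend([result[0]['generated_text'] for result in batch_results])

        # Clear GPU memory (a no-op when CUDA is unavailable)
        torch.cuda.empty_cache()

    # Combine results
    return ' '.join(full_result)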
 
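A hypothetical invocation, with a placeholder file name and prompts; the commit does not show how the Space calls this function (e.g. via a Gradio interface):

result = process_audio_batched(
    'sample.wav',
    system_prompt='You are a helpful assistant. Describe the audio you hear.',
    user_prompt='Summarize this recording.'
)
print(result)

One note on the change itself: pre-moving each segment to the GPU with torch.tensor(seg, device=device) may be redundant, since transformers pipelines typically accept NumPy arrays and handle device placement during preprocessing; whether shuka_v1's remote pipeline code accepts GPU tensors as its 'audio' input is not confirmed here, so the plain NumPy batch is the safer fallback if this raises errors. Dropping the torch.cuda.is_available() guard around empty_cache() is harmless, as that call is a no-op when CUDA is not initialized.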