sagar007 committed
Commit 416ddd3 · verified · 1 Parent(s): 68ce9a1

Update app.py

Files changed (1)
  1. app.py +40 -44
app.py CHANGED
@@ -1,61 +1,57 @@
+import gradio as gr
 import librosa
 import torch
-from tqdm import tqdm
-import transformers
 
 # Check for GPU availability
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+from transformers import pipeline
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
 
 # Load the model pipeline
-pipe = transformers.pipeline(
-    model='sarvamai/shuka_v1',
-    trust_remote_code=True,
-    device=device,
-    torch_dtype=torch.float16 if device.type == 'cuda' else torch.float32
-)
+try:
+    pipe = pipeline(
+        model='sarvamai/shuka_v1',
+        trust_remote_code=True,
+        device=device,
+        torch_dtype=torch.float16 if device == 'cuda' else torch.float32
+    )
+    print("Pipeline loaded successfully")
+except Exception as e:
+    print(f"Error loading pipeline: {str(e)}")
+    pipe = None
+
+def process_audio(audio_file, system_prompt, user_prompt):
+    if pipe is None:
+        return "Error: Model pipeline not initialized."
 
-def process_audio_batched(audio_file, system_prompt, user_prompt, batch_size=4, segment_length=10):
-    # Load audio
-    audio, sr = librosa.load(audio_file, sr=16000)
-
-    # Calculate number of samples per segment
-    samples_per_segment = segment_length * sr
-
-    # Split audio into segments
-    segments = [audio[i:i+samples_per_segment] for i in range(0, len(audio), samples_per_segment)]
-
-    full_result = []
-
-    # Process segments in batches
-    for i in tqdm(range(0, len(segments), batch_size)):
-        batch = segments[i:i+batch_size]
+    try:
+        # Load audio
+        audio, sr = librosa.load(audio_file, sr=16000)
 
         turns = [
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': f'<|audio|>{user_prompt}'}
        ]
 
-        # Move batch to GPU if available
-        batch_gpu = [torch.tensor(seg, device=device) for seg in batch]
+        # Process audio
+        result = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)
 
-        batch_results = pipe([{'audio': seg, 'turns': turns, 'sampling_rate': sr} for seg in batch_gpu], max_new_tokens=512)
-        full_result.extend([result[0]['generated_text'] for result in batch_results])
-
-        # Clear GPU memory
-        torch.cuda.empty_cache()
-
-    # Combine results
-    return ' '.join(full_result)
+        return result[0]['generated_text']
+    except Exception as e:
+        return f"Error processing audio: {str(e)}"
 
-# Example usage
-audio_file = "path/to/your/audio/file.wav"
-system_prompt = "Transcribe the audio accurately."
-user_prompt = "What is being said in this audio?"
+# Define Gradio interface
+iface = gr.Interface(
+    fn=process_audio,
+    inputs=[
+        gr.Audio(type="filepath", label="Upload Audio"),
+        gr.Textbox(label="System Prompt", value="Transcribe the audio accurately."),
+        gr.Textbox(label="User Prompt", value="What is being said in this audio?")
+    ],
+    outputs="text",
+    title="Audio Processing with Shuka_v1",
+    description="Upload an audio file and get the transcription or analysis based on your prompts."
+)
 
-try:
-    full_result = process_audio_batched(audio_file, system_prompt, user_prompt)
-    print(full_result)
-except Exception as e:
-    print(f"An error occurred: {str(e)}")
-    # Additional error handling and logging can be added here
+# Launch the app
+iface.launch()
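
For a quick smoke test of the new handler outside the Gradio UI, process_audio can be called directly, for example from an interactive session with iface.launch() skipped. A minimal sketch; "sample.wav" is a hypothetical placeholder path, not a file shipped with this Space:

# Hypothetical smoke test: call the handler directly, bypassing the UI.
# Run with iface.launch() skipped; "sample.wav" is a placeholder path.
result_text = process_audio(
    "sample.wav",
    "Transcribe the audio accurately.",
    "What is being said in this audio?",
)
print(result_text)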