Spaces:

iamomtiwari
/

ATTT

Running

App Files Files Community

iamomtiwari committed on May 15

Commit

09b115d

verified ·

1 Parent(s): 7dae75b

Create app.py

Browse files

Files changed (1) hide show

app.py +81 -0

app.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import os
+import tempfile
+from functools import lru_cache
+
+import gradio as gr
+import librosa
+import numpy as np
+from transformers import pipeline
+# Cache the model to avoid reloading on every interaction
+@lru_cache(maxsize=1)
+def load_model():
+    return pipeline(
+        model='fixie-ai/ultravox-v0_5-llama-3_2-1b',
+        trust_remote_code=True,
+        device_map="auto"  # Automatically uses GPU if available
+    )
+def process_audio(audio_file, user_message):
+    try:
+        # Load audio (supports file upload or microphone input)
+        if isinstance(audio_file, (str, tempfile._TemporaryFileWrapper)):
+            audio_path = audio_file.name if hasattr(audio_file, 'name') else audio_file
+            audio, sr = librosa.load(audio_path, sr=16000)
+        else:  # Handle direct numpy array from microphone
+            sr, audio = audio_file
+        # Initialize conversation
+        turns = [
+            {
+                "role": "system",
+                "content": "You are a friendly and helpful AI assistant. Respond conversationally to the user's audio input."
+            },
+            {
+                "role": "user",
+                "content": user_message if user_message else "Describe what you heard in the audio."
+            }
+        ]
+        # Get model prediction
+        pipe = load_model()
+        result = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=100)
+        return result[-1]["content"]
+    except Exception as e:
+        return f"Error processing audio: {str(e)}"
+# Gradio UI
+with gr.Blocks(title="UltraVox Audio Assistant") as demo:
+    gr.Markdown("## 🎤 UltraVox Audio Assistant")
+    gr.Markdown("Upload an audio file or speak via microphone, then ask questions about it.")
+    with gr.Row():
+        audio_input = gr.Audio(
+            sources=["upload", "microphone"],
+            type="filepath",
+            label="Input Audio"
+        )
+        text_input = gr.Textbox(
+            label="Your Question (Optional)",
+            placeholder="Ask me about the audio..."
+        )
+    submit_btn = gr.Button("Process")
+    output = gr.Textbox(label="AI Response", interactive=False)
+    submit_btn.click(
+        fn=process_audio,
+        inputs=[audio_input, text_input],
+        outputs=output
+    )
+    gr.Examples(
+        examples=[
+            ["examples/weather_report.wav", "What's the weather forecast?"],
+            ["examples/meeting_notes.mp3", "Summarize the key points"]
+        ],
+        inputs=[audio_input, text_input]
+    )
+if __name__ == "__main__":
+    demo.launch()