|
import gradio as gr |
|
import os |
|
from groq import Groq |
|
import tempfile |
|
|
|
def validate_file(file):
    """Validate an uploaded file's size and extension.

    Args:
        file: The uploaded file, given either as a filesystem path
            (``str`` or ``os.PathLike``) or as an object whose ``name``
            attribute holds the path. ``None`` means nothing was uploaded.

    Returns:
        A ``(is_valid, message)`` tuple. ``is_valid`` is ``True`` only when
        the file exists, is at most 25MB, and has a supported extension;
        ``message`` explains the outcome.
    """
    if file is None:
        return False, "No file uploaded"

    # Plain paths are used directly. Checking for str/PathLike first matters:
    # pathlib.Path also has a ``name`` attribute, but it is only the basename.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    # A missing or unreadable file is a validation failure, not a crash.
    try:
        file_size_mb = os.path.getsize(path) / (1024 * 1024)
    except OSError:
        return False, "Uploaded file could not be found or read"

    if file_size_mb > 25:
        return False, f"File size ({file_size_mb:.1f}MB) exceeds 25MB limit"

    valid_extensions = ['.mp3', '.mp4', '.mpeg', '.mpga', '.m4a', '.wav', '.webm', '.flac', '.ogg', '.aac']
    # Compare case-insensitively so "CLIP.MP3" is accepted.
    file_extension = os.path.splitext(path)[1].lower()

    if file_extension not in valid_extensions:
        return False, f"Invalid file type. Supported formats: {', '.join(valid_extensions)}"

    return True, "File is valid"
|
|
|
def transcribe_audio(audio_file, api_key):
    """Transcribe audio/video files into text using Groq's Whisper model.

    This tool converts spoken content from audio and video files into written text.
    It supports multiple audio formats and handles files up to 25MB in size.

    Parameters:
        audio_file: An audio or video file to transcribe.
            Supported formats: MP3, MP4, MPEG, MPGA, M4A, WAV, WebM, FLAC, OGG, AAC.
            Maximum size: 25MB.
        api_key: Your Groq API key, required for authentication.
            You can obtain this from https://console.groq.com/

    Returns:
        A text transcript of the spoken content in the audio file.
        On failure, a message beginning with "Error" is returned instead of raising.

    Example:
        Upload a podcast episode to get a complete text transcript.
    """
    # Pasted keys often carry stray whitespace; strip it and treat a blank
    # key as missing rather than sending it to the API.
    if isinstance(api_key, str):
        api_key = api_key.strip()
    if not api_key:
        return "Error: Please provide your Groq API key"

    if audio_file is None:
        return "Error: Please upload an audio or video file"

    # This function is the UI/tool boundary: every failure is reported as
    # text in the transcript output rather than propagated.
    try:
        is_valid, message = validate_file(audio_file)
        if not is_valid:
            return f"Error: {message}"

        # The upload may be a plain path or an object exposing it as ``name``.
        if isinstance(audio_file, (str, os.PathLike)):
            audio_path = os.fspath(audio_file)
        else:
            audio_path = audio_file.name

        client = Groq(api_key=api_key)

        with open(audio_path, "rb") as audio_stream:
            # The file is sent as a (filename, bytes) pair.
            transcription = client.audio.transcriptions.create(
                file=(os.path.basename(audio_path), audio_stream.read()),
                model="whisper-large-v3-turbo",
            )

        return transcription.text
    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
# Build the Gradio interface: a two-column layout with upload/settings on the
# left and the transcript output on the right.
with gr.Blocks(title="Audio/Video Transcription with Groq", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎵 Audio/Video Transcription with Groq Whisper")
    gr.Markdown("Upload an audio or video file and get an AI-generated transcript using Groq's Whisper model.")

    with gr.Row():

        # Left column: file upload, API key, and the trigger button.
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload & Settings")

            audio_input = gr.File(
                label="Upload Audio/Video File",
                file_types=[".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm", ".flac", ".ogg", ".aac"],
                file_count="single"
            )

            # type="password" masks the key in the browser.
            api_key_input = gr.Textbox(
                label="Groq API Key",
                placeholder="Enter your Groq API key here...",
                type="password",
                lines=1
            )

            transcribe_btn = gr.Button(
                "🎯 Transcribe Audio",
                variant="primary",
                size="lg"
            )

            gr.Markdown("### ℹ️ File Requirements")
            gr.Markdown("""
            - **Max file size**: 25MB
            - **Supported formats**: MP3, MP4, MPEG, MPGA, M4A, WAV, WebM, FLAC, OGG, AAC
            - **Get API key**: [Groq Console](https://console.groq.com/)
            """)

        # Right column: read-only transcript display.
        with gr.Column(scale=1):
            gr.Markdown("### 📝 Transcript")

            transcript_output = gr.Textbox(
                label="Generated Transcript",
                placeholder="Your transcript will appear here...",
                lines=20,
                max_lines=30,
                show_copy_button=True,
                interactive=False
            )

    # Wire the button to the transcription function; its return value
    # (transcript or error text) is shown in the output textbox.
    transcribe_btn.click(
        fn=transcribe_audio,
        inputs=[audio_input, api_key_input],
        outputs=transcript_output,
        show_progress=True
    )

    gr.Markdown("### 🔗 Useful Links")
    gr.Markdown("""
    - [Get your Groq API key](https://console.groq.com/)
    - [Groq Documentation](https://console.groq.com/docs)
    - [Supported audio formats](https://platform.openai.com/docs/guides/speech-to-text)
    """)


if __name__ == "__main__":
    # mcp_server=True is passed through to Gradio's launch().
    demo.launch(mcp_server=True)