Enrique Cardoza committed on
Commit
6ec981e
·
1 Parent(s): c602a67

feat(app): implement audio/video transcription with Groq API

Browse files

- Replace letter counter with audio/video transcription functionality
- Implement two-column layout with upload controls and transcript display
- Add file validation for supported audio/video formats with 25MB size limit
- Integrate with Groq's Whisper-large-v3-turbo API for transcription
- Add secure API key input field with password protection
- Include helpful information and links for user guidance
- Implement comprehensive error handling with user-friendly messages
- Add detailed docstrings with MCP integration documentation
- Document input parameters with constraints and requirements
- Include usage examples to improve user understanding
- Format docstrings to comply with MCP protocol requirements
- Ensure proper tool exposure with mcp_server=True parameter

Files changed (1) hide show
  1. app.py +139 -0
app.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from groq import Groq
4
+ import tempfile
5
+
6
def validate_file(file):
    """Validate an uploaded file's size and extension.

    Args:
        file: The upload to check. May be ``None``, a filesystem path
            (``str`` or ``os.PathLike``), or any object that exposes the
            path through a ``name`` attribute.

    Returns:
        A ``(is_valid, message)`` tuple. ``message`` is ``"File is valid"``
        on success and a human-readable reason otherwise.
    """
    if file is None:
        return False, "No file uploaded"

    # Accept plain paths as well as wrapper objects carrying the path in
    # `.name`, so the check does not depend on how the caller hands over
    # the upload.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = getattr(file, "name", None)
    if not path:
        return False, "No file uploaded"

    # Check file size (25MB limit). A missing or unreadable file is reported
    # as a validation failure instead of escaping as an OSError.
    try:
        file_size_mb = os.path.getsize(path) / (1024 * 1024)
    except OSError as exc:
        return False, f"Unable to read uploaded file: {exc}"
    if file_size_mb > 25:
        return False, f"File size ({file_size_mb:.1f}MB) exceeds 25MB limit"

    # Check file extension (case-insensitive).
    valid_extensions = ['.mp3', '.mp4', '.mpeg', '.mpga', '.m4a', '.wav', '.webm', '.flac', '.ogg', '.aac']
    file_extension = os.path.splitext(path)[1].lower()
    if file_extension not in valid_extensions:
        return False, f"Invalid file type. Supported formats: {', '.join(valid_extensions)}"

    return True, "File is valid"
24
+
25
def transcribe_audio(audio_file, api_key):
    """Transcribe audio/video files into text using Groq's Whisper model.

    This tool converts spoken content from audio and video files into written text.
    It supports multiple audio formats and handles files up to 25MB in size.

    Parameters:
        audio_file: An audio or video file to transcribe.
            Supported formats: MP3, MP4, MPEG, MPGA, M4A, WAV, WebM, FLAC, OGG, AAC.
            Maximum size: 25MB.
        api_key: Your Groq API key, required for authentication.
            You can obtain this from https://console.groq.com/

    Returns:
        A text transcript of the spoken content in the audio file, or a
        message starting with "Error:" when the input is rejected or the
        transcription request fails.

    Example:
        Upload a podcast episode to get a complete text transcript.
    """
    try:
        # Pasted keys often carry stray spaces or a trailing newline; strip
        # them and treat a blank key the same as a missing one.
        api_key = (api_key or "").strip()
        if not api_key:
            return "Error: Please provide your Groq API key"

        if audio_file is None:
            return "Error: Please upload an audio or video file"

        # Reject oversized or unsupported files before contacting the API.
        is_valid, message = validate_file(audio_file)
        if not is_valid:
            return f"Error: {message}"

        client = Groq(api_key=api_key)

        # The upload normally exposes its path via `.name`; fall back to the
        # value itself so a plain path string is tolerated here as well.
        audio_path = getattr(audio_file, "name", audio_file)

        with open(audio_path, "rb") as handle:
            transcription = client.audio.transcriptions.create(
                file=(os.path.basename(audio_path), handle.read()),
                model="whisper-large-v3-turbo",
            )

        return transcription.text

    except Exception as e:
        # UI/tool boundary: report any failure as text instead of raising,
        # so the caller always receives a string.
        return f"Error: {str(e)}"
72
+
73
# Build the Gradio interface: a two-column layout with the upload controls on
# the left and the transcript on the right.
with gr.Blocks(title="Audio/Video Transcription with Groq", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎵 Audio/Video Transcription with Groq Whisper")
    gr.Markdown("Upload an audio or video file and get an AI-generated transcript using Groq's Whisper model.")

    with gr.Row():
        # Left column - input controls
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload & Settings")

            audio_input = gr.File(
                label="Upload Audio/Video File",
                file_types=[".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm", ".flac", ".ogg", ".aac"],
                file_count="single",
            )

            # type="password" masks the key while it is typed.
            api_key_input = gr.Textbox(
                label="Groq API Key",
                placeholder="Enter your Groq API key here...",
                type="password",
                lines=1,
            )

            transcribe_btn = gr.Button(
                "🎯 Transcribe Audio",
                variant="primary",
                size="lg",
            )

            gr.Markdown("### ℹ️ File Requirements")
            gr.Markdown(
                "- **Max file size**: 25MB\n"
                "- **Supported formats**: MP3, MP4, MPEG, MPGA, M4A, WAV, WebM, FLAC, OGG, AAC\n"
                "- **Get API key**: [Groq Console](https://console.groq.com/)"
            )

        # Right column - output
        with gr.Column(scale=1):
            gr.Markdown("### 📝 Transcript")

            transcript_output = gr.Textbox(
                label="Generated Transcript",
                placeholder="Your transcript will appear here...",
                lines=20,
                max_lines=30,
                show_copy_button=True,
                interactive=False,
            )

    # Connect the button to the transcription function.
    transcribe_btn.click(
        fn=transcribe_audio,
        inputs=[audio_input, api_key_input],
        outputs=transcript_output,
        show_progress="full",
    )

    # Useful links section
    gr.Markdown("### 🔗 Useful Links")
    gr.Markdown(
        "- [Get your Groq API key](https://console.groq.com/)\n"
        "- [Groq Documentation](https://console.groq.com/docs)\n"
        "- [Supported audio formats](https://platform.openai.com/docs/guides/speech-to-text)"
    )

if __name__ == "__main__":
    # mcp_server=True additionally exposes the app's functions as MCP tools.
    demo.launch(mcp_server=True)