File size: 16,535 Bytes
6ec981e
 
 
 
d49e517
 
473f76d
6ec981e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d49e517
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
473f76d
 
 
 
 
 
 
 
 
 
 
 
 
6ec981e
 
 
 
 
 
 
 
 
 
 
473f76d
6ec981e
 
 
 
 
 
 
 
473f76d
 
 
 
 
 
 
 
 
 
 
 
 
80bdfa1
 
 
473f76d
 
 
 
 
 
80bdfa1
 
473f76d
6ec981e
 
 
 
 
 
 
 
 
 
80bdfa1
6ec981e
 
 
 
 
 
 
 
 
 
 
 
 
 
473f76d
d49e517
6ec981e
d49e517
 
 
 
 
 
 
 
 
473f76d
d49e517
 
 
 
 
 
 
 
473f76d
 
 
 
 
 
 
 
 
 
 
 
 
d49e517
 
 
473f76d
 
 
 
 
 
d49e517
 
473f76d
d49e517
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80bdfa1
d49e517
 
6ec981e
d49e517
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ec981e
d49e517
6ec981e
d49e517
 
 
 
 
 
 
 
 
 
473f76d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d49e517
 
 
 
6ec981e
d49e517
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
473f76d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d49e517
473f76d
d49e517
6ec981e
 
d49e517
 
 
 
 
 
 
 
6ec981e
 
 
473f76d
 
 
 
 
 
6ec981e
 
 
 
 
 
 
 
 
473f76d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
import gradio as gr
import os
from groq import Groq
import tempfile
import requests
import urllib.parse
import json

def validate_file(file):
    """Validate an uploaded file's size and extension.

    Parameters:
        file: The uploaded file. May be a filesystem path (str or
              os.PathLike) or any object whose ``name`` attribute holds
              the path. None means nothing was uploaded.

    Returns:
        A tuple ``(is_valid, message)``: ``(True, "File is valid")`` when the
        file passes both checks, otherwise ``False`` plus a human-readable
        reason.
    """
    if file is None:
        return False, "No file uploaded"

    # Accept a bare path as well as a wrapper object exposing the path as `.name`
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    # Check file size (25MB limit)
    try:
        file_size_mb = os.path.getsize(path) / (1024 * 1024)
    except OSError as exc:
        # Report a missing/unreadable file as a validation failure instead of raising
        return False, f"Could not read file: {exc}"
    if file_size_mb > 25:
        return False, f"File size ({file_size_mb:.1f}MB) exceeds 25MB limit"

    # Check file extension (case-insensitive)
    valid_extensions = ('.mp3', '.mp4', '.mpeg', '.mpga', '.m4a', '.wav', '.webm', '.flac', '.ogg', '.aac')
    file_extension = os.path.splitext(path)[1].lower()

    if file_extension not in valid_extensions:
        return False, f"Invalid file type. Supported formats: {', '.join(valid_extensions)}"

    return True, "File is valid"

def validate_url_file(url):
    """Validate a remote file by URL shape, extension and advertised size.

    Parameters:
        url: The http/https URL that should point directly at an audio or
             video file. Surrounding whitespace is ignored.

    Returns:
        A tuple ``(is_valid, message)``: ``(True, "File is valid")`` when all
        checks pass, otherwise ``False`` plus a human-readable reason.
        The size check relies on the Content-Length header of a HEAD
        response and is skipped when that header is missing or not numeric.
    """
    if not url or url.strip() == "":
        return False, "No URL provided"

    # Validate the same value that was checked for emptiness: a pasted URL
    # often carries a trailing newline that would break the extension check.
    url = url.strip()

    # NOTE(review): the URL comes from the user and is fetched from the server
    # side; consider restricting private/internal hosts if this is deployed
    # on a network with sensitive internal endpoints.
    try:
        # Check if the URL is valid
        parsed_url = urllib.parse.urlparse(url)
        if not all([parsed_url.scheme, parsed_url.netloc]):
            return False, "Invalid URL format"

        if parsed_url.scheme not in ('http', 'https'):
            return False, "URL must start with http:// or https://"

        # Check file extension from the URL path (query string is ignored)
        valid_extensions = ('.mp3', '.mp4', '.mpeg', '.mpga', '.m4a', '.wav', '.webm', '.flac', '.ogg', '.aac')
        file_extension = os.path.splitext(parsed_url.path)[1].lower()

        if file_extension not in valid_extensions:
            return False, f"Invalid file type in URL. Supported formats: {', '.join(valid_extensions)}"

        # Check file size with a HEAD request
        response = requests.head(url, allow_redirects=True, timeout=10)
        if response.status_code != 200:
            return False, f"Could not access URL (HTTP {response.status_code})"

        # Only trust a purely numeric Content-Length; anything else means the
        # size is unknown here rather than an error in the URL itself.
        content_length = (response.headers.get('content-length') or "").strip()
        if content_length.isdigit():
            file_size_mb = int(content_length) / (1024 * 1024)
            if file_size_mb > 25:
                return False, f"File size ({file_size_mb:.1f}MB) exceeds 25MB limit"

        return True, "File is valid"

    except requests.exceptions.RequestException as e:
        return False, f"Error accessing URL: {str(e)}"
    except Exception as e:
        return False, f"Error validating URL: {str(e)}"

def get_request_headers(request: gr.Request):
    """Return the headers of a request as a plain dictionary.

    Parameters:
        request: The Gradio request object, or None.

    Returns:
        A dict of header names to values. When no request is given, a dict
        with a single "message" entry; when the headers cannot be read, a
        dict with a single "error" entry describing the failure.
    """
    if request is None:
        return {"message": "No request object available"}

    try:
        # Copy the header pairs into an ordinary dict
        return dict(request.headers.items())
    except Exception as exc:
        return {"error": f"Error extracting headers: {exc}"}

def transcribe_audio(audio_file, api_key, request: gr.Request = None):
    """Transcribe audio/video files into text using Groq's Whisper model.

    This tool converts spoken content from audio and video files into written text.
    It supports multiple audio formats and handles files up to 25MB in size.

    The API key is resolved in this order: the GROQ_API_KEY environment
    variable (if non-empty), the api_key argument, then a Bearer token in the
    request's Authorization header.

    Parameters:
        audio_file: An audio or video file to transcribe.
                   Supported formats: MP3, MP4, MPEG, MPGA, M4A, WAV, WebM, FLAC, OGG, AAC.
                   Maximum size: 25MB.
        api_key: Your Groq API key, required for authentication.
                You can obtain this from https://console.groq.com/
        request: The Gradio request object containing headers.

    Returns:
        A text transcript of the spoken content in the audio file, or a
        string starting with "Error:" when transcription was not possible.

    Example:
        Upload a podcast episode to get a complete text transcript.
    """
    try:
        auth_header = None
        if request is not None:
            auth_header = request.headers.get('Authorization')

            # Log request headers, but never write credentials to the logs
            sensitive = ("authorization", "proxy-authorization", "cookie", "x-api-key")
            loggable = {
                key: "[REDACTED]" if key.lower() in sensitive else value
                for key, value in request.headers.items()
            }
            print(f"Request Headers: {json.dumps(loggable, indent=2)}")

        # Environment variable wins when it holds a value; an empty variable
        # must not shadow the key the caller provided.
        actual_api_key = os.environ.get("GROQ_API_KEY") or api_key

        # Fall back to a Bearer token from the Authorization header
        if not actual_api_key and auth_header and auth_header.startswith('Bearer '):
            actual_api_key = auth_header[len('Bearer '):]

        # Validate API key
        if not actual_api_key:
            return "Error: Please provide your Groq API key or set the GROQ_API_KEY environment variable or include in Authorization header"

        if audio_file is None:
            return "Error: Please upload an audio or video file"

        # Validate file
        is_valid, message = validate_file(audio_file)
        if not is_valid:
            return f"Error: {message}"

        # Initialize Groq client
        client = Groq(api_key=actual_api_key)

        # The upload may be a wrapper exposing the path as `.name` or a bare path
        audio_path = getattr(audio_file, "name", audio_file)

        # Read the audio file and create the transcription
        with open(audio_path, "rb") as file:
            transcription = client.audio.transcriptions.create(
                file=(os.path.basename(audio_path), file.read()),
                model="whisper-large-v3-turbo"
            )

        return transcription.text

    except Exception as e:
        # Top-level boundary for the UI/tool: surface the failure as text
        return f"Error: {str(e)}"

def transcribe_audio_from_url(audio_url, api_key, request: gr.Request = None):
    """Transcribe audio/video files from a URL into text using Groq's Whisper model.

    This tool converts spoken content from audio and video files into written text.
    It supports multiple audio formats and handles files up to 25MB in size.

    The API key is resolved in this order: the GROQ_API_KEY environment
    variable (if non-empty), the api_key argument, then a Bearer token in the
    request's Authorization header.

    Parameters:
        audio_url: URL to an audio or video file to transcribe (http or https).
                  Supported formats: MP3, MP4, MPEG, MPGA, M4A, WAV, WebM, FLAC, OGG, AAC.
                  Maximum size: 25MB.
        api_key: Your Groq API key, required for authentication.
                You can obtain this from https://console.groq.com/
        request: The Gradio request object containing headers.

    Returns:
        A text transcript of the spoken content in the audio file, or a
        string starting with "Error" when transcription was not possible.

    Example:
        Provide a URL to a podcast episode to get a complete text transcript.
    """
    max_bytes = 25 * 1024 * 1024
    temp_file_path = None
    try:
        auth_header = None
        if request is not None:
            auth_header = request.headers.get('Authorization')

            # Log request headers, but never write credentials to the logs
            sensitive = ("authorization", "proxy-authorization", "cookie", "x-api-key")
            loggable = {
                key: "[REDACTED]" if key.lower() in sensitive else value
                for key, value in request.headers.items()
            }
            print(f"Request Headers: {json.dumps(loggable, indent=2)}")

        # Environment variable wins when it holds a value; an empty variable
        # must not shadow the key the caller provided.
        actual_api_key = os.environ.get("GROQ_API_KEY") or api_key

        # Fall back to a Bearer token from the Authorization header
        if not actual_api_key and auth_header and auth_header.startswith('Bearer '):
            actual_api_key = auth_header[len('Bearer '):]

        # Validate API key
        if not actual_api_key:
            return "Error: Please provide your Groq API key or set the GROQ_API_KEY environment variable or include in Authorization header"

        # Use the trimmed URL everywhere so validation and download agree
        audio_url = audio_url.strip() if audio_url else ""
        if not audio_url:
            return "Error: Please provide a URL to an audio or video file"

        # Validate file from URL
        is_valid, message = validate_url_file(audio_url)
        if not is_valid:
            return f"Error: {message}"

        # Initialize Groq client
        client = Groq(api_key=actual_api_key)

        # Download the file to a temporary location. The path is recorded
        # before any network I/O so the `finally` below can always remove it.
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            temp_file_path = temp_file.name
            with requests.get(audio_url, stream=True, timeout=30) as response:
                response.raise_for_status()

                # Enforce the size limit on the bytes actually received: the
                # Content-Length header may be absent or inaccurate.
                downloaded = 0
                for chunk in response.iter_content(chunk_size=8192):
                    downloaded += len(chunk)
                    if downloaded > max_bytes:
                        return "Error: Downloaded file exceeds 25MB limit"
                    temp_file.write(chunk)

        # Get the original filename from the URL
        filename = os.path.basename(urllib.parse.urlparse(audio_url).path)
        if not filename:
            filename = "audio_from_url"

        # Read the downloaded file and create the transcription
        with open(temp_file_path, "rb") as file:
            transcription = client.audio.transcriptions.create(
                file=(filename, file.read()),
                model="whisper-large-v3-turbo"
            )

        return transcription.text

    except requests.exceptions.RequestException as e:
        return f"Error downloading file: {str(e)}"
    except Exception as e:
        # Top-level boundary for the UI/tool: surface the failure as text
        return f"Error: {str(e)}"
    finally:
        # Clean up the temporary file on every exit path
        if temp_file_path and os.path.exists(temp_file_path):
            os.unlink(temp_file_path)

# Create a dedicated endpoint for viewing request headers
def view_headers(request: gr.Request = None):
    """View all request headers.

    This function displays all the headers sent in the HTTP request.

    Parameters:
        request: The Gradio request object.

    Returns:
        A JSON-formatted string. On success it encodes an object mapping
        header names to values; when no request is available it encodes
        {"message": ...}, and when the headers cannot be read it encodes
        {"error": ...}. Every return value is valid JSON so it can be shown
        in a JSON output component.
    """
    if request is None:
        return json.dumps({"message": "No request object available"}, indent=2)

    try:
        # Extract all headers
        headers = dict(request.headers.items())
        return json.dumps(headers, indent=2)
    except Exception as e:
        return json.dumps({"error": f"Error extracting headers: {str(e)}"}, indent=2)

# Create the Gradio interface with custom layout
# Page layout: a shared API-key field, three tabs (file upload, URL input,
# request-header viewer), the button wiring, and a footer with links.
with gr.Blocks(title="Audio/Video Transcription with Groq", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎵 Audio/Video Transcription with Groq Whisper")
    gr.Markdown("Upload an audio/video file or provide a URL and get an AI-generated transcript using Groq's Whisper model.")

    # API Key input - shared between tabs.
    # The hint is only shown when GROQ_API_KEY is set when this module is loaded.
    api_key_note = "API key will be used from environment variable if set" if os.environ.get("GROQ_API_KEY") else ""
    api_key_input = gr.Textbox(
        label="Groq API Key",
        placeholder="Enter your Groq API key here or set GROQ_API_KEY environment variable",
        type="password",
        lines=1,
        info=api_key_note
    )

    with gr.Tabs():
        # Tab 1: File Upload
        with gr.TabItem("Upload File"):
            with gr.Row():
                # Left column - Input controls
                with gr.Column(scale=1):
                    gr.Markdown("### 📤 Upload Audio/Video")

                    audio_input = gr.File(
                        label="Upload Audio/Video File",
                        file_types=[".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm", ".flac", ".ogg", ".aac"],
                        file_count="single"
                    )

                    upload_transcribe_btn = gr.Button(
                        "🎯 Transcribe Uploaded File",
                        variant="primary",
                        size="lg"
                    )

                    gr.Markdown("### ℹ️ File Requirements")
                    gr.Markdown("""
                    - **Max file size**: 25MB
                    - **Supported formats**: MP3, MP4, MPEG, MPGA, M4A, WAV, WebM, FLAC, OGG, AAC
                    - **Get API key**: [Groq Console](https://console.groq.com/)
                    """)

                # Right column - Output
                with gr.Column(scale=1):
                    gr.Markdown("### 📝 Transcript")

                    upload_transcript_output = gr.Textbox(
                        label="Generated Transcript",
                        placeholder="Your transcript will appear here...",
                        lines=20,
                        max_lines=30,
                        show_copy_button=True,
                        interactive=False
                    )

        # Tab 2: URL Input
        with gr.TabItem("Audio URL"):
            with gr.Row():
                # Left column - Input controls
                with gr.Column(scale=1):
                    gr.Markdown("### 🔗 Audio/Video URL")

                    url_input = gr.Textbox(
                        label="URL to Audio/Video File",
                        placeholder="Enter the http/https URL to an audio or video file",
                        lines=2
                    )

                    url_transcribe_btn = gr.Button(
                        "🎯 Transcribe from URL",
                        variant="primary",
                        size="lg"
                    )

                    gr.Markdown("### ℹ️ URL Requirements")
                    gr.Markdown("""
                    - **URL format**: Must start with http:// or https://
                    - **Max file size**: 25MB
                    - **Supported formats**: MP3, MP4, MPEG, MPGA, M4A, WAV, WebM, FLAC, OGG, AAC
                    - **Direct link**: URL must point directly to the audio/video file
                    """)

                # Right column - Output
                with gr.Column(scale=1):
                    gr.Markdown("### 📝 Transcript")

                    url_transcript_output = gr.Textbox(
                        label="Generated Transcript",
                        placeholder="Your transcript will appear here...",
                        lines=20,
                        max_lines=30,
                        show_copy_button=True,
                        interactive=False
                    )

        # Tab 3: Request Headers
        with gr.TabItem("Request Headers"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### 🔍 View Request Headers")
                    gr.Markdown("Click the button below to view all headers sent in the current request.")

                    view_headers_btn = gr.Button(
                        "👁️ View Headers",
                        variant="primary",
                        size="lg"
                    )

                    headers_output = gr.JSON(
                        label="Request Headers",
                        value={"message": "Click the button to view headers"}
                    )

    # Connect the buttons to their respective functions.
    # The handlers' `request` parameter is not listed in `inputs`.
    upload_transcribe_btn.click(
        fn=transcribe_audio,
        inputs=[audio_input, api_key_input],
        outputs=upload_transcript_output,
        show_progress=True
    )

    url_transcribe_btn.click(
        fn=transcribe_audio_from_url,
        inputs=[url_input, api_key_input],
        outputs=url_transcript_output,
        show_progress=True
    )

    view_headers_btn.click(
        fn=view_headers,
        inputs=[],
        outputs=headers_output
    )

    # Footer with reference links
    gr.Markdown("### 🔗 Useful Links")
    gr.Markdown("""
    - [Get your Groq API key](https://console.groq.com/)
    - [Groq Documentation](https://console.groq.com/docs)
    - [Supported audio formats](https://platform.openai.com/docs/guides/speech-to-text)
    """)

# Script entry point: start the Gradio app only when this file is run directly,
# not when it is imported.
# NOTE(review): mcp_server=True is a Gradio launch option; per Gradio's MCP
# guide it also serves the app's functions as MCP tools — confirm against the
# installed Gradio version.
if __name__ == "__main__":
    demo.launch(mcp_server=True)