Enrique Cardoza
committed on
Commit
·
d49e517
1
Parent(s):
80bdfa1
feat(transcription): add URL-based audio transcription
Browse files
- Add new transcribe_audio_from_url function for processing audio from URLs
- Implement URL validation with extension and size checks
- Create tabbed interface separating upload and URL methods
- Add functionality to download and process remote audio files
- Reorganize UI to share API key input between both methods
- Add URL-specific requirements and instructions
- Improve error handling for network operations
- Implement temp file management for downloaded audio
app.py
CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
|
|
2 |
import os
|
3 |
from groq import Groq
|
4 |
import tempfile
|
|
|
|
|
5 |
|
6 |
def validate_file(file):
|
7 |
"""Validate uploaded file type and size."""
|
@@ -22,6 +24,45 @@ def validate_file(file):
|
|
22 |
|
23 |
return True, "File is valid"
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
def transcribe_audio(audio_file, api_key):
|
26 |
"""Transcribe audio/video files into text using Groq's Whisper model.
|
27 |
|
@@ -73,64 +114,187 @@ def transcribe_audio(audio_file, api_key):
|
|
73 |
except Exception as e:
|
74 |
return f"Error: {str(e)}"
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
gr.Markdown("# π΅ Audio/Video Transcription with Groq Whisper")
|
79 |
-
gr.Markdown("Upload an audio or video file and get an AI-generated transcript using Groq's Whisper model.")
|
80 |
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
-
|
96 |
-
|
97 |
-
placeholder="Enter your Groq API key here or set GROQ_API_KEY environment variable",
|
98 |
-
type="password",
|
99 |
-
lines=1,
|
100 |
-
info=api_key_note
|
101 |
-
)
|
102 |
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
|
109 |
-
|
110 |
-
gr.Markdown("""
|
111 |
-
- **Max file size**: 25MB
|
112 |
-
- **Supported formats**: MP3, MP4, MPEG, MPGA, M4A, WAV, WebM, FLAC, OGG, AAC
|
113 |
-
- **Get API key**: [Groq Console](https://console.groq.com/)
|
114 |
-
""")
|
115 |
-
|
116 |
-
# Right column - Output
|
117 |
-
with gr.Column(scale=1):
|
118 |
-
gr.Markdown("### π Transcript")
|
119 |
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
|
129 |
-
#
|
130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
fn=transcribe_audio,
|
132 |
inputs=[audio_input, api_key_input],
|
133 |
-
outputs=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
show_progress=True
|
135 |
)
|
136 |
|
|
|
2 |
import os
|
3 |
from groq import Groq
|
4 |
import tempfile
|
5 |
+
import requests
|
6 |
+
import urllib.parse
|
7 |
|
8 |
def validate_file(file):
|
9 |
"""Validate uploaded file type and size."""
|
|
|
24 |
|
25 |
return True, "File is valid"
|
26 |
|
27 |
+
def validate_url_file(url):
    """Validate a remote audio/video URL by scheme, extension and reported size.

    The URL is checked locally first (non-empty, http/https scheme, supported
    file extension). Only if those checks pass is a HEAD request sent to read
    the server-reported size, so obviously bad input causes no network traffic.

    Parameters:
        url: Candidate URL string. Surrounding whitespace is ignored.

    Returns:
        A ``(is_valid, message)`` tuple. ``message`` is "File is valid" on
        success, otherwise a human-readable reason for the rejection.
    """
    valid_extensions = (
        '.mp3', '.mp4', '.mpeg', '.mpga', '.m4a',
        '.wav', '.webm', '.flac', '.ogg', '.aac',
    )
    max_size_mb = 25

    if not url or not url.strip():
        return False, "No URL provided"

    # Pasted URLs often carry stray spaces; without this a trailing space ends
    # up in the path and makes the extension check fail ('.mp3 ' != '.mp3').
    url = url.strip()

    try:
        # Check if the URL is well-formed
        parsed_url = urllib.parse.urlparse(url)
        if not (parsed_url.scheme and parsed_url.netloc):
            return False, "Invalid URL format"

        if parsed_url.scheme not in ('http', 'https'):
            return False, "URL must start with http:// or https://"

        # Check file extension from the URL path (query string is ignored)
        file_extension = os.path.splitext(parsed_url.path)[1].lower()
        if file_extension not in valid_extensions:
            return False, f"Invalid file type in URL. Supported formats: {', '.join(valid_extensions)}"

        # NOTE(review): the target host is not restricted, so on a hosted
        # deployment this request can reach internal/private addresses (SSRF).
        # Consider an allow/deny list before exposing this publicly.
        response = requests.head(url, allow_redirects=True, timeout=10)
        if response.status_code != 200:
            return False, f"Could not access URL (HTTP {response.status_code})"

        # Content-Length is optional; when it is absent the size is unknown
        # here and cannot be rejected at this stage.
        content_length = response.headers.get('content-length')
        if content_length:
            try:
                size_bytes = int(content_length)
            except ValueError:
                return False, "Server reported an invalid Content-Length header"

            file_size_mb = size_bytes / (1024 * 1024)
            if file_size_mb > max_size_mb:
                return False, f"File size ({file_size_mb:.1f}MB) exceeds 25MB limit"

        return True, "File is valid"

    except requests.exceptions.RequestException as e:
        return False, f"Error accessing URL: {str(e)}"
    except Exception as e:
        # Boundary handler: callers expect a (bool, message) tuple, never a raise.
        return False, f"Error validating URL: {str(e)}"
|
65 |
+
|
66 |
def transcribe_audio(audio_file, api_key):
|
67 |
"""Transcribe audio/video files into text using Groq's Whisper model.
|
68 |
|
|
|
114 |
except Exception as e:
|
115 |
return f"Error: {str(e)}"
|
116 |
|
117 |
+
def transcribe_audio_from_url(audio_url, api_key):
    """Transcribe audio/video files from a URL into text using Groq's Whisper model.

    This tool converts spoken content from audio and video files into written text.
    It supports multiple audio formats and handles files up to 25MB in size.

    The file is downloaded to a temporary file, sent to the
    "whisper-large-v3-turbo" model, and the temporary file is removed
    afterwards whether or not the download or transcription succeeded.

    Parameters:
        audio_url: URL to an audio or video file to transcribe (http or https).
                  Supported formats: MP3, MP4, MPEG, MPGA, M4A, WAV, WebM, FLAC, OGG, AAC.
                  Maximum size: 25MB. Surrounding whitespace is ignored.
        api_key: Your Groq API key, required for authentication.
                You can obtain this from https://console.groq.com/
                A non-empty GROQ_API_KEY environment variable takes precedence.

    Returns:
        A text transcript of the spoken content in the audio file, or a
        string starting with "Error" describing what went wrong. This
        function does not raise for ordinary failures.

    Example:
        Provide a URL to a podcast episode to get a complete text transcript.
    """
    try:
        # Prefer the environment variable, but fall back to the provided key
        # when the variable is unset OR set to an empty string.
        actual_api_key = os.environ.get("GROQ_API_KEY") or api_key

        # Validate API key
        if not actual_api_key:
            return "Error: Please provide your Groq API key or set the GROQ_API_KEY environment variable"

        if not audio_url or not audio_url.strip():
            return "Error: Please provide a URL to an audio or video file"

        # Use one normalized URL for validation, download and filename.
        audio_url = audio_url.strip()

        # Validate file from URL
        is_valid, message = validate_url_file(audio_url)
        if not is_valid:
            return f"Error: {message}"

        # Initialize Groq client
        client = Groq(api_key=actual_api_key)

        # Download the file to a temporary location (size-capped)
        temp_file_path = _download_audio_to_temp_file(audio_url)

        try:
            # Keep the original filename from the URL so the extension is
            # passed along with the upload.
            filename = os.path.basename(urllib.parse.urlparse(audio_url).path)
            if not filename:
                filename = "audio_from_url"

            with open(temp_file_path, "rb") as file:
                transcription = client.audio.transcriptions.create(
                    file=(filename, file.read()),
                    model="whisper-large-v3-turbo"
                )

            return transcription.text

        finally:
            # Clean up the temporary file
            if os.path.exists(temp_file_path):
                os.unlink(temp_file_path)

    except requests.exceptions.RequestException as e:
        return f"Error downloading file: {str(e)}"
    except Exception as e:
        # Boundary handler: the UI expects a message string, never a raise.
        return f"Error: {str(e)}"


def _download_audio_to_temp_file(url, max_bytes=25 * 1024 * 1024, timeout=30):
    """Stream *url* into a new temporary file and return that file's path.

    The caller owns the returned file and must delete it. If anything fails
    (network error, HTTP error status, size limit exceeded) the partially
    written file is removed before the exception propagates.

    Raises:
        requests.exceptions.RequestException: on connection or HTTP errors.
        ValueError: if the body is larger than ``max_bytes``.
    """
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    temp_file_path = temp_file.name
    try:
        # Both context managers close on exit: the file handle and the
        # streamed HTTP connection.
        with temp_file, requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()

            downloaded = 0
            for chunk in response.iter_content(chunk_size=8192):
                downloaded += len(chunk)
                # Enforce the limit on actual bytes received; Content-Length
                # may be missing or wrong.
                if downloaded > max_bytes:
                    raise ValueError("Downloaded file exceeds 25MB limit")
                temp_file.write(chunk)
    except BaseException:
        # Do not leave a partial download behind on any failure.
        if os.path.exists(temp_file_path):
            os.unlink(temp_file_path)
        raise

    return temp_file_path
|
190 |
+
|
191 |
+
# Create the Gradio interface with custom layout
|
192 |
+
with gr.Blocks(title="Audio/Video Transcription with Groq", theme=gr.themes.Soft()) as demo:
|
193 |
+
gr.Markdown("# π΅ Audio/Video Transcription with Groq Whisper")
|
194 |
+
gr.Markdown("Upload an audio/video file or provide a URL and get an AI-generated transcript using Groq's Whisper model.")
|
195 |
|
196 |
+
# API Key input - shared between tabs
|
197 |
+
api_key_note = "API key will be used from environment variable if set" if os.environ.get("GROQ_API_KEY") else ""
|
198 |
+
api_key_input = gr.Textbox(
|
199 |
+
label="Groq API Key",
|
200 |
+
placeholder="Enter your Groq API key here or set GROQ_API_KEY environment variable",
|
201 |
+
type="password",
|
202 |
+
lines=1,
|
203 |
+
info=api_key_note
|
204 |
+
)
|
205 |
+
|
206 |
+
with gr.Tabs():
|
207 |
+
# Tab 1: File Upload
|
208 |
+
with gr.TabItem("Upload File"):
|
209 |
+
with gr.Row():
|
210 |
+
# Left column - Input controls
|
211 |
+
with gr.Column(scale=1):
|
212 |
+
gr.Markdown("### π€ Upload Audio/Video")
|
213 |
+
|
214 |
+
audio_input = gr.File(
|
215 |
+
label="Upload Audio/Video File",
|
216 |
+
file_types=[".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm", ".flac", ".ogg", ".aac"],
|
217 |
+
file_count="single"
|
218 |
+
)
|
219 |
+
|
220 |
+
upload_transcribe_btn = gr.Button(
|
221 |
+
"π― Transcribe Uploaded File",
|
222 |
+
variant="primary",
|
223 |
+
size="lg"
|
224 |
+
)
|
225 |
+
|
226 |
+
gr.Markdown("### βΉοΈ File Requirements")
|
227 |
+
gr.Markdown("""
|
228 |
+
- **Max file size**: 25MB
|
229 |
+
- **Supported formats**: MP3, MP4, MPEG, MPGA, M4A, WAV, WebM, FLAC, OGG, AAC
|
230 |
+
- **Get API key**: [Groq Console](https://console.groq.com/)
|
231 |
+
""")
|
232 |
+
|
233 |
+
# Right column - Output
|
234 |
+
with gr.Column(scale=1):
|
235 |
+
gr.Markdown("### π Transcript")
|
236 |
+
|
237 |
+
upload_transcript_output = gr.Textbox(
|
238 |
+
label="Generated Transcript",
|
239 |
+
placeholder="Your transcript will appear here...",
|
240 |
+
lines=20,
|
241 |
+
max_lines=30,
|
242 |
+
show_copy_button=True,
|
243 |
+
interactive=False
|
244 |
+
)
|
245 |
+
|
246 |
+
# Tab 2: URL Input
|
247 |
+
with gr.TabItem("Audio URL"):
|
248 |
+
with gr.Row():
|
249 |
+
# Left column - Input controls
|
250 |
+
with gr.Column(scale=1):
|
251 |
+
gr.Markdown("### π Audio/Video URL")
|
252 |
+
|
253 |
+
url_input = gr.Textbox(
|
254 |
+
label="URL to Audio/Video File",
|
255 |
+
placeholder="Enter the http/https URL to an audio or video file",
|
256 |
+
lines=2
|
257 |
+
)
|
258 |
+
|
259 |
+
url_transcribe_btn = gr.Button(
|
260 |
+
"π― Transcribe from URL",
|
261 |
+
variant="primary",
|
262 |
+
size="lg"
|
263 |
+
)
|
264 |
+
|
265 |
+
gr.Markdown("### βΉοΈ URL Requirements")
|
266 |
+
gr.Markdown("""
|
267 |
+
- **URL format**: Must start with http:// or https://
|
268 |
+
- **Max file size**: 25MB
|
269 |
+
- **Supported formats**: MP3, MP4, MPEG, MPGA, M4A, WAV, WebM, FLAC, OGG, AAC
|
270 |
+
- **Direct link**: URL must point directly to the audio/video file
|
271 |
+
""")
|
272 |
+
|
273 |
+
# Right column - Output
|
274 |
+
with gr.Column(scale=1):
|
275 |
+
gr.Markdown("### π Transcript")
|
276 |
+
|
277 |
+
url_transcript_output = gr.Textbox(
|
278 |
+
label="Generated Transcript",
|
279 |
+
placeholder="Your transcript will appear here...",
|
280 |
+
lines=20,
|
281 |
+
max_lines=30,
|
282 |
+
show_copy_button=True,
|
283 |
+
interactive=False
|
284 |
+
)
|
285 |
+
|
286 |
+
# Connect the buttons to their respective transcription functions
|
287 |
+
upload_transcribe_btn.click(
|
288 |
fn=transcribe_audio,
|
289 |
inputs=[audio_input, api_key_input],
|
290 |
+
outputs=upload_transcript_output,
|
291 |
+
show_progress=True
|
292 |
+
)
|
293 |
+
|
294 |
+
url_transcribe_btn.click(
|
295 |
+
fn=transcribe_audio_from_url,
|
296 |
+
inputs=[url_input, api_key_input],
|
297 |
+
outputs=url_transcript_output,
|
298 |
show_progress=True
|
299 |
)
|
300 |
|