amirjamali committed on
Commit
a9b23f3
·
unverified ·
1 Parent(s): 7eb21f1

Enhance Dockerfile for improved network reliability and add cookies directory; update Streamlit app for better YouTube authentication handling and user guidance

Browse files
Files changed (2) hide show
  1. Dockerfile +4 -0
  2. src/streamlit_app.py +83 -22
Dockerfile CHANGED
@@ -19,6 +19,7 @@ RUN apt-get update --allow-releaseinfo-change || (sleep 2 && apt-get update) &&
19
  # Set pip to have more retries and timeout
20
  ENV PIP_DEFAULT_TIMEOUT=100
21
  ENV PIP_RETRIES=3
 
22
 
23
  # Copy requirements and install Python dependencies
24
  COPY requirements.txt ./
@@ -26,6 +27,9 @@ RUN pip install --upgrade pip && \
26
  pip install --no-cache-dir -r requirements.txt || \
27
  (sleep 2 && pip install --no-cache-dir -r requirements.txt)
28
 
 
 
 
29
  # Copy source code
30
  COPY src/ ./src/
31
 
 
19
  # Set pip to have more retries and timeout
20
  ENV PIP_DEFAULT_TIMEOUT=100
21
  ENV PIP_RETRIES=3
22
+ ENV PYTHONUNBUFFERED=1
23
 
24
  # Copy requirements and install Python dependencies
25
  COPY requirements.txt ./
 
27
  pip install --no-cache-dir -r requirements.txt || \
28
  (sleep 2 && pip install --no-cache-dir -r requirements.txt)
29
 
30
+ # Create cookies directory for user uploads
31
+ RUN mkdir -p /app/cookies
32
+
33
  # Copy source code
34
  COPY src/ ./src/
35
 
src/streamlit_app.py CHANGED
@@ -56,23 +56,40 @@ def download_video(url, video_path="video.mp4", cookies_file=None):
56
  """Download a video from a URL"""
57
  ydl_opts = {
58
  "outtmpl": video_path,
 
 
 
59
  }
60
 
61
- # Try to use provided cookies file first if it exists
 
62
  if cookies_file and os.path.exists(cookies_file):
63
  ydl_opts["cookiefile"] = cookies_file
64
- else:
65
- # Fall back to browser cookies
66
- ydl_opts["cookiesfrombrowser"] = ("chrome",)
67
 
68
  try:
 
 
 
69
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
70
  ydl.download([url])
71
- return os.path.exists(video_path)
 
 
 
 
 
72
  except Exception as e:
73
- st.error(f"Download error: {str(e)}")
74
- if "youtube" in url.lower() and "bot" in str(e).lower():
75
- st.warning("YouTube is requesting authentication. Please see the 'Having trouble with YouTube videos?' section for help.")
 
 
 
 
 
 
 
 
76
  return False
77
 
78
  def extract_audio(video_path="video.mp4", audio_path="audio.wav"):
@@ -271,6 +288,20 @@ st.set_page_config(
271
 
272
  st.title("🎤 English Accent Detection Tool")
273
  st.markdown("""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  This app analyzes a speaker's English accent from a video or audio source.
275
  It provides:
276
  - Classification of the accent (British, American, etc.)
@@ -282,31 +313,47 @@ It provides:
282
  tab1, tab2 = st.tabs(["Video URL", "Upload Audio"])
283
 
284
  with tab1:
285
- url = st.text_input("Enter a public video URL (e.g. Loom, YouTube, or direct MP4 link)")
 
 
 
 
 
286
 
287
  # Add file uploader for cookies.txt
288
  cookies_file = None
289
- uploaded_cookies = st.file_uploader("Optional: Upload cookies.txt for YouTube authentication", type="txt", help="Only needed if you have issues with YouTube videos")
 
 
290
 
291
  if uploaded_cookies is not None:
292
  # Save the uploaded cookies file to a temporary file
293
  cookies_file = f"cookies_{int(time.time())}.txt"
294
  with open(cookies_file, "wb") as f:
295
  f.write(uploaded_cookies.getbuffer())
 
296
 
297
  with st.expander("Having trouble with YouTube videos?"):
298
  st.markdown("""
299
- **YouTube Authentication Issues**
 
 
300
 
301
- YouTube sometimes requires authentication to verify you're not a bot. If you encounter errors while analyzing YouTube videos, try these solutions:
 
 
 
 
 
302
 
303
- 1. **Use a different video source**: Try Loom or a direct MP4 link instead
304
- 2. **Export cookies from your browser**:
305
- - Install the [cookies.txt extension](https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc) for Chrome
306
- - Visit YouTube and login
307
- - Use the extension to export your cookies to a file
308
- - Place the cookies file in the app's directory
309
- 3. **Try a shorter video**: YouTube may be less likely to require authentication for shorter clips
 
310
  """)
311
 
312
  if st.button("Analyze Video"):
@@ -374,13 +421,27 @@ with tab1:
374
  st.error(f"Error during analysis: {str(e)}")
375
 
376
  with tab2:
377
- uploaded_file = st.file_uploader("Upload an audio file (WAV, MP3, etc.)", type=["wav", "mp3", "m4a", "ogg"])
 
 
 
 
 
378
 
379
  if uploaded_file is not None:
 
 
380
  st.audio(uploaded_file)
381
 
382
- if st.button("Analyze Audio"):
383
- with st.spinner("Analyzing audio... (this may take a moment)"):
 
 
 
 
 
 
 
384
  try:
385
  results = process_uploaded_audio(uploaded_file)
386
 
 
56
  """Download a video from a URL"""
57
  ydl_opts = {
58
  "outtmpl": video_path,
59
+ "quiet": False,
60
+ "no_warnings": False,
61
+ "verbose": True # More detailed output for debugging
62
  }
63
 
64
+ # Only use cookies if explicitly provided via file upload
65
+ # Don't try to access browser cookies in Docker container
66
  if cookies_file and os.path.exists(cookies_file):
67
  ydl_opts["cookiefile"] = cookies_file
 
 
 
68
 
69
  try:
70
+ # Flag YouTube URLs so the error handler below can give YouTube-specific guidance
71
+ is_youtube = "youtube" in url.lower() or "youtu.be" in url.lower()
72
+
73
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
74
  ydl.download([url])
75
+
76
+ if os.path.exists(video_path):
77
+ return True
78
+ else:
79
+ st.error(f"Video downloaded but file not found: {video_path}")
80
+ return False
81
  except Exception as e:
82
+ error_msg = str(e)
83
+ st.error(f"Download error: {error_msg}")
84
+
85
+ # Provide specific guidance based on error type
86
+ if is_youtube and ("bot" in error_msg.lower() or "sign in" in error_msg.lower()):
87
+ st.warning("YouTube requires authentication. Please upload a cookies.txt file or try a direct video link.")
88
+ elif "not find" in error_msg.lower() and "cookies" in error_msg.lower():
89
+ st.warning("Browser cookies could not be accessed. Please upload a cookies.txt file.")
90
+ elif "network" in error_msg.lower() or "timeout" in error_msg.lower():
91
+ st.warning("Network error. Please check your internet connection and try again.")
92
+
93
  return False
94
 
95
  def extract_audio(video_path="video.mp4", audio_path="audio.wav"):
 
288
 
289
  st.title("🎤 English Accent Detection Tool")
290
  st.markdown("""
291
+ This application analyzes a speaker's English accent from video URLs or audio uploads,
292
+ providing detailed insights for hiring evaluation purposes.
293
+ """)
294
+
295
+ # Add container for tips
296
+ with st.container():
297
+ st.info("""
298
+ 💡 **Tips for best results:**
299
+ - Use **Loom** or **Vimeo** videos (more reliable than YouTube)
300
+ - For YouTube videos, you may need to provide cookies
301
+ - Audio clips of 15-30 seconds work best
302
+ - Clear speech with minimal background noise is ideal
303
+ """)
304
+ st.markdown("""
305
  This app analyzes a speaker's English accent from a video or audio source.
306
  It provides:
307
  - Classification of the accent (British, American, etc.)
 
313
  tab1, tab2 = st.tabs(["Video URL", "Upload Audio"])
314
 
315
  with tab1:
316
+ st.markdown("### 🎬 Analyze video from URL")
317
+ url = st.text_input("Enter a public video URL",
318
+ placeholder="https://www.loom.com/..., https://vimeo.com/..., or direct MP4 link")
319
+
320
+ # Recommend alternative sources
321
+ st.caption("⚠️ **Note**: YouTube videos often require authentication. For best results, use Loom, Vimeo or direct video links.")
322
 
323
  # Add file uploader for cookies.txt
324
  cookies_file = None
325
+ uploaded_cookies = st.file_uploader("Upload cookies.txt file for YouTube (if needed)",
326
+ type="txt",
327
+ help="Only needed for YouTube videos that require authentication")
328
 
329
  if uploaded_cookies is not None:
330
  # Save the uploaded cookies file to a temporary file
331
  cookies_file = f"cookies_{int(time.time())}.txt"
332
  with open(cookies_file, "wb") as f:
333
  f.write(uploaded_cookies.getbuffer())
334
+ st.success("Cookies file uploaded successfully!")
335
 
336
  with st.expander("Having trouble with YouTube videos?"):
337
  st.markdown("""
338
+ ### YouTube Authentication Issues
339
+
340
+ YouTube's anti-bot measures often block automated video downloads. To solve this:
341
 
342
+ #### Option 1: Use Alternative Video Sources (Recommended)
343
+ These typically work without authentication issues:
344
+ - [Loom](https://www.loom.com/) - Great for screen recordings
345
+ - [Vimeo](https://vimeo.com/) - High-quality video hosting
346
+ - [Streamable](https://streamable.com/) - Simple video sharing
347
+ - Any direct MP4 link
348
 
349
+ #### Option 2: Upload Cookies for YouTube
350
+ 1. Install a browser extension like [Get cookies.txt](https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc)
351
+ 2. Login to YouTube in your browser
352
+ 3. Use the extension to export cookies to a .txt file
353
+ 4. Upload the cookies.txt file using the uploader above
354
+
355
+ #### Option 3: Use Audio Upload Instead
356
+ The 'Upload Audio' tab allows direct analysis of audio files without URL issues.
357
  """)
358
 
359
  if st.button("Analyze Video"):
 
421
  st.error(f"Error during analysis: {str(e)}")
422
 
423
  with tab2:
424
+ st.markdown("### 🎵 Upload Audio File")
425
+ st.caption("**Recommended option!** Direct audio upload is more reliable than video URLs.")
426
+
427
+ uploaded_file = st.file_uploader("Upload an audio file",
428
+ type=["wav", "mp3", "m4a", "ogg", "flac"],
429
+ help="Support for WAV, MP3, M4A, OGG and FLAC formats")
430
 
431
  if uploaded_file is not None:
432
+ # Show a preview of the audio
433
+ st.markdown("#### Audio Preview:")
434
  st.audio(uploaded_file)
435
 
436
+ st.markdown("#### Ready for Analysis")
437
+ col1, col2 = st.columns([1, 3])
438
+ with col1:
439
+ analyze_button = st.button("Analyze Audio", type="primary", use_container_width=True)
440
+ with col2:
441
+ st.caption("Tip: 15-30 seconds of clear speech works best for accent detection")
442
+
443
+ if analyze_button:
444
+ with st.spinner("Analyzing audio... (this may take 15-30 seconds)"):
445
  try:
446
  results = process_uploaded_audio(uploaded_file)
447