amirjamali committed on
Commit
bb36a56
·
unverified ·
1 Parent(s): aa81f17

Update Dockerfile and requirements; add torchvision dependency and enhance audio visualization handling in Streamlit app

Browse files
Files changed (3) hide show
  1. Dockerfile +1 -1
  2. requirements.txt +1 -0
  3. src/streamlit_app.py +60 -27
Dockerfile CHANGED
@@ -35,7 +35,7 @@ COPY requirements.txt .
35
 
36
  # Install Python dependencies with specific order for compatibility
37
  RUN pip install --no-cache-dir --upgrade pip && \
38
- pip install --no-cache-dir torch==2.0.1 torchaudio==2.0.2 && \
39
  pip install --no-cache-dir -r requirements.txt && \
40
  pip install --no-cache-dir git+https://github.com/speechbrain/speechbrain.git@v0.5.14
41
 
 
35
 
36
  # Install Python dependencies with specific order for compatibility
37
  RUN pip install --no-cache-dir --upgrade pip && \
38
+ pip install --no-cache-dir torch==2.0.1 torchaudio==2.0.2 torchvision==0.15.2 && \
39
  pip install --no-cache-dir -r requirements.txt && \
40
  pip install --no-cache-dir git+https://github.com/speechbrain/speechbrain.git@v0.5.14
41
 
requirements.txt CHANGED
@@ -7,6 +7,7 @@ beautifulsoup4==4.12.2
7
  speechbrain==0.5.14
8
  torch==2.0.1
9
  torchaudio==2.0.2
 
10
  # Pin transformers to version that has AutoProcessor
11
  transformers==4.31.0
12
  librosa==0.10.1
 
7
  speechbrain==0.5.14
8
  torch==2.0.1
9
  torchaudio==2.0.2
10
+ torchvision==0.15.2
11
  # Pin transformers to version that has AutoProcessor
12
  transformers==4.31.0
13
  librosa==0.10.1
src/streamlit_app.py CHANGED
@@ -442,8 +442,7 @@ class AccentDetector:
442
 
443
  # Generate explanation
444
  explanation = self.generate_explanation(audio_path, accent, accent_confidence, is_english, lang)
445
-
446
- # Create visualization of the audio waveform
447
  try:
448
  y, sr = librosa.load(audio_path, sr=None)
449
  fig, ax = plt.subplots(figsize=(10, 2))
@@ -453,6 +452,25 @@ class AccentDetector:
453
  ax.set_title('Audio Waveform')
454
  plt.tight_layout()
455
  audio_viz = fig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
456
  except Exception as e:
457
  st.warning(f"Could not generate audio visualization: {str(e)}")
458
  audio_viz = None
@@ -467,21 +485,34 @@ class AccentDetector:
467
  "audio_viz": audio_viz
468
  }
469
 
470
- def process_uploaded_audio(uploaded_file):
471
- """Process uploaded audio file"""
 
 
 
 
472
  try:
473
  # Create a unique filename based on timestamp
474
  timestamp = str(int(time.time()))
475
- file_extension = os.path.splitext(uploaded_file.name)[1].lower()
476
-
477
- # Create an uploads directory if it doesn't exist
478
- uploads_dir = os.path.join(os.getcwd(), "uploads")
479
- os.makedirs(uploads_dir, exist_ok=True)
480
 
481
- # Write the uploaded file to disk with proper extension in the uploads directory
482
- temp_input_path = os.path.join(uploads_dir, f"uploaded_audio_{timestamp}{file_extension}")
483
- with open(temp_input_path, "wb") as f:
484
- f.write(uploaded_file.getbuffer())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
485
  # For MP4 files, extract the audio using ffmpeg
486
  if file_extension == ".mp4":
487
  st.info("Extracting audio from video file...")
@@ -660,10 +691,13 @@ with tab1:
660
  # Show explanation in a box
661
  st.markdown("### Expert Analysis")
662
  st.info(results['explanation'])
663
-
664
- with col2:
665
  if results['audio_viz']:
666
- st.pyplot(results['audio_viz'])
 
 
 
 
667
 
668
  # Show audio playback
669
  st.audio(audio_path)
@@ -731,19 +765,15 @@ with tab2:
731
  # Create an uploads directory if it doesn't exist
732
  uploads_dir = os.path.join(os.getcwd(), "uploads")
733
  os.makedirs(uploads_dir, exist_ok=True)
734
-
735
- # Save the file first to avoid streaming it multiple times
736
  temp_file_path = os.path.join(uploads_dir, f"temp_{int(time.time())}_{uploaded_file.name}")
737
  with open(temp_file_path, "wb") as f:
738
  f.write(uploaded_file.getbuffer())
739
 
740
  progress_bar.progress(50, text="Analyzing audio...")
741
 
742
- # Now process from the saved file
743
- with open(temp_file_path, "rb") as f:
744
- # Create a new UploadedFile object from the saved file
745
- file_content = f.read()
746
- results = process_uploaded_audio(uploaded_file)
747
 
748
  progress_bar.progress(100, text="Analysis complete!")
749
  # Display results
@@ -760,11 +790,14 @@ with tab2:
760
  # Show explanation in a box
761
  st.markdown("### Expert Analysis")
762
  st.info(results['explanation'])
763
-
764
- with col2:
765
  if results['audio_viz']:
766
- st.pyplot(results['audio_viz'])
767
-
 
 
 
 
768
  except subprocess.CalledProcessError as e:
769
  st.error("Error processing audio file")
770
  st.error(f"FFmpeg error: {e.stderr.decode('utf-8') if e.stderr else str(e)}")
 
442
 
443
  # Generate explanation
444
  explanation = self.generate_explanation(audio_path, accent, accent_confidence, is_english, lang)
445
+ # Create visualization of the audio waveform
 
446
  try:
447
  y, sr = librosa.load(audio_path, sr=None)
448
  fig, ax = plt.subplots(figsize=(10, 2))
 
452
  ax.set_title('Audio Waveform')
453
  plt.tight_layout()
454
  audio_viz = fig
455
+
456
+ # Make sure the figure can be saved
457
+ try:
458
+ # Test if the figure can be saved
459
+ import tempfile
460
+ with tempfile.NamedTemporaryFile(suffix='.png') as tmp:
461
+ plt.savefig(tmp.name)
462
+ except Exception as viz_save_error:
463
+ st.warning(f"Could not save visualization: {str(viz_save_error)}. Using simpler visualization.")
464
+ # Create a simple alternative visualization
465
+ import numpy as np
466
+ # Downsample for performance
467
+ sample_rate = max(1, len(y) // 1000)
468
+ y_downsampled = y[::sample_rate]
469
+ fig2, ax2 = plt.subplots(figsize=(8, 2))
470
+ ax2.plot(np.arange(len(y_downsampled)), y_downsampled)
471
+ ax2.set_title("Audio Waveform (simplified)")
472
+ audio_viz = fig2
473
+
474
  except Exception as e:
475
  st.warning(f"Could not generate audio visualization: {str(e)}")
476
  audio_viz = None
 
485
  "audio_viz": audio_viz
486
  }
487
 
488
+ def process_uploaded_audio(file_input):
489
+ """Process uploaded audio file
490
+
491
+ Args:
492
+ file_input: Either a StreamlitUploadedFile object or a string path to a file
493
+ """
494
  try:
495
  # Create a unique filename based on timestamp
496
  timestamp = str(int(time.time()))
 
 
 
 
 
497
 
498
+ # Handle different input types
499
+ if isinstance(file_input, str):
500
+ # If it's already a file path
501
+ temp_input_path = file_input
502
+ file_extension = os.path.splitext(temp_input_path)[1].lower()
503
+ st.info(f"Processing from saved file: {os.path.basename(temp_input_path)}")
504
+ else:
505
+ # If it's a StreamlitUploadedFile
506
+ file_extension = os.path.splitext(file_input.name)[1].lower()
507
+
508
+ # Create an uploads directory if it doesn't exist
509
+ uploads_dir = os.path.join(os.getcwd(), "uploads")
510
+ os.makedirs(uploads_dir, exist_ok=True)
511
+
512
+ # Write the uploaded file to disk with proper extension in the uploads directory
513
+ temp_input_path = os.path.join(uploads_dir, f"uploaded_audio_{timestamp}{file_extension}")
514
+ with open(temp_input_path, "wb") as f:
515
+ f.write(file_input.getbuffer())
516
  # For MP4 files, extract the audio using ffmpeg
517
  if file_extension == ".mp4":
518
  st.info("Extracting audio from video file...")
 
691
  # Show explanation in a box
692
  st.markdown("### Expert Analysis")
693
  st.info(results['explanation'])
694
+ with col2:
 
695
  if results['audio_viz']:
696
+ try:
697
+ st.pyplot(results['audio_viz'])
698
+ except Exception as viz_error:
699
+ st.warning("Could not display visualization due to torchvision issue.")
700
+ st.info("Audio analysis was successful even though visualization failed.")
701
 
702
  # Show audio playback
703
  st.audio(audio_path)
 
765
  # Create an uploads directory if it doesn't exist
766
  uploads_dir = os.path.join(os.getcwd(), "uploads")
767
  os.makedirs(uploads_dir, exist_ok=True)
768
+ # Save the file first to avoid streaming it multiple times
 
769
  temp_file_path = os.path.join(uploads_dir, f"temp_{int(time.time())}_{uploaded_file.name}")
770
  with open(temp_file_path, "wb") as f:
771
  f.write(uploaded_file.getbuffer())
772
 
773
  progress_bar.progress(50, text="Analyzing audio...")
774
 
775
+ # Process using the saved file path directly
776
+ results = process_uploaded_audio(temp_file_path)
 
 
 
777
 
778
  progress_bar.progress(100, text="Analysis complete!")
779
  # Display results
 
790
  # Show explanation in a box
791
  st.markdown("### Expert Analysis")
792
  st.info(results['explanation'])
793
+ with col2:
 
794
  if results['audio_viz']:
795
+ try:
796
+ st.pyplot(results['audio_viz'])
797
+ except Exception as viz_error:
798
+ st.warning("Could not display visualization due to torchvision issue.")
799
+ st.info("Audio analysis was successful even though visualization failed.")
800
+
801
  except subprocess.CalledProcessError as e:
802
  st.error("Error processing audio file")
803
  st.error(f"FFmpeg error: {e.stderr.decode('utf-8') if e.stderr else str(e)}")