Spaces:
Sleeping
Sleeping
Update Dockerfile and requirements; add torchvision dependency and enhance audio visualization handling in Streamlit app
Browse files
- Dockerfile +1 -1
- requirements.txt +1 -0
- src/streamlit_app.py +60 -27
Dockerfile
CHANGED
@@ -35,7 +35,7 @@ COPY requirements.txt .
|
|
35 |
|
36 |
# Install Python dependencies with specific order for compatibility
|
37 |
RUN pip install --no-cache-dir --upgrade pip && \
|
38 |
-
pip install --no-cache-dir torch==2.0.1 torchaudio==2.0.2 && \
|
39 |
pip install --no-cache-dir -r requirements.txt && \
|
40 |
pip install --no-cache-dir git+https://github.com/speechbrain/speechbrain.git@v0.5.14
|
41 |
|
|
|
35 |
|
36 |
# Install Python dependencies with specific order for compatibility
|
37 |
RUN pip install --no-cache-dir --upgrade pip && \
|
38 |
+
pip install --no-cache-dir torch==2.0.1 torchaudio==2.0.2 torchvision==0.15.2 && \
|
39 |
pip install --no-cache-dir -r requirements.txt && \
|
40 |
pip install --no-cache-dir git+https://github.com/speechbrain/speechbrain.git@v0.5.14
|
41 |
|
requirements.txt
CHANGED
@@ -7,6 +7,7 @@ beautifulsoup4==4.12.2
|
|
7 |
speechbrain==0.5.14
|
8 |
torch==2.0.1
|
9 |
torchaudio==2.0.2
|
|
|
10 |
# Pin transformers to version that has AutoProcessor
|
11 |
transformers==4.31.0
|
12 |
librosa==0.10.1
|
|
|
7 |
speechbrain==0.5.14
|
8 |
torch==2.0.1
|
9 |
torchaudio==2.0.2
|
10 |
+
torchvision==0.15.2
|
11 |
# Pin transformers to version that has AutoProcessor
|
12 |
transformers==4.31.0
|
13 |
librosa==0.10.1
|
src/streamlit_app.py
CHANGED
@@ -442,8 +442,7 @@ class AccentDetector:
|
|
442 |
|
443 |
# Generate explanation
|
444 |
explanation = self.generate_explanation(audio_path, accent, accent_confidence, is_english, lang)
|
445 |
-
|
446 |
-
# Create visualization of the audio waveform
|
447 |
try:
|
448 |
y, sr = librosa.load(audio_path, sr=None)
|
449 |
fig, ax = plt.subplots(figsize=(10, 2))
|
@@ -453,6 +452,25 @@ class AccentDetector:
|
|
453 |
ax.set_title('Audio Waveform')
|
454 |
plt.tight_layout()
|
455 |
audio_viz = fig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
456 |
except Exception as e:
|
457 |
st.warning(f"Could not generate audio visualization: {str(e)}")
|
458 |
audio_viz = None
|
@@ -467,21 +485,34 @@ class AccentDetector:
|
|
467 |
"audio_viz": audio_viz
|
468 |
}
|
469 |
|
470 |
-
def process_uploaded_audio(
|
471 |
-
"""Process uploaded audio file
|
|
|
|
|
|
|
|
|
472 |
try:
|
473 |
# Create a unique filename based on timestamp
|
474 |
timestamp = str(int(time.time()))
|
475 |
-
file_extension = os.path.splitext(uploaded_file.name)[1].lower()
|
476 |
-
|
477 |
-
# Create an uploads directory if it doesn't exist
|
478 |
-
uploads_dir = os.path.join(os.getcwd(), "uploads")
|
479 |
-
os.makedirs(uploads_dir, exist_ok=True)
|
480 |
|
481 |
-
#
|
482 |
-
|
483 |
-
|
484 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
485 |
# For MP4 files, extract the audio using ffmpeg
|
486 |
if file_extension == ".mp4":
|
487 |
st.info("Extracting audio from video file...")
|
@@ -660,10 +691,13 @@ with tab1:
|
|
660 |
# Show explanation in a box
|
661 |
st.markdown("### Expert Analysis")
|
662 |
st.info(results['explanation'])
|
663 |
-
|
664 |
-
with col2:
|
665 |
if results['audio_viz']:
|
666 |
-
|
|
|
|
|
|
|
|
|
667 |
|
668 |
# Show audio playback
|
669 |
st.audio(audio_path)
|
@@ -731,19 +765,15 @@ with tab2:
|
|
731 |
# Create an uploads directory if it doesn't exist
|
732 |
uploads_dir = os.path.join(os.getcwd(), "uploads")
|
733 |
os.makedirs(uploads_dir, exist_ok=True)
|
734 |
-
|
735 |
-
# Save the file first to avoid streaming it multiple times
|
736 |
temp_file_path = os.path.join(uploads_dir, f"temp_{int(time.time())}_{uploaded_file.name}")
|
737 |
with open(temp_file_path, "wb") as f:
|
738 |
f.write(uploaded_file.getbuffer())
|
739 |
|
740 |
progress_bar.progress(50, text="Analyzing audio...")
|
741 |
|
742 |
-
#
|
743 |
-
|
744 |
-
# Create a new UploadedFile object from the saved file
|
745 |
-
file_content = f.read()
|
746 |
-
results = process_uploaded_audio(uploaded_file)
|
747 |
|
748 |
progress_bar.progress(100, text="Analysis complete!")
|
749 |
# Display results
|
@@ -760,11 +790,14 @@ with tab2:
|
|
760 |
# Show explanation in a box
|
761 |
st.markdown("### Expert Analysis")
|
762 |
st.info(results['explanation'])
|
763 |
-
|
764 |
-
with col2:
|
765 |
if results['audio_viz']:
|
766 |
-
|
767 |
-
|
|
|
|
|
|
|
|
|
768 |
except subprocess.CalledProcessError as e:
|
769 |
st.error("Error processing audio file")
|
770 |
st.error(f"FFmpeg error: {e.stderr.decode('utf-8') if e.stderr else str(e)}")
|
|
|
442 |
|
443 |
# Generate explanation
|
444 |
explanation = self.generate_explanation(audio_path, accent, accent_confidence, is_english, lang)
|
445 |
+
# Create visualization of the audio waveform
|
|
|
446 |
try:
|
447 |
y, sr = librosa.load(audio_path, sr=None)
|
448 |
fig, ax = plt.subplots(figsize=(10, 2))
|
|
|
452 |
ax.set_title('Audio Waveform')
|
453 |
plt.tight_layout()
|
454 |
audio_viz = fig
|
455 |
+
|
456 |
+
# Make sure the figure can be saved
|
457 |
+
try:
|
458 |
+
# Test if the figure can be saved
|
459 |
+
import tempfile
|
460 |
+
with tempfile.NamedTemporaryFile(suffix='.png') as tmp:
|
461 |
+
plt.savefig(tmp.name)
|
462 |
+
except Exception as viz_save_error:
|
463 |
+
st.warning(f"Could not save visualization: {str(viz_save_error)}. Using simpler visualization.")
|
464 |
+
# Create a simple alternative visualization
|
465 |
+
import numpy as np
|
466 |
+
# Downsample for performance
|
467 |
+
sample_rate = max(1, len(y) // 1000)
|
468 |
+
y_downsampled = y[::sample_rate]
|
469 |
+
fig2, ax2 = plt.subplots(figsize=(8, 2))
|
470 |
+
ax2.plot(np.arange(len(y_downsampled)), y_downsampled)
|
471 |
+
ax2.set_title("Audio Waveform (simplified)")
|
472 |
+
audio_viz = fig2
|
473 |
+
|
474 |
except Exception as e:
|
475 |
st.warning(f"Could not generate audio visualization: {str(e)}")
|
476 |
audio_viz = None
|
|
|
485 |
"audio_viz": audio_viz
|
486 |
}
|
487 |
|
488 |
+
def process_uploaded_audio(file_input):
|
489 |
+
"""Process uploaded audio file
|
490 |
+
|
491 |
+
Args:
|
492 |
+
file_input: Either a StreamlitUploadedFile object or a string path to a file
|
493 |
+
"""
|
494 |
try:
|
495 |
# Create a unique filename based on timestamp
|
496 |
timestamp = str(int(time.time()))
|
|
|
|
|
|
|
|
|
|
|
497 |
|
498 |
+
# Handle different input types
|
499 |
+
if isinstance(file_input, str):
|
500 |
+
# If it's already a file path
|
501 |
+
temp_input_path = file_input
|
502 |
+
file_extension = os.path.splitext(temp_input_path)[1].lower()
|
503 |
+
st.info(f"Processing from saved file: {os.path.basename(temp_input_path)}")
|
504 |
+
else:
|
505 |
+
# If it's a StreamlitUploadedFile
|
506 |
+
file_extension = os.path.splitext(file_input.name)[1].lower()
|
507 |
+
|
508 |
+
# Create an uploads directory if it doesn't exist
|
509 |
+
uploads_dir = os.path.join(os.getcwd(), "uploads")
|
510 |
+
os.makedirs(uploads_dir, exist_ok=True)
|
511 |
+
|
512 |
+
# Write the uploaded file to disk with proper extension in the uploads directory
|
513 |
+
temp_input_path = os.path.join(uploads_dir, f"uploaded_audio_{timestamp}{file_extension}")
|
514 |
+
with open(temp_input_path, "wb") as f:
|
515 |
+
f.write(file_input.getbuffer())
|
516 |
# For MP4 files, extract the audio using ffmpeg
|
517 |
if file_extension == ".mp4":
|
518 |
st.info("Extracting audio from video file...")
|
|
|
691 |
# Show explanation in a box
|
692 |
st.markdown("### Expert Analysis")
|
693 |
st.info(results['explanation'])
|
694 |
+
with col2:
|
|
|
695 |
if results['audio_viz']:
|
696 |
+
try:
|
697 |
+
st.pyplot(results['audio_viz'])
|
698 |
+
except Exception as viz_error:
|
699 |
+
st.warning("Could not display visualization due to torchvision issue.")
|
700 |
+
st.info("Audio analysis was successful even though visualization failed.")
|
701 |
|
702 |
# Show audio playback
|
703 |
st.audio(audio_path)
|
|
|
765 |
# Create an uploads directory if it doesn't exist
|
766 |
uploads_dir = os.path.join(os.getcwd(), "uploads")
|
767 |
os.makedirs(uploads_dir, exist_ok=True)
|
768 |
+
# Save the file first to avoid streaming it multiple times
|
|
|
769 |
temp_file_path = os.path.join(uploads_dir, f"temp_{int(time.time())}_{uploaded_file.name}")
|
770 |
with open(temp_file_path, "wb") as f:
|
771 |
f.write(uploaded_file.getbuffer())
|
772 |
|
773 |
progress_bar.progress(50, text="Analyzing audio...")
|
774 |
|
775 |
+
# Process using the saved file path directly
|
776 |
+
results = process_uploaded_audio(temp_file_path)
|
|
|
|
|
|
|
777 |
|
778 |
progress_bar.progress(100, text="Analysis complete!")
|
779 |
# Display results
|
|
|
790 |
# Show explanation in a box
|
791 |
st.markdown("### Expert Analysis")
|
792 |
st.info(results['explanation'])
|
793 |
+
with col2:
|
|
|
794 |
if results['audio_viz']:
|
795 |
+
try:
|
796 |
+
st.pyplot(results['audio_viz'])
|
797 |
+
except Exception as viz_error:
|
798 |
+
st.warning("Could not display visualization due to torchvision issue.")
|
799 |
+
st.info("Audio analysis was successful even though visualization failed.")
|
800 |
+
|
801 |
except subprocess.CalledProcessError as e:
|
802 |
st.error("Error processing audio file")
|
803 |
st.error(f"FFmpeg error: {e.stderr.decode('utf-8') if e.stderr else str(e)}")
|