Spaces:

amirjamali
/

accent-detector

Sleeping

App Files Files Community

amirjamali committed on May 24

Commit

5bacc9d

unverified ·

1 Parent(s): 1b3a125

Refactor Dockerfile for improved dependency management and enhance Streamlit app initialization with better error handling for language identification and accent classification models

Browse files

Files changed (3) hide show

Dockerfile +31 -67
src/__pycache__/streamlit_app.cpython-312.pyc +0 -0
src/streamlit_app.py +7 -7

Dockerfile CHANGED Viewed

@@ -1,83 +1,47 @@
-FROM python:3.9
-WORKDIR /app
-# Set environment variables to avoid permission errors
-ENV HOME=/app \
-    PYTHONUNBUFFERED=1 \
-    PYTHONDONTWRITEBYTECODE=1 \
-    MPLCONFIGDIR=/tmp/matplotlib \
-    TRANSFORMERS_CACHE=/app/.cache/huggingface \
-    XDG_CACHE_HOME=/app/.cache
-# Create non-root user for better security
-RUN groupadd -g 1000 appuser && \
-    useradd -u 1000 -g appuser -s /bin/sh -m appuser
-# Install system dependencies including ffmpeg for audio processing
-# Add retry logic and better mirrors for improved network reliability
-RUN apt-get update --allow-releaseinfo-change || (sleep 2 && apt-get update) && \
-    apt-get install -y \
-    build-essential \
-    curl \
-    software-properties-common \
-    git \
-    ffmpeg \
-    libsndfile1 \
-    ca-certificates \
-    && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
-# Set pip to have more retries and timeout
-ENV PIP_DEFAULT_TIMEOUT=100
-ENV PIP_RETRIES=3
-# Copy requirements and install Python dependencies
-COPY requirements.txt ./
-# First install torch and torchaudio separately for better compatibility
-RUN pip install --upgrade pip && \
-    pip install torch==2.0.1 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cpu
-# Then install the rest of the requirements with retries
-RUN pip install --no-cache-dir -r requirements.txt || \
-    (sleep 2 && pip install --no-cache-dir -r requirements.txt) || \
-    (sleep 5 && pip install --no-cache-dir -r requirements.txt --use-deprecated=legacy-resolver)
-# Install SpeechBrain directly using Git for better compatibility
-RUN pip install git+https://github.com/speechbrain/speechbrain@v0.5.14
 # Copy source code
 COPY src/ ./src/
-# Create directories with proper permissions
-RUN mkdir -p /app/cookies /app/tmp_model /app/.streamlit /app/.cache && \
-    chown -R appuser:appuser /app
-# Switch to non-root user for better security
-USER appuser
-# Create a Streamlit configuration file to avoid permission issues
-RUN mkdir -p /app/.streamlit && \
-    echo '\
-[server]\n\
-port = 8501\n\
-address = "0.0.0.0"\n\
-headless = true\n\
-enableCORS = false\n\
-\n\
-[browser]\n\
-gatherUsageStats = false\n\
-\n\
-[runner]\n\
-fastReruns = true\n\
-' > /app/.streamlit/config.toml
-# Expose the port Streamlit will run on
 EXPOSE 8501
-# Health check to ensure the service is running
 HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
-# Run the Streamlit app
-ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py"]

+FROM python:3.9-slim
+# Set environment variables (lines are continued with "\", the default Dockerfile escape character)
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    MPLCONFIGDIR=/tmp/matplotlib
+WORKDIR /app
+# Install system dependencies
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    build-essential \
+    curl \
+    git \
+    ffmpeg \
+    libsndfile1 \
+    && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
+# Create necessary directories
+RUN mkdir -p /app/tmp_model /tmp/matplotlib
+# Copy requirements first (for better caching)
+COPY requirements.txt .
+# Install Python dependencies with specific order for compatibility
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir torch==2.0.1 torchaudio==2.0.2 && \
+    pip install --no-cache-dir -r requirements.txt && \
+    pip install --no-cache-dir git+https://github.com/speechbrain/speechbrain.git@v0.5.14
 # Copy source code
 COPY src/ ./src/
+# Set up Streamlit configuration (printf expands the \n escapes into newlines under /bin/sh)
+RUN mkdir -p .streamlit && \
+    printf '[server]\nport = 8501\naddress = "0.0.0.0"\nheadless = true\n\n[browser]\ngatherUsageStats = false\n\n[runner]\nfastReruns = true\n' > ./.streamlit/config.toml
+# Expose port
 EXPOSE 8501
+# Health check
 HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
+# Run the app
+ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]

src/__pycache__/streamlit_app.cpython-312.pyc ADDED Viewed

Binary file (24.6 kB). View file

src/streamlit_app.py CHANGED Viewed

@@ -139,7 +139,8 @@ def extract_audio(video_path="video.mp4", audio_path="audio.wav"):
 class AccentDetector:
     def __init__(self):
-        # Initialize the language identification model
         try:
             if EncoderClassifier is not None:
                 self.lang_id = EncoderClassifier.from_hparams(
@@ -149,12 +150,10 @@ class AccentDetector:
                 self.have_lang_id = True
             else:
                 st.error("SpeechBrain not available. Language identification disabled.")
-                self.have_lang_id = False
         except Exception as e:
             st.error(f"Error loading language ID model: {str(e)}")
-            self.have_lang_id = False
-          # Initialize the English accent classifier - using VoxLingua107 for now
-        # In production, you'd use a more specialized accent model
         try:
             self.model_name = "speechbrain/lang-id-voxlingua107-ecapa"
@@ -162,10 +161,11 @@ class AccentDetector:
             if HAS_AUTO_PROCESSOR:
                 self.processor = AutoProcessor.from_pretrained(self.model_name)
             else:
-                # Fall back to using feature_extractor directly if AutoProcessor is not available
                 from transformers import AutoFeatureExtractor
                 self.processor = AutoFeatureExtractor.from_pretrained(self.model_name)
-                  self.model = AutoModelForAudioClassification.from_pretrained(self.model_name)
             self.have_accent_model = True
         except Exception as e:
             st.warning(f"Could not load accent model: {str(e)}")

 class AccentDetector:
     def __init__(self):
+        # Initialize language identification model
+        self.have_lang_id = False
         try:
             if EncoderClassifier is not None:
                 self.lang_id = EncoderClassifier.from_hparams(
                 self.have_lang_id = True
             else:
                 st.error("SpeechBrain not available. Language identification disabled.")
         except Exception as e:
             st.error(f"Error loading language ID model: {str(e)}")
+          # Initialize the accent classifier
+        self.have_accent_model = False
         try:
             self.model_name = "speechbrain/lang-id-voxlingua107-ecapa"
             if HAS_AUTO_PROCESSOR:
                 self.processor = AutoProcessor.from_pretrained(self.model_name)
             else:
+                # Fall back to using feature_extractor
                 from transformers import AutoFeatureExtractor
                 self.processor = AutoFeatureExtractor.from_pretrained(self.model_name)
+            self.model = AutoModelForAudioClassification.from_pretrained(self.model_name)
             self.have_accent_model = True
         except Exception as e:
             st.warning(f"Could not load accent model: {str(e)}")