amirjamali committed on
Commit
7eff467
·
unverified ·
1 Parent(s): a9b23f3

Add Streamlit configuration and enhance Dockerfile for improved security and environment setup; update requirements for specific package versions

Browse files
Files changed (4) hide show
  1. .streamlit/config.toml +18 -0
  2. Dockerfile +35 -7
  3. requirements.txt +12 -11
  4. src/streamlit_app.py +19 -4
.streamlit/config.toml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [server]
2
+ port = 8501
3
+ address = "0.0.0.0"
4
+ headless = true
5
+ enableCORS = false
6
+
7
+ [browser]
8
+ gatherUsageStats = false
9
+
10
+ [runner]
11
+ fastReruns = true
12
+
13
+ [theme]
14
+ primaryColor = "#2196F3"
15
+ backgroundColor = "#FFFFFF"
16
+ secondaryBackgroundColor = "#F0F2F6"
17
+ textColor = "#262730"
18
+ font = "sans serif"
Dockerfile CHANGED
@@ -2,6 +2,18 @@ FROM python:3.9-slim
2
 
3
  WORKDIR /app
4
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  # Install system dependencies including ffmpeg for audio processing
6
  # Add retry logic and better mirrors for improved network reliability
7
  RUN apt-get update --allow-releaseinfo-change || (sleep 2 && apt-get update) && \
@@ -19,7 +31,6 @@ RUN apt-get update --allow-releaseinfo-change || (sleep 2 && apt-get update) &&
19
  # Set pip to have more retries and timeout
20
  ENV PIP_DEFAULT_TIMEOUT=100
21
  ENV PIP_RETRIES=3
22
- ENV PYTHONUNBUFFERED=1
23
 
24
  # Copy requirements and install Python dependencies
25
  COPY requirements.txt ./
@@ -27,14 +38,31 @@ RUN pip install --upgrade pip && \
27
  pip install --no-cache-dir -r requirements.txt || \
28
  (sleep 2 && pip install --no-cache-dir -r requirements.txt)
29
 
30
- # Create cookies directory for user uploads
31
- RUN mkdir -p /app/cookies
32
-
33
  # Copy source code
34
  COPY src/ ./src/
35
 
36
- # Create directory for temporary model storage
37
- RUN mkdir -p /app/tmp_model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  # Expose the port Streamlit will run on
40
  EXPOSE 8501
@@ -43,4 +71,4 @@ EXPOSE 8501
43
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
44
 
45
  # Run the Streamlit app
46
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
2
 
3
  WORKDIR /app
4
 
5
+ # Set environment variables to avoid permission errors
6
+ ENV HOME=/app \
7
+ PYTHONUNBUFFERED=1 \
8
+ PYTHONDONTWRITEBYTECODE=1 \
9
+ MPLCONFIGDIR=/tmp/matplotlib \
10
+ TRANSFORMERS_CACHE=/app/.cache/huggingface \
11
+ XDG_CACHE_HOME=/app/.cache
12
+
13
+ # Create non-root user for better security
14
+ RUN groupadd -g 1000 appuser && \
15
+ useradd -u 1000 -g appuser -s /bin/sh -m appuser
16
+
17
  # Install system dependencies including ffmpeg for audio processing
18
  # Add retry logic and better mirrors for improved network reliability
19
  RUN apt-get update --allow-releaseinfo-change || (sleep 2 && apt-get update) && \
 
31
  # Set pip to have more retries and timeout
32
  ENV PIP_DEFAULT_TIMEOUT=100
33
  ENV PIP_RETRIES=3
 
34
 
35
  # Copy requirements and install Python dependencies
36
  COPY requirements.txt ./
 
38
  pip install --no-cache-dir -r requirements.txt || \
39
  (sleep 2 && pip install --no-cache-dir -r requirements.txt)
40
 
 
 
 
41
  # Copy source code
42
  COPY src/ ./src/
43
 
44
+ # Create directories with proper permissions
45
+ RUN mkdir -p /app/cookies /app/tmp_model /app/.streamlit /app/.cache && \
46
+ chown -R appuser:appuser /app
47
+
48
+ # Switch to non-root user for better security
49
+ USER appuser
50
+
51
+ # Create a Streamlit configuration file to avoid permission issues
52
+ RUN mkdir -p /app/.streamlit && \
53
+ echo '\
54
+ [server]\n\
55
+ port = 8501\n\
56
+ address = "0.0.0.0"\n\
57
+ headless = true\n\
58
+ enableCORS = false\n\
59
+ \n\
60
+ [browser]\n\
61
+ gatherUsageStats = false\n\
62
+ \n\
63
+ [runner]\n\
64
+ fastReruns = true\n\
65
+ ' > /app/.streamlit/config.toml
66
 
67
  # Expose the port Streamlit will run on
68
  EXPOSE 8501
 
71
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
72
 
73
  # Run the Streamlit app
74
+ ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py"]
requirements.txt CHANGED
@@ -1,11 +1,12 @@
1
- streamlit>=1.25.0
2
- yt_dlp>=2023.7.6
3
- speechbrain>=0.5.15
4
- torch>=2.0.0
5
- torchaudio>=2.0.0
6
- transformers>=4.30.0
7
- librosa>=0.10.0
8
- matplotlib>=3.7.0
9
- scikit-learn>=1.3.0
10
- openai>=1.0.0
11
- python-dotenv>=1.0.0
 
 
1
+ streamlit==1.31.0
2
+ yt_dlp==2023.11.16
3
+ speechbrain==1.0.0  # src/streamlit_app.py imports speechbrain.inference, which only exists from 1.0.0 (0.5.x ships speechbrain.pretrained)
4
+ torch==2.0.1
5
+ torchaudio==2.0.2
6
+ # Pin transformers to version that has AutoProcessor
7
+ transformers==4.31.0
8
+ librosa==0.10.1
9
+ matplotlib==3.7.2
10
+ scikit-learn==1.3.0
11
+ openai==1.0.0
12
+ python-dotenv==1.0.0
src/streamlit_app.py CHANGED
@@ -5,8 +5,16 @@ import subprocess
5
  import librosa
6
  import numpy as np
7
  import torch
 
8
  from speechbrain.inference.classifiers import EncoderClassifier
9
- from transformers import AutoProcessor, AutoModelForAudioClassification
 
 
 
 
 
 
 
10
  from dotenv import load_dotenv
11
  import matplotlib.pyplot as plt
12
  import tempfile
@@ -113,12 +121,19 @@ class AccentDetector:
113
  source="speechbrain/lang-id-commonlanguage_ecapa",
114
  savedir="tmp_model"
115
  )
116
-
117
- # Initialize the English accent classifier - using VoxLingua107 for now
118
  # In production, you'd use a more specialized accent model
119
  try:
120
  self.model_name = "speechbrain/lang-id-voxlingua107-ecapa"
121
- self.processor = AutoProcessor.from_pretrained(self.model_name)
 
 
 
 
 
 
 
 
122
  self.model = AutoModelForAudioClassification.from_pretrained(self.model_name)
123
  self.have_accent_model = True
124
  except Exception as e:
 
5
  import librosa
6
  import numpy as np
7
  import torch
8
+ import sys
9
  from speechbrain.inference.classifiers import EncoderClassifier
10
+ # Handle potential compatibility issues with transformers
11
+ try:
12
+ from transformers import AutoProcessor, AutoModelForAudioClassification
13
+ HAS_AUTO_PROCESSOR = True
14
+ except ImportError:
15
+ from transformers import AutoModelForAudioClassification
16
+ HAS_AUTO_PROCESSOR = False
17
+ st.warning("Using a compatible but limited version of transformers. Some features may be limited.")
18
  from dotenv import load_dotenv
19
  import matplotlib.pyplot as plt
20
  import tempfile
 
121
  source="speechbrain/lang-id-commonlanguage_ecapa",
122
  savedir="tmp_model"
123
  )
124
+ # Initialize the English accent classifier - using VoxLingua107 for now
 
125
  # In production, you'd use a more specialized accent model
126
  try:
127
  self.model_name = "speechbrain/lang-id-voxlingua107-ecapa"
128
+
129
+ # Handle case where AutoProcessor is not available
130
+ if HAS_AUTO_PROCESSOR:
131
+ self.processor = AutoProcessor.from_pretrained(self.model_name)
132
+ else:
133
+ # Fall back to using feature_extractor directly if AutoProcessor is not available
134
+ from transformers import AutoFeatureExtractor
135
+ self.processor = AutoFeatureExtractor.from_pretrained(self.model_name)
136
+
137
  self.model = AutoModelForAudioClassification.from_pretrained(self.model_name)
138
  self.have_accent_model = True
139
  except Exception as e: