Spaces:
Sleeping
Sleeping
Add Streamlit configuration and harden the Dockerfile (non-root user, environment setup); pin requirements to specific package versions
Browse files
- .streamlit/config.toml +18 -0
- Dockerfile +35 -7
- requirements.txt +12 -11
- src/streamlit_app.py +19 -4
.streamlit/config.toml
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[server]
|
2 |
+
port = 8501
|
3 |
+
address = "0.0.0.0"
|
4 |
+
headless = true
|
5 |
+
enableCORS = false
|
6 |
+
|
7 |
+
[browser]
|
8 |
+
gatherUsageStats = false
|
9 |
+
|
10 |
+
[runner]
|
11 |
+
fastReruns = true
|
12 |
+
|
13 |
+
[theme]
|
14 |
+
primaryColor = "#2196F3"
|
15 |
+
backgroundColor = "#FFFFFF"
|
16 |
+
secondaryBackgroundColor = "#F0F2F6"
|
17 |
+
textColor = "#262730"
|
18 |
+
font = "sans serif"
|
Dockerfile
CHANGED
@@ -2,6 +2,18 @@ FROM python:3.9-slim
|
|
2 |
|
3 |
WORKDIR /app
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
# Install system dependencies including ffmpeg for audio processing
|
6 |
# Add retry logic and better mirrors for improved network reliability
|
7 |
RUN apt-get update --allow-releaseinfo-change || (sleep 2 && apt-get update) && \
|
@@ -19,7 +31,6 @@ RUN apt-get update --allow-releaseinfo-change || (sleep 2 && apt-get update) &&
|
|
19 |
# Set pip to have more retries and timeout
|
20 |
ENV PIP_DEFAULT_TIMEOUT=100
|
21 |
ENV PIP_RETRIES=3
|
22 |
-
ENV PYTHONUNBUFFERED=1
|
23 |
|
24 |
# Copy requirements and install Python dependencies
|
25 |
COPY requirements.txt ./
|
@@ -27,14 +38,31 @@ RUN pip install --upgrade pip && \
|
|
27 |
pip install --no-cache-dir -r requirements.txt || \
|
28 |
(sleep 2 && pip install --no-cache-dir -r requirements.txt)
|
29 |
|
30 |
-
# Create cookies directory for user uploads
|
31 |
-
RUN mkdir -p /app/cookies
|
32 |
-
|
33 |
# Copy source code
|
34 |
COPY src/ ./src/
|
35 |
|
36 |
-
# Create the tmp_model directory
|
37 |
-
RUN mkdir -p /app/tmp_model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
# Expose the port Streamlit will run on
|
40 |
EXPOSE 8501
|
@@ -43,4 +71,4 @@ EXPOSE 8501
|
|
43 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
44 |
|
45 |
# Run the Streamlit app
|
46 |
-
ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py"
|
|
|
2 |
|
3 |
WORKDIR /app
|
4 |
|
5 |
+
# Set environment variables to avoid permission errors
|
6 |
+
ENV HOME=/app \
|
7 |
+
PYTHONUNBUFFERED=1 \
|
8 |
+
PYTHONDONTWRITEBYTECODE=1 \
|
9 |
+
MPLCONFIGDIR=/tmp/matplotlib \
|
10 |
+
TRANSFORMERS_CACHE=/app/.cache/huggingface \
|
11 |
+
XDG_CACHE_HOME=/app/.cache
|
12 |
+
|
13 |
+
# Create non-root user for better security
|
14 |
+
RUN groupadd -g 1000 appuser && \
|
15 |
+
useradd -u 1000 -g appuser -s /bin/sh -m appuser
|
16 |
+
|
17 |
# Install system dependencies including ffmpeg for audio processing
|
18 |
# Add retry logic and better mirrors for improved network reliability
|
19 |
RUN apt-get update --allow-releaseinfo-change || (sleep 2 && apt-get update) && \
|
|
|
31 |
# Set pip to have more retries and timeout
|
32 |
ENV PIP_DEFAULT_TIMEOUT=100
|
33 |
ENV PIP_RETRIES=3
|
|
|
34 |
|
35 |
# Copy requirements and install Python dependencies
|
36 |
COPY requirements.txt ./
|
|
|
38 |
pip install --no-cache-dir -r requirements.txt || \
|
39 |
(sleep 2 && pip install --no-cache-dir -r requirements.txt)
|
40 |
|
|
|
|
|
|
|
41 |
# Copy source code
|
42 |
COPY src/ ./src/
|
43 |
|
44 |
+
# Create directories with proper permissions
|
45 |
+
RUN mkdir -p /app/cookies /app/tmp_model /app/.streamlit /app/.cache && \
|
46 |
+
chown -R appuser:appuser /app
|
47 |
+
|
48 |
+
# Switch to non-root user for better security
|
49 |
+
USER appuser
|
50 |
+
|
51 |
+
# Create a Streamlit configuration file to avoid permission issues
|
52 |
+
RUN mkdir -p /app/.streamlit && \
|
53 |
+
echo '\
|
54 |
+
[server]\n\
|
55 |
+
port = 8501\n\
|
56 |
+
address = "0.0.0.0"\n\
|
57 |
+
headless = true\n\
|
58 |
+
enableCORS = false\n\
|
59 |
+
\n\
|
60 |
+
[browser]\n\
|
61 |
+
gatherUsageStats = false\n\
|
62 |
+
\n\
|
63 |
+
[runner]\n\
|
64 |
+
fastReruns = true\n\
|
65 |
+
' > /app/.streamlit/config.toml
|
66 |
|
67 |
# Expose the port Streamlit will run on
|
68 |
EXPOSE 8501
|
|
|
71 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
72 |
|
73 |
# Run the Streamlit app
|
74 |
+
ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py"]
|
requirements.txt
CHANGED
@@ -1,11 +1,12 @@
|
|
1 |
-
streamlit
|
2 |
-
yt_dlp
|
3 |
-
speechbrain
|
4 |
-
torch
|
5 |
-
torchaudio
|
6 |
-
transformers
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
1 |
+
streamlit==1.31.0
|
2 |
+
yt_dlp==2023.11.16
|
3 |
+
speechbrain==0.5.15
|
4 |
+
torch==2.0.1
|
5 |
+
torchaudio==2.0.2
|
6 |
+
# Pin transformers to version that has AutoProcessor
|
7 |
+
transformers==4.31.0
|
8 |
+
librosa==0.10.1
|
9 |
+
matplotlib==3.7.2
|
10 |
+
scikit-learn==1.3.0
|
11 |
+
openai==1.0.0
|
12 |
+
python-dotenv==1.0.0
|
src/streamlit_app.py
CHANGED
@@ -5,8 +5,16 @@ import subprocess
|
|
5 |
import librosa
|
6 |
import numpy as np
|
7 |
import torch
|
|
|
8 |
from speechbrain.inference.classifiers import EncoderClassifier
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
from dotenv import load_dotenv
|
11 |
import matplotlib.pyplot as plt
|
12 |
import tempfile
|
@@ -113,12 +121,19 @@ class AccentDetector:
|
|
113 |
source="speechbrain/lang-id-commonlanguage_ecapa",
|
114 |
savedir="tmp_model"
|
115 |
)
|
116 |
-
|
117 |
-
# Initialize the English accent classifier - using VoxLingua107 for now
|
118 |
# In production, you'd use a more specialized accent model
|
119 |
try:
|
120 |
self.model_name = "speechbrain/lang-id-voxlingua107-ecapa"
|
121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
self.model = AutoModelForAudioClassification.from_pretrained(self.model_name)
|
123 |
self.have_accent_model = True
|
124 |
except Exception as e:
|
|
|
5 |
import librosa
|
6 |
import numpy as np
|
7 |
import torch
|
8 |
+
import sys
|
9 |
from speechbrain.inference.classifiers import EncoderClassifier
|
10 |
+
# Handle potential compatibility issues with transformers
|
11 |
+
try:
|
12 |
+
from transformers import AutoProcessor, AutoModelForAudioClassification
|
13 |
+
HAS_AUTO_PROCESSOR = True
|
14 |
+
except ImportError:
|
15 |
+
from transformers import AutoModelForAudioClassification
|
16 |
+
HAS_AUTO_PROCESSOR = False
|
17 |
+
st.warning("Using a compatible but limited version of transformers. Some features may be limited.")
|
18 |
from dotenv import load_dotenv
|
19 |
import matplotlib.pyplot as plt
|
20 |
import tempfile
|
|
|
121 |
source="speechbrain/lang-id-commonlanguage_ecapa",
|
122 |
savedir="tmp_model"
|
123 |
)
|
124 |
+
# Initialize the English accent classifier - using VoxLingua107 for now
|
|
|
125 |
# In production, you'd use a more specialized accent model
|
126 |
try:
|
127 |
self.model_name = "speechbrain/lang-id-voxlingua107-ecapa"
|
128 |
+
|
129 |
+
# Handle case where AutoProcessor is not available
|
130 |
+
if HAS_AUTO_PROCESSOR:
|
131 |
+
self.processor = AutoProcessor.from_pretrained(self.model_name)
|
132 |
+
else:
|
133 |
+
# Fall back to using feature_extractor directly if AutoProcessor is not available
|
134 |
+
from transformers import AutoFeatureExtractor
|
135 |
+
self.processor = AutoFeatureExtractor.from_pretrained(self.model_name)
|
136 |
+
|
137 |
self.model = AutoModelForAudioClassification.from_pretrained(self.model_name)
|
138 |
self.have_accent_model = True
|
139 |
except Exception as e:
|