Spaces:
Running
Running
Simplify Dockerfile, use /tmp/cache/huggingface, preload model in pipeline.py
Browse files
- Dockerfile +5 -9
- pipeline.py +4 -4
- requirements.txt +1 -1
Dockerfile
CHANGED
@@ -11,21 +11,17 @@ RUN apt-get update && apt-get install -y \
|
|
11 |
|
12 |
COPY requirements.txt .
|
13 |
|
14 |
-
# Upgrade pip and install
|
15 |
RUN pip install --upgrade pip
|
16 |
-
RUN pip install --no-cache-dir vllm==0.10.1 --extra-index-url https://download.pytorch.org/whl/cu128
|
17 |
-
|
18 |
-
# Install remaining dependencies
|
19 |
RUN pip install --no-cache-dir -r requirements.txt
|
20 |
|
21 |
COPY app.py pipeline.py db_utils.py ./
|
22 |
|
23 |
-
# Set up cache directory
|
24 |
-
RUN mkdir -p /app/cache/huggingface && \
|
25 |
-
chmod -R 777 /app/cache/huggingface && \
|
26 |
-
python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('Snowflake/Arctic-Text2SQL-R1-7B', cache_dir='/app/cache/huggingface')"
|
27 |
|
28 |
-
ENV HF_HOME=/app/cache/huggingface
|
29 |
ENV PORT=8501
|
30 |
ENV OMP_NUM_THREADS=8
|
31 |
|
|
|
11 |
|
12 |
COPY requirements.txt .
|
13 |
|
14 |
+
# Upgrade pip and install dependencies
|
15 |
RUN pip install --upgrade pip
|
|
|
|
|
|
|
16 |
RUN pip install --no-cache-dir -r requirements.txt
|
17 |
|
18 |
COPY app.py pipeline.py db_utils.py ./
|
19 |
|
20 |
+
# Set up cache directory
|
21 |
+
RUN mkdir -p /tmp/cache/huggingface && \
|
22 |
+
chmod -R 777 /tmp/cache/huggingface
|
|
|
23 |
|
24 |
+
ENV HF_HOME=/tmp/cache/huggingface
|
25 |
ENV PORT=8501
|
26 |
ENV OMP_NUM_THREADS=8
|
27 |
|
pipeline.py
CHANGED
@@ -3,13 +3,14 @@ from transformers import AutoTokenizer
|
|
3 |
from vllm import LLM, SamplingParams
|
4 |
from db_utils import get_schema, execute_sql
|
5 |
|
6 |
-
# Initialize model at startup
|
7 |
model = None
|
8 |
tokenizer = None
|
9 |
try:
|
10 |
tokenizer = AutoTokenizer.from_pretrained(
|
11 |
"Snowflake/Arctic-Text2SQL-R1-7B",
|
12 |
-
cache_dir="/app/cache/huggingface"
|
|
|
13 |
)
|
14 |
model = LLM(
|
15 |
model="Snowflake/Arctic-Text2SQL-R1-7B",
|
@@ -18,8 +19,7 @@ try:
|
|
18 |
max_model_len=1024,
|
19 |
max_num_seqs=1,
|
20 |
enforce_eager=True,
|
21 |
-
trust_remote_code=True,
|
22 |
-
cache_dir="/app/cache/huggingface"
|
23 |
)
|
24 |
except Exception as e:
|
25 |
print(f"Error loading model at startup: {e}")
|
|
|
3 |
from vllm import LLM, SamplingParams
|
4 |
from db_utils import get_schema, execute_sql
|
5 |
|
6 |
+
# Initialize model at startup
|
7 |
model = None
|
8 |
tokenizer = None
|
9 |
try:
|
10 |
tokenizer = AutoTokenizer.from_pretrained(
|
11 |
"Snowflake/Arctic-Text2SQL-R1-7B",
|
12 |
+
cache_dir="/tmp/cache/huggingface",
|
13 |
+
trust_remote_code=True
|
14 |
)
|
15 |
model = LLM(
|
16 |
model="Snowflake/Arctic-Text2SQL-R1-7B",
|
|
|
19 |
max_model_len=1024,
|
20 |
max_num_seqs=1,
|
21 |
enforce_eager=True,
|
22 |
+
trust_remote_code=True
|
|
|
23 |
)
|
24 |
except Exception as e:
|
25 |
print(f"Error loading model at startup: {e}")
|
requirements.txt
CHANGED
@@ -5,4 +5,4 @@ sqlalchemy==2.0.43
|
|
5 |
python-dotenv==1.1.1
|
6 |
vllm==0.10.1
|
7 |
streamlit==1.39.0
|
8 |
-
torch==2.7.1
|
|
|
5 |
python-dotenv==1.1.1
|
6 |
vllm==0.10.1
|
7 |
streamlit==1.39.0
|
8 |
+
torch==2.7.1 --extra-index-url https://download.pytorch.org/whl/cu121
|