acadiaway committed on
Commit
b4bcb5e
·
1 Parent(s): a88c7b9

Simplify Dockerfile, use /tmp/cache/huggingface, preload model in pipeline.py

Browse files
Files changed (3) hide show
  1. Dockerfile +5 -9
  2. pipeline.py +4 -4
  3. requirements.txt +1 -1
Dockerfile CHANGED
@@ -11,21 +11,17 @@ RUN apt-get update && apt-get install -y \
11
 
12
  COPY requirements.txt .
13
 
14
- # Upgrade pip and install vLLM with GPU support first
15
  RUN pip install --upgrade pip
16
- RUN pip install --no-cache-dir vllm==0.10.1 --extra-index-url https://download.pytorch.org/whl/cu128
17
-
18
- # Install remaining dependencies
19
  RUN pip install --no-cache-dir -r requirements.txt
20
 
21
  COPY app.py pipeline.py db_utils.py ./
22
 
23
- # Set up cache directory and preload tokenizer
24
- RUN mkdir -p /app/cache/huggingface && \
25
- chmod -R 777 /app/cache/huggingface && \
26
- python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('Snowflake/Arctic-Text2SQL-R1-7B', cache_dir='/app/cache/huggingface')"
27
 
28
- ENV HF_HOME=/app/cache/huggingface
29
  ENV PORT=8501
30
  ENV OMP_NUM_THREADS=8
31
 
 
11
 
12
  COPY requirements.txt .
13
 
14
+ # Upgrade pip and install dependencies
15
  RUN pip install --upgrade pip
 
 
 
16
  RUN pip install --no-cache-dir -r requirements.txt
17
 
18
  COPY app.py pipeline.py db_utils.py ./
19
 
20
+ # Set up cache directory
21
+ RUN mkdir -p /tmp/cache/huggingface && \
22
+ chmod -R 777 /tmp/cache/huggingface
 
23
 
24
+ ENV HF_HOME=/tmp/cache/huggingface
25
  ENV PORT=8501
26
  ENV OMP_NUM_THREADS=8
27
 
pipeline.py CHANGED
@@ -3,13 +3,14 @@ from transformers import AutoTokenizer
3
  from vllm import LLM, SamplingParams
4
  from db_utils import get_schema, execute_sql
5
 
6
- # Initialize model at startup to avoid lazy loading
7
  model = None
8
  tokenizer = None
9
  try:
10
  tokenizer = AutoTokenizer.from_pretrained(
11
  "Snowflake/Arctic-Text2SQL-R1-7B",
12
- cache_dir="/app/cache/huggingface"
 
13
  )
14
  model = LLM(
15
  model="Snowflake/Arctic-Text2SQL-R1-7B",
@@ -18,8 +19,7 @@ try:
18
  max_model_len=1024,
19
  max_num_seqs=1,
20
  enforce_eager=True,
21
- trust_remote_code=True,
22
- cache_dir="/app/cache/huggingface"
23
  )
24
  except Exception as e:
25
  print(f"Error loading model at startup: {e}")
 
3
  from vllm import LLM, SamplingParams
4
  from db_utils import get_schema, execute_sql
5
 
6
+ # Initialize model at startup
7
  model = None
8
  tokenizer = None
9
  try:
10
  tokenizer = AutoTokenizer.from_pretrained(
11
  "Snowflake/Arctic-Text2SQL-R1-7B",
12
+ cache_dir="/tmp/cache/huggingface",
13
+ trust_remote_code=True
14
  )
15
  model = LLM(
16
  model="Snowflake/Arctic-Text2SQL-R1-7B",
 
19
  max_model_len=1024,
20
  max_num_seqs=1,
21
  enforce_eager=True,
22
+ trust_remote_code=True
 
23
  )
24
  except Exception as e:
25
  print(f"Error loading model at startup: {e}")
requirements.txt CHANGED
@@ -5,4 +5,4 @@ sqlalchemy==2.0.43
5
  python-dotenv==1.1.1
6
  vllm==0.10.1
7
  streamlit==1.39.0
8
- torch==2.7.1
 
5
  python-dotenv==1.1.1
6
  vllm==0.10.1
7
  streamlit==1.39.0
8
+ torch==2.7.1 --extra-index-url https://download.pytorch.org/whl/cu121