Preload model in Dockerfile and pipeline.py to fix PermissionError
- Dockerfile   +6 -5
- pipeline.py  +22 -23
Dockerfile
CHANGED

@@ -20,12 +20,13 @@ RUN pip install --no-cache-dir -r requirements.txt
 
 COPY app.py pipeline.py db_utils.py ./
 
-# Set up cache directory
-RUN mkdir -p /cache/huggingface && \
-    chmod -R 777 /cache/huggingface && \
-
+# Set up cache directory and preload model
+RUN mkdir -p /app/cache/huggingface && \
+    chmod -R 777 /app/cache/huggingface && \
+    python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('Snowflake/Arctic-Text2SQL-R1-7B', cache_dir='/app/cache/huggingface')" && \
+    python -c "from vllm import LLM; LLM(model='Snowflake/Arctic-Text2SQL-R1-7B', dtype='float16', gpu_memory_utilization=0.75, max_model_len=1024, max_num_seqs=1, enforce_eager=True, trust_remote_code=True, cache_dir='/app/cache/huggingface')"
 
-ENV HF_HOME=/cache/huggingface
+ENV HF_HOME=/app/cache/huggingface
 ENV PORT=8501
 ENV OMP_NUM_THREADS=8
 
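For context: Spaces run the app container as a non-root user, so a Hugging Face cache directory created as root and never chmod-ed is a classic source of the PermissionError this commit targets; moving the cache under /app and opening it with chmod -R 777 sidesteps that. Two caveats on the preload step, though: instantiating vllm.LLM during docker build only works if the build machine has a GPU, and cache_dir is not a constructor argument I know of on vLLM's LLM (vLLM exposes download_dir for the weight cache). A lighter way to bake the weights into the image, sketched below under those assumptions and reusing the model id and cache path from the diff, is huggingface_hub.snapshot_download, which fetches the files without loading the model at all:

# prefetch_model.py -- build-time prefetch sketch (assumes huggingface_hub
# is installed; model id and cache path are taken from the diff above).
from huggingface_hub import snapshot_download

# Download tokenizer and weight files into the image's cache without
# initializing vLLM or touching a GPU; the runtime then loads from disk.
snapshot_download(
    repo_id="Snowflake/Arctic-Text2SQL-R1-7B",
    cache_dir="/app/cache/huggingface",
)

In the Dockerfile this would replace the two python -c lines with something like RUN python prefetch_model.py. Note also that ENV only affects instructions after it appears, which is why the explicit cache_dir arguments (rather than HF_HOME, declared later) govern where the build-time downloads land.
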
pipeline.py
CHANGED
@@ -3,31 +3,30 @@ from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
 from db_utils import get_schema, execute_sql
 
-# Initialize model
+# Initialize model at startup to avoid lazy loading
 model = None
 tokenizer = None
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+try:
+    tokenizer = AutoTokenizer.from_pretrained(
+        "Snowflake/Arctic-Text2SQL-R1-7B",
+        cache_dir="/app/cache/huggingface"
+    )
+    model = LLM(
+        model="Snowflake/Arctic-Text2SQL-R1-7B",
+        dtype="float16",
+        gpu_memory_utilization=0.75,
+        max_model_len=1024,
+        max_num_seqs=1,
+        enforce_eager=True,
+        trust_remote_code=True,
+        cache_dir="/app/cache/huggingface"
+    )
+except Exception as e:
+    print(f"Error loading model at startup: {e}")
+    raise
 
 def text_to_sql(nl_query):
     try:
-        init_model()
         schema = get_schema()
         prompt = f"""### Task
 Generate a SQL query to answer the following natural language question: {nl_query}

@@ -39,9 +38,9 @@ Generate a SQL query to answer the following natural language question: {nl_query}
 Output only the SQL query.
 """
         sampling_params = SamplingParams(
-        temperature=0,
-        max_tokens=128,
-        stop=["\n\n"]
+            temperature=0,
+            max_tokens=128,
+            stop=["\n\n"]
         )
         outputs = model.generate([prompt], sampling_params)
         sql = outputs[0].outputs[0].text.strip()
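The same cache_dir caveat applies to the startup load: AutoTokenizer.from_pretrained does accept cache_dir, but vLLM's LLM forwards extra keyword arguments to its engine arguments, where the download location is named download_dir, so this init would likely fail with a TypeError before any permission issue is reached. A minimal corrected sketch of the import-time init, assuming download_dir is the right knob for the installed vLLM version:

# pipeline.py startup init -- sketch assuming vLLM's download_dir parameter;
# adjust to the installed vLLM version.
from transformers import AutoTokenizer
from vllm import LLM

MODEL_ID = "Snowflake/Arctic-Text2SQL-R1-7B"
CACHE_DIR = "/app/cache/huggingface"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, cache_dir=CACHE_DIR)
model = LLM(
    model=MODEL_ID,
    dtype="float16",
    gpu_memory_utilization=0.75,  # leave headroom on a shared Space GPU
    max_model_len=1024,
    max_num_seqs=1,
    enforce_eager=True,           # skip CUDA graph capture to save memory
    trust_remote_code=True,
    download_dir=CACHE_DIR,       # vLLM's analogue of cache_dir (assumption)
)

Loading at import time trades slower startup for fail-fast behaviour: a bad download or GPU allocation now crashes the Space immediately, which is what the try/except-and-raise in the diff is for, rather than surfacing as an error on the first text_to_sql call.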