Spaces:
Running
Running
Dockerfile pipeline.py requirements.txt optimized parallelism
Browse files- Dockerfile +1 -0
- pipeline.py +11 -4
- requirements.txt +2 -1
Dockerfile
CHANGED
@@ -22,6 +22,7 @@ COPY app.py pipeline.py db_utils.py ./
|
|
22 |
|
23 |
ENV HF_HOME=/cache/huggingface
|
24 |
ENV PORT=8501
|
|
|
25 |
|
26 |
EXPOSE 8501
|
27 |
|
|
|
22 |
|
23 |
ENV HF_HOME=/cache/huggingface
|
24 |
ENV PORT=8501
|
25 |
+
ENV OMP_NUM_THREADS=8 # Set to match 8 vCPUs
|
26 |
|
27 |
EXPOSE 8501
|
28 |
|
pipeline.py
CHANGED
@@ -15,12 +15,15 @@ def init_model():
|
|
15 |
model = LLM(
|
16 |
model="Snowflake/Arctic-Text2SQL-R1-7B",
|
17 |
dtype="float16",
|
18 |
-
gpu_memory_utilization=0.
|
19 |
-
max_model_len=
|
|
|
|
|
|
|
20 |
)
|
21 |
except Exception as e:
|
22 |
print(f"Error loading model: {e}")
|
23 |
-
|
24 |
|
25 |
def text_to_sql(nl_query):
|
26 |
try:
|
@@ -35,7 +38,11 @@ Generate a SQL query to answer the following natural language question: {nl_quer
|
|
35 |
### Response Format
|
36 |
Output only the SQL query.
|
37 |
"""
|
38 |
-
sampling_params = SamplingParams(
|
|
|
|
|
|
|
|
|
39 |
outputs = model.generate([prompt], sampling_params)
|
40 |
sql = outputs[0].outputs[0].text.strip()
|
41 |
results = execute_sql(sql)
|
|
|
15 |
model = LLM(
|
16 |
model="Snowflake/Arctic-Text2SQL-R1-7B",
|
17 |
dtype="float16",
|
18 |
+
gpu_memory_utilization=0.75, # Balanced for 30GB VRAM
|
19 |
+
max_model_len=1024, # Reduced for speed
|
20 |
+
max_num_seqs=1, # Single query
|
21 |
+
enforce_eager=True, # Avoid graph compilation
|
22 |
+
trust_remote_code=True # Model compatibility
|
23 |
)
|
24 |
except Exception as e:
|
25 |
print(f"Error loading model: {e}")
|
26 |
+
raise
|
27 |
|
28 |
def text_to_sql(nl_query):
|
29 |
try:
|
|
|
38 |
### Response Format
|
39 |
Output only the SQL query.
|
40 |
"""
|
41 |
+
sampling_params = SamplingParams(
|
42 |
+
temperature=0, # Deterministic
|
43 |
+
max_tokens=128, # Short queries
|
44 |
+
stop=["\n\n"] # Stop at query end
|
45 |
+
)
|
46 |
outputs = model.generate([prompt], sampling_params)
|
47 |
sql = outputs[0].outputs[0].text.strip()
|
48 |
results = execute_sql(sql)
|
requirements.txt
CHANGED
@@ -4,4 +4,5 @@ psycopg2-binary==2.9.10
|
|
4 |
sqlalchemy==2.0.43
|
5 |
python-dotenv==1.1.1
|
6 |
vllm==0.10.1
|
7 |
-
streamlit==1.39.0
|
|
|
|
4 |
sqlalchemy==2.0.43
|
5 |
python-dotenv==1.1.1
|
6 |
vllm==0.10.1
|
7 |
+
streamlit==1.39.0
|
8 |
+
torch==2.8.0
|