acadiaway committed
Commit eda83bc · Parent: 5356085

Switch to HF Inference API approach - eliminate model loading

Files changed (3)
  1. Dockerfile +1 -15
  2. pipeline.py +115 -107
  3. requirements.txt +2 -5
Dockerfile CHANGED
@@ -6,28 +6,14 @@ RUN apt-get update && apt-get install -y \
     build-essential \
     libpq-dev \
     curl \
-    git \
     && rm -rf /var/lib/apt/lists/*
 
 COPY requirements.txt .
-
-# Upgrade pip and install dependencies
-RUN pip install --upgrade pip
-RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install --upgrade pip && pip install --no-cache-dir -r requirements.txt
 
 COPY app.py pipeline.py db_utils.py ./
 
-# Set up cache directory with proper permissions
-RUN mkdir -p /tmp/cache/huggingface && \
-    chmod -R 777 /tmp/cache/huggingface
-
-# Environment variables
-ENV HF_HOME=/tmp/cache/huggingface
-ENV TRANSFORMERS_CACHE=/tmp/cache/huggingface
-ENV HF_DATASETS_CACHE=/tmp/cache/huggingface
 ENV PORT=8501
-ENV OMP_NUM_THREADS=4
-ENV TOKENIZERS_PARALLELISM=false
 
 EXPOSE 8501
pipeline.py CHANGED
@@ -1,132 +1,140 @@
-import os
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-from db_utils import get_schema, execute_sql
-
-# Initialize model and tokenizer as global variables
-model = None
-tokenizer = None
-
-def load_model():
-    """Load SQLCoder model with quantization for memory efficiency"""
-    global model, tokenizer
-
-    if model is not None and tokenizer is not None:
-        return model, tokenizer
-
-    try:
-        # Configure quantization to reduce memory usage
-        quantization_config = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_compute_dtype=torch.float16,
-            bnb_4bit_use_double_quant=True,
-            bnb_4bit_quant_type="nf4"
-        )
-
-        # Load tokenizer
-        tokenizer = AutoTokenizer.from_pretrained(
-            "defog/sqlcoder-7b-2",
-            trust_remote_code=True,
-            cache_dir="/tmp/cache/huggingface"
-        )
-
-        # Load model with quantization
-        model = AutoModelForCausalLM.from_pretrained(
-            "defog/sqlcoder-7b-2",
-            quantization_config=quantization_config,
-            device_map="auto",
-            trust_remote_code=True,
-            torch_dtype=torch.float16,
-            cache_dir="/tmp/cache/huggingface"
-        )
-
-        print("SQLCoder model loaded successfully!")
-        return model, tokenizer
-
-    except Exception as e:
-        print(f"Error loading SQLCoder model: {e}")
-        raise e
-
-def generate_sql(nl_query, schema):
-    """Generate SQL using SQLCoder with proper prompting"""
-    prompt = f"""### Task
-Generate a PostgreSQL query to answer this question: {nl_query}
-
-### Database Schema
-The query will run on a database with the following schema:
-{schema}
-
-### Instructions
-- Return only the SQL query, no explanation
-- Use proper PostgreSQL syntax
-- Include appropriate LIMIT clauses if the question asks for a specific number of results
-
-### SQL Query:
-"""
-    return prompt
-
-def text_to_sql(nl_query):
-    """Main function to convert natural language to SQL and execute it"""
-    try:
-        # Load model if not already loaded
-        model, tokenizer = load_model()
-
-        # Get database schema
-        schema = get_schema()
-
-        # Create the prompt
-        prompt = generate_sql(nl_query, schema)
-
-        # Tokenize input
-        inputs = tokenizer.encode(prompt, return_tensors="pt")
-
-        # Move to appropriate device
-        device = next(model.parameters()).device
-        inputs = inputs.to(device)
-
-        # Generate SQL
-        with torch.no_grad():
-            outputs = model.generate(
-                inputs,
-                max_new_tokens=200,
-                num_beams=4,
-                temperature=0.1,
-                do_sample=False,
-                pad_token_id=tokenizer.eos_token_id,
-                eos_token_id=tokenizer.eos_token_id
-            )
-
-        # Decode the output
-        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-        # Extract just the SQL part (after the prompt)
-        sql_start = generated_text.find("### SQL Query:") + len("### SQL Query:")
-        sql = generated_text[sql_start:].strip()
-
-        # Clean up the SQL (remove any extra text after the query)
-        sql_lines = sql.split('\n')
-        sql = sql_lines[0].strip() if sql_lines else sql.strip()
-
-        # Remove any trailing semicolon if present and clean
-        sql = sql.rstrip(';').strip()
-
-        # Basic validation
-        if not sql or not sql.lower().startswith('select'):
-            raise ValueError(f"Generated invalid SQL: {sql}")
-
-        print(f"Generated SQL: {sql}")
-
-        # Execute the SQL
-        results = execute_sql(sql)
-
-        return sql, results
-
-    except Exception as e:
-        print(f"Error in text_to_sql: {e}")
-        return f"Error: {str(e)}", []
-
-# Initialize model on import (optional - can be lazy loaded)
-try:
-    load_model()
-except Exception as e:
-    print(f"Model will be loaded on first use due to: {e}")
+import os
+import requests
+import time
+import re
+from db_utils import get_schema, execute_sql
+
+# Hugging Face Inference API endpoint
+API_URL = "https://api-inference.huggingface.co/models/defog/sqlcoder-7b-2"
+
+def query_huggingface_api(prompt, max_retries=3):
+    """Query the Hugging Face Inference API"""
+    hf_token = os.getenv("HF_TOKEN")
+    if not hf_token:
+        raise ValueError("HF_TOKEN not found in environment variables. Add it to your Space secrets.")
+
+    headers = {"Authorization": f"Bearer {hf_token}"}
+    payload = {
+        "inputs": prompt,
+        "parameters": {
+            "max_new_tokens": 200,
+            "temperature": 0.1,
+            "do_sample": False,
+            "return_full_text": False
+        }
+    }
+
+    for attempt in range(max_retries):
+        try:
+            response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
+
+            if response.status_code == 200:
+                result = response.json()
+                if isinstance(result, list) and len(result) > 0:
+                    return result[0].get("generated_text", "").strip()
+                return str(result).strip()
+
+            elif response.status_code == 503:
+                wait_time = 20 * (attempt + 1)
+                print(f"Model loading, waiting {wait_time} seconds...")
+                time.sleep(wait_time)
+                continue
+
+            else:
+                error_msg = f"API Error {response.status_code}: {response.text}"
+                if attempt == max_retries - 1:
+                    raise Exception(error_msg)
+
+        except requests.exceptions.Timeout:
+            if attempt == max_retries - 1:
+                raise Exception("Request timed out after multiple attempts")
+            time.sleep(5)
+
+        except Exception as e:
+            if attempt == max_retries - 1:
+                raise e
+            time.sleep(5)
+
+    raise Exception("Failed to get response after all retries")
+
+def extract_user_requested_limit(nl_query):
+    """Extract user-requested number from natural language query"""
+    patterns = [
+        r'\b(\d+)\s+(?:ships?|vessels?|boats?|records?|results?|entries?|names?)\b',
+        r'(?:show|list|find|get)\s+(?:me\s+)?(?:the\s+)?(?:top\s+|first\s+)?(\d+)',
+        r'(?:names\s+of\s+)(\d+)\s+',
+        r'\b(\d+)\s+(?:oldest|newest|biggest|smallest|largest)',
+    ]
+
+    for pattern in patterns:
+        match = re.search(pattern, nl_query, re.IGNORECASE)
+        if match:
+            return int(match.group(1))
+    return None
+
+def clean_sql_output(sql_text, user_limit=None):
+    """Clean and validate SQL output from the model"""
+    sql_text = sql_text.strip()
+
+    # Remove markdown formatting
+    if sql_text.startswith("```"):
+        lines = sql_text.split('\n')
+        sql_text = '\n'.join(lines[1:-1]) if len(lines) > 2 else sql_text
+
+    # Extract SQL
+    lines = sql_text.split('\n')
+    sql = ""
+    for line in lines:
+        line = line.strip()
+        if line and (line.upper().startswith('SELECT') or sql):
+            sql += line + " "
+            if line.endswith(';'):
+                break
+
+    sql = sql.strip().rstrip(';')
+
+    # Apply user-requested limit
+    if user_limit:
+        sql = re.sub(r'\s+LIMIT\s+\d+', '', sql, flags=re.IGNORECASE)
+        sql += f" LIMIT {user_limit}"
+
+    return sql
+
+def text_to_sql(nl_query):
+    """Convert natural language to SQL using Hugging Face Inference API"""
+    try:
+        schema = get_schema()
+        user_limit = extract_user_requested_limit(nl_query)
+
+        prompt = f"""### Task
+Generate a PostgreSQL query to answer this question: {nl_query}
+
+### Database Schema
+{schema}
+
+### Instructions
+- Return only the SQL query
+- Use PostgreSQL syntax
+- Be precise with table and column names
+
+### SQL Query:"""
+
+        print("Querying Hugging Face Inference API...")
+        generated_sql = query_huggingface_api(prompt)
+
+        if not generated_sql:
+            raise ValueError("No SQL generated from the model")
+
+        sql = clean_sql_output(generated_sql, user_limit)
+
+        if not sql or not sql.upper().startswith('SELECT'):
+            raise ValueError(f"Invalid SQL generated: {sql}")
+
+        print(f"Generated SQL: {sql}")
+        results = execute_sql(sql)
+        return sql, results
+
+    except Exception as e:
+        error_msg = str(e)
+        print(f"Error in text_to_sql: {error_msg}")
+        return f"Error: {error_msg}", []
requirements.txt CHANGED
@@ -1,8 +1,5 @@
-transformers==4.45.2
-accelerate==0.34.2
+requests==2.31.0
 psycopg2-binary==2.9.10
 sqlalchemy==2.0.43
 python-dotenv==1.1.1
-torch==2.4.1
 streamlit==1.39.0
-bitsandbytes==0.43.3
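With the trimmed dependencies, the whole path can be smoke-tested from a Python shell inside the Space. A minimal sketch, assuming HF_TOKEN is set as a Space secret, the database behind db_utils is reachable, and using a hypothetical question:

    # Sketch: end-to-end call through the new Inference API path.
    import os
    assert os.getenv("HF_TOKEN"), "set HF_TOKEN (Space secret) first"

    from pipeline import text_to_sql

    sql, rows = text_to_sql("show me the 5 oldest ships")  # hypothetical question
    print(sql)        # the generated (and cleaned) SELECT statement
    for row in rows:  # rows come back from db_utils.execute_sql
        print(row)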