Spaces:

acadiaway
/

gemini_nl2sql

Running

App Files Files Community

acadiaway committed 15 days ago

Commit

d339486

1 Parent(s): 50136a9

pipeline with the bug messages for Google API -v3 syntax

Browse files

Files changed (1) hide show

pipeline.py +123 -2

pipeline.py CHANGED Viewed

@@ -14,7 +14,7 @@ def query_gemini_api(prompt, max_retries=3):
     # Gemini API endpoint
     url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key={api_key}"
-    print(f=== DEBUG: API URL: {url[:50]}...")  # Partial URL for debug
     headers = {
         "Content-Type": "application/json"
@@ -67,7 +67,7 @@ def query_gemini_api(prompt, max_retries=3):
                 if attempt == max_retries - 1:
                     raise Exception(error_msg)
-        except requests.exceptions.Timeout:  # Fixed: Added missing colon
             print(f"=== DEBUG: Timeout on attempt {attempt + 1}")
             if attempt == max_retries - 1:
                 raise Exception("Request timed out after multiple attempts")
@@ -80,3 +80,124 @@ def query_gemini_api(prompt, max_retries=3):
             time.sleep(5)
     raise Exception("Failed to get response after all retries")

     # Gemini API endpoint
     url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key={api_key}"
+    print(f"=== DEBUG: API URL: {url[:50]}...")  # Fixed: Proper f-string syntax
     headers = {
         "Content-Type": "application/json"
                 if attempt == max_retries - 1:
                     raise Exception(error_msg)
+        except requests.exceptions.Timeout:  # Previous fix retained
             print(f"=== DEBUG: Timeout on attempt {attempt + 1}")
             if attempt == max_retries - 1:
                 raise Exception("Request timed out after multiple attempts")
             time.sleep(5)
     raise Exception("Failed to get response after all retries")
def extract_user_requested_limit(nl_query):
    """Extract a user-requested row count from a natural language query.

    The patterns are tried in the order listed and the first pattern that
    matches anywhere in the text wins, so an earlier pattern takes priority
    over a later one regardless of where in the query each would match.

    Args:
        nl_query: The user's question as free text (str).

    Returns:
        The captured number as an int, or None when the query is empty or
        none of the patterns match.
    """
    if not nl_query:
        return None

    patterns = [
        # "<n> ships", "<n> records", "1 entry", "3 entries", ...
        # `entr(?:y|ies)` covers the singular form, which `entries?` missed.
        r'\b(\d+)\s+(?:ships?|vessels?|boats?|records?|results?|entr(?:y|ies)|names?)\b',
        # "show me the top <n>", "list first <n>", "get <n>", ...
        r'(?:show|list|find|get)\s+(?:me\s+)?(?:the\s+)?(?:top\s+|first\s+)?(\d+)',
        # "names of <n> ..."
        r'(?:names\s+of\s+)(\d+)\s+',
        # "<n> oldest", "<n> largest", ...
        r'\b(\d+)\s+(?:oldest|newest|biggest|smallest|largest)',
    ]
    for pattern in patterns:
        match = re.search(pattern, nl_query, re.IGNORECASE)
        if match:
            return int(match.group(1))
    return None
def clean_sql_output(sql_text, user_limit=None):
    """Reduce raw model output to a single SQL statement string.

    Processing steps:
      1. Strip surrounding whitespace and Markdown code fences. Fence marker
         lines are dropped wherever they appear; if any non-blank text sits
         inside a fence, only the fenced text is kept, otherwise the text
         outside the fences is kept.
      2. Starting at the first line that begins with SELECT, join the
         non-empty lines with single spaces, stopping after a line that
         ends with ';'.
      3. If no line begins with SELECT, fall back to the first non-empty
         line containing SELECT, WITH or FROM.
      4. Remove trailing semicolons and whitespace.
      5. If ``user_limit`` is truthy and a statement was found, remove every
         ``LIMIT <digits>`` occurrence and append ``LIMIT <user_limit>``.

    Args:
        sql_text: Raw text returned by the model (str).
        user_limit: Optional row limit to enforce; falsy values (None, 0)
            leave the statement's own LIMIT untouched.

    Returns:
        The cleaned SQL string without a trailing semicolon, or an empty
        string when nothing that looks like SQL was found.
    """
    sql_text = sql_text.strip()

    # Remove markdown formatting. Handle fences anywhere in the text, not
    # only at the very start, so a prose lead-in before the fenced query
    # does not leave a stray closing fence glued onto the SQL.
    if "```" in sql_text:
        fenced, unfenced = [], []
        inside_fence = False
        for line in sql_text.split('\n'):
            if line.strip().startswith("```"):
                inside_fence = not inside_fence
                continue
            (fenced if inside_fence else unfenced).append(line)
        has_fenced_content = any(line.strip() for line in fenced)
        sql_text = '\n'.join(fenced if has_fenced_content else unfenced)

    lines = [line.strip() for line in sql_text.split('\n')]

    # Handle multiple lines - extract the main SELECT query.
    parts = []
    for line in lines:
        if not line:
            continue
        if parts or line.upper().startswith('SELECT'):
            parts.append(line)
            if line.endswith(';'):
                break
    sql = " ".join(parts)

    if not sql:
        # No line starts with SELECT: take the first non-empty line that
        # looks like SQL.
        keywords = ('SELECT', 'WITH', 'FROM')
        sql = next(
            (
                line for line in lines
                if line and any(keyword in line.upper() for keyword in keywords)
            ),
            "",
        )

    # The second rstrip removes whitespace that preceded the semicolon.
    sql = sql.strip().rstrip(';').rstrip()

    # Apply user-requested limit. Skip when no SQL was found so the result
    # is an empty string rather than a bare " LIMIT n".
    if user_limit and sql:
        # NOTE: this removes every "LIMIT <digits>", including any inside
        # subqueries, before appending the requested limit.
        sql = re.sub(r'\s+LIMIT\s+\d+', '', sql, flags=re.IGNORECASE)
        sql += f" LIMIT {user_limit}"
    return sql
def text_to_sql(nl_query):
    """Convert a natural language question to SQL with Google Gemini and run it.

    Steps performed:
      1. Fetch the database schema via ``get_schema()``.
      2. Extract an optional row limit from the question.
      3. Build a prompt containing the question and the first 1500
         characters of the schema, and send it to ``query_gemini_api``.
      4. Clean the model output with ``clean_sql_output`` and validate it:
         it must start with SELECT and must not contain a ';' (which would
         indicate more than one statement).
      5. Execute the statement via ``execute_sql``.

    Args:
        nl_query: The user's question as free text.

    Returns:
        A 2-tuple. On success ``(sql, results)`` where ``sql`` is the
        executed statement and ``results`` is whatever ``execute_sql``
        returned. On any failure ``(message, [])`` where ``message`` starts
        with ``"Error:"``. Exceptions are caught and reported this way
        rather than propagated.
    """
    try:
        print(f"=== DEBUG: Starting text_to_sql with query: {nl_query}")

        # Get database schema
        try:
            schema = get_schema()
            print(f"=== DEBUG: Schema retrieved, length: {len(schema)}")
        except Exception as e:
            print(f"=== DEBUG: Schema error: {e}")
            return f"Error: Database schema access failed: {e}", []

        # Extract user limit
        user_limit = extract_user_requested_limit(nl_query)
        print(f"=== DEBUG: Extracted user limit: {user_limit}")

        # Create optimized prompt for Gemini. Only the first 1500 characters
        # of the schema are included in the prompt.
        prompt = f"""You are an expert PostgreSQL developer. Convert this natural language question to a precise SQL query.
Question: {nl_query}
Database Schema:
{schema[:1500]}
Requirements:
- Generate ONLY the SQL query, no explanation
- Use PostgreSQL syntax
- Be precise with table and column names from the schema
- Return a single SELECT statement
SQL Query:"""
        print("=== DEBUG: Calling Google Gemini API...")
        generated_sql = query_gemini_api(prompt)
        print(f"=== DEBUG: Generated SQL raw: {generated_sql}")
        if not generated_sql or "No valid response" in generated_sql:
            return "Error: No SQL generated from Gemini", []

        # Clean the SQL output
        sql = clean_sql_output(generated_sql, user_limit)
        print(f"=== DEBUG: Final cleaned SQL: {sql}")
        if not sql or not sql.upper().strip().startswith('SELECT'):
            return f"Error: Invalid SQL generated: {sql}", []

        # The SQL comes from a model prompted with user text, so treat it as
        # untrusted: a remaining ';' means a second statement could ride
        # along behind the SELECT (e.g. "SELECT 1; DROP TABLE ..."). This
        # deliberately also rejects the rare query with ';' inside a string
        # literal.
        if ';' in sql:
            print("=== DEBUG: Rejected SQL containing multiple statements")
            return f"Error: Multiple SQL statements are not allowed: {sql}", []

        # Execute SQL
        print("=== DEBUG: Executing SQL...")
        try:
            results = execute_sql(sql)
            print(f"=== DEBUG: SQL executed successfully, {len(results)} results")
            return sql, results
        except Exception as e:
            print(f"=== DEBUG: SQL execution error: {e}")
            return f"Error: SQL execution failed: {e}", []
    except Exception as e:
        # Top-level boundary: report any unexpected failure to the caller as
        # an error tuple instead of raising.
        print(f"=== DEBUG: General error in text_to_sql: {e}")
        return f"Error: {e}", []