Spaces:

AnshulS
/

endpointwebappshl

Sleeping

App Files Files Community

AnshulS committed on May 9

Commit

26b070d

verified ·

1 Parent(s): e5766c5

Update reranker.py

Browse files

Files changed (1) hide show

reranker.py +45 -16

reranker.py CHANGED Viewed

@@ -7,41 +7,70 @@ genai.configure(api_key=os.environ["GEMINI_API_KEY"])
 model = genai.GenerativeModel("models/gemini-2.0-flash")
 def rerank(query, candidates):
     prompt = f"""
     Job description: "{query}"
     Candidate SHL assessments: {json.dumps(candidates, indent=2)}
-    Rank the most relevant assessments and return a JSON list in this format:
     {{
       "recommended_assessments": [
         {{
-          "url": ...,
-          "adaptive_support": ...,
-          "remote_support": ...,
-          "description": ...,
-          "duration": ...,
-          "test_type": [...]
-        }}
       ]
     }}
-    Important instructions:
-    1. Return ONLY valid JSON without any markdown code blocks or extra text
-    2. The url field MUST be copied exactly as provided in the candidates, do not modify it
-    3. Make sure to include the full URL value as provided in the input candidates
-    4. Preserve all original data fields and their exact values (especially URLs)
     """
     response = model.generate_content(prompt)
     response_text = response.text
     # Try to extract JSON from possible markdown code blocks
-    json_match = re.search(r'```(?:json)?\s*(.*?)```', response_text, re.DOTALL)
     if json_match:
         response_text = json_match.group(1).strip()
     try:
-        return json.loads(response_text)
     except Exception as e:
-        return {"error": str(e), "raw_response": response_text}

 model = genai.GenerativeModel("models/gemini-2.0-flash")
 def rerank(query, candidates):
     """Rank candidate SHL assessments against a job description via the Gemini model.

     Builds a prompt from ``query`` and ``candidates``, sends it through the
     module-level ``model.generate_content`` call, and parses the reply as JSON.

     Args:
         query: Job description text interpolated into the prompt.
         candidates: Sequence of dict-like assessment records. Missing or
             ``None`` fields are filled with defaults in the copies sent to
             the model; the caller's records are left unmodified.

     Returns:
         A dict with a ``"recommended_assessments"`` list in which every entry
         carries all required output fields (missing ones set to ``"N/A"``).
         If the reply is not valid JSON, the dict holds an empty list plus an
         ``"error"`` message. A reply without a usable list yields an empty
         list.
     """
     candidate_fields = ("url", "adaptive_support", "remote_support", "description", "duration", "test_type")
     output_fields = ("assessment_name", "url", "remote_support", "adaptive_support", "duration", "test_type")
     # Add debug print
     print(f"Reranking {len(candidates)} candidates")
     if candidates:
         print(f"First candidate keys: {candidates[0].keys()}")
     # Ensure candidates have all required fields. Work on shallow copies so
     # the defaults never leak back into the caller's data.
     candidates = [dict(candidate) for candidate in candidates]
     for candidate in candidates:
         for field in candidate_fields:
             if candidate.get(field) is None:
                 candidate[field] = "N/A" if field != "test_type" else []
     prompt = f"""
     Job description: "{query}"
     Candidate SHL assessments: {json.dumps(candidates, indent=2)}
     Rank the most relevant assessments for this job description and return a JSON object with the top 10 (or fewer if there aren't 10 good matches).
     Your response must be ONLY valid JSON in this exact format:
     {{
       "recommended_assessments": [
         {{
           "assessment_name": "Name of the assessment",
           "url": "https://www.shl.com/...",
           "remote_support": "Yes/No",
           "adaptive_support": "Yes/No",
           "duration": "duration value",
           "test_type": "test type value"
         }},
         ...more assessments...
       ]
     }}
     IMPORTANT RULES:
     1. Return ONLY valid JSON without any markdown, explanations, or code blocks
     2. Copy the URL exactly as provided in the input - do not modify URLs
     3. Include at most 10 assessments, ranked by relevance to the job description
     4. If a field is missing in the input, use "N/A" as the value
     5. Make sure each assessment has all required fields
     """
     response = model.generate_content(prompt)
     response_text = response.text
     # Try to extract JSON from possible markdown code blocks. The pattern
     # needs a capture group: group(1) is the payload between the fences.
     json_match = re.search(r'```(?:json)?\s*(.*?)```', response_text, re.DOTALL)
     if json_match:
         response_text = json_match.group(1).strip()
     try:
         result = json.loads(response_text)
     except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
         print(f"Error parsing JSON: {e}")
         print(f"Raw response: {response_text}")
         return {"error": str(e), "recommended_assessments": []}
     # Validate the structure explicitly instead of relying on exceptions.
     if not isinstance(result, dict):
         return {"recommended_assessments": []}
     assessments = result.get("recommended_assessments")
     if not isinstance(assessments, list):
         return {"recommended_assessments": []}
     # Drop entries that are not JSON objects; they cannot carry the fields.
     assessments = [entry for entry in assessments if isinstance(entry, dict)]
     # Ensure all required fields exist in each assessment
     for assessment in assessments:
         for field in output_fields:
             assessment.setdefault(field, "N/A")
     result["recommended_assessments"] = assessments
     return result