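# endpointwebappshl / reranker.py
# Revision 26b070d (verified) -- "Update reranker.py", uploaded by AnshulS; 2.86 kB.
# (Header reconstructed from file-viewer page residue: avatar caption and the
# "raw" / "history" / "blame" navigation links.)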
import os
import re
import google.generativeai as genai
import json
# Authenticate the Gemini SDK once at import time. The key is read with a
# subscript (not .get), so a missing GEMINI_API_KEY raises KeyError on import.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# Shared model handle used by rerank() for every request.
model = genai.GenerativeModel(model_name="models/gemini-2.0-flash")
# Fields every candidate must carry before it is serialized into the prompt.
_CANDIDATE_FIELDS = (
    "url",
    "adaptive_support",
    "remote_support",
    "description",
    "duration",
    "test_type",
)

# Fields every assessment in the returned payload must carry.
_ASSESSMENT_FIELDS = (
    "assessment_name",
    "url",
    "remote_support",
    "adaptive_support",
    "duration",
    "test_type",
)

# Matches a Markdown code fence (optionally tagged "json") and captures its
# body, so a reply wrapped in a fenced block can still be parsed.
_FENCED_BLOCK_RE = re.compile(r"```(?:json)?\s*(.*?)```", re.DOTALL | re.IGNORECASE)


def _fill_missing_candidate_fields(candidates):
    """Fill absent or ``None`` candidate fields in place with placeholders."""
    for candidate in candidates:
        for field in _CANDIDATE_FIELDS:
            if candidate.get(field) is None:
                candidate[field] = [] if field == "test_type" else "N/A"


def _build_prompt(query, candidates):
    """Return the ranking prompt for *query* over the given *candidates*."""
    # The prompt lines are deliberately flush-left: leading whitespace inside
    # the triple-quoted string would become part of the text sent to the model.
    return f"""
Job description: "{query}"
Candidate SHL assessments: {json.dumps(candidates, indent=2)}
Rank the most relevant assessments for this job description and return a JSON object with the top 10 (or fewer if there aren't 10 good matches).
Your response must be ONLY valid JSON in this exact format:
{{
"recommended_assessments": [
{{
"assessment_name": "Name of the assessment",
"url": "https://www.shl.com/...",
"remote_support": "Yes/No",
"adaptive_support": "Yes/No",
"duration": "duration value",
"test_type": "test type value"
}},
...more assessments...
]
}}
IMPORTANT RULES:
1. Return ONLY valid JSON without any markdown, explanations, or code blocks
2. Copy the URL exactly as provided in the input - do not modify URLs
3. Include at most 10 assessments, ranked by relevance to the job description
4. If a field is missing in the input, use "N/A" as the value
5. Make sure each assessment has all required fields
"""


def _strip_code_fence(text):
    """Return the body of the first Markdown code fence in *text*, if any."""
    match = _FENCED_BLOCK_RE.search(text)
    return match.group(1).strip() if match else text


def _parse_assessments(response_text):
    """Parse the model reply into ``{"recommended_assessments": [...]}``.

    Raises:
        ValueError: if *response_text* is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        TypeError: if the assessment list or one of its entries has the
            wrong type.
    """
    result = json.loads(response_text)

    # A reply without the expected envelope is treated as "no recommendations".
    if not isinstance(result, dict) or "recommended_assessments" not in result:
        result = {"recommended_assessments": []}

    assessments = result["recommended_assessments"]
    if not isinstance(assessments, list):
        raise TypeError(
            "'recommended_assessments' must be a list, "
            f"got {type(assessments).__name__}"
        )

    for assessment in assessments:
        if not isinstance(assessment, dict):
            raise TypeError(
                f"each assessment must be an object, got {type(assessment).__name__}"
            )
        for field in _ASSESSMENT_FIELDS:
            assessment.setdefault(field, "N/A")
    return result


def rerank(query, candidates):
    """Ask the Gemini model to rank candidate assessments for a job description.

    Args:
        query: Job description text, interpolated into the prompt.
        candidates: List of candidate dicts. Missing or ``None`` values for
            the expected fields are filled IN PLACE ("N/A", or ``[]`` for
            ``test_type``) before the list is serialized into the prompt.
            An empty list or ``None`` short-circuits without calling the model.

    Returns:
        A dict with a ``"recommended_assessments"`` list in which every entry
        has all expected fields (absent ones set to "N/A"). If the model reply
        cannot be read or parsed, the list is empty and an ``"error"`` key
        holds the failure message.
    """
    if not candidates:
        # Nothing to rank: skip the model call instead of asking it to rank
        # an empty list (which could only yield nothing or invented entries).
        print("Reranking 0 candidates")
        return {"recommended_assessments": []}

    # Debug output describing the incoming payload.
    print(f"Reranking {len(candidates)} candidates")
    print(f"First candidate keys: {candidates[0].keys()}")

    _fill_missing_candidate_fields(candidates)
    response = model.generate_content(_build_prompt(query, candidates))

    try:
        # The SDK's ``text`` accessor raises ValueError when the response
        # carries no usable text part (for example a blocked response).
        raw_text = response.text
    except ValueError as e:
        print(f"Error reading model response: {e}")
        return {"error": str(e), "recommended_assessments": []}

    # Despite the prompt rules, the model may still wrap JSON in a code fence.
    response_text = _strip_code_fence(raw_text)
    try:
        return _parse_assessments(response_text)
    except (ValueError, TypeError) as e:
        print(f"Error parsing JSON: {e}")
        print(f"Raw response: {response_text}")
        return {"error": str(e), "recommended_assessments": []}