# Spaces: Sleeping  (page-status banner captured along with the source; not part of the program)
import os | |
import re | |
import json | |
import google.generativeai as genai | |
# Gemini client setup. The API key is read from the GEMINI_API_KEY
# environment variable (an empty string is passed when it is unset), and
# one module-level model handle is created for the functions below.
genai.configure(api_key=os.getenv("GEMINI_API_KEY", ""))
model = genai.GenerativeModel("models/gemini-2.0-flash")
# Fields forwarded to Gemini for every candidate assessment.
_ASSESSMENT_FIELDS = (
    "url",
    "adaptive_support",
    "remote_support",
    "description",
    "duration",
    "test_type",
)

# Prompt template; ``{{`` / ``}}`` are literal braces in the JSON example.
_RERANK_PROMPT = """
Given a job description, rank the most relevant SHL assessments based on how well they match the job requirements.
Job description: "{query}"
Candidate SHL assessments: {candidates_json}
Rank the most relevant assessments and return a JSON object in this format:
{{
"recommended_assessments": [
{{
"url": "...",
"adaptive_support": "Yes/No",
"remote_support": "Yes/No",
"description": "...",
"duration": integer or null,
"test_type": ["type1", "type2", ...]
}}
]
}}
CRITICAL INSTRUCTIONS:
1. Return ONLY valid JSON without any markdown code blocks or extra text
2. Preserve the exact URL values from the input - do not modify them
3. Include all fields from the original assessment data
4. Limit to the top {top_k} most relevant assessments
5. Ensure the JSON is properly formatted with all fields
6. Keep all test_type values as arrays/lists, even if there's only one type
"""


def _as_type_list(value):
    """Return *value* as a list so ``test_type`` is always an array."""
    if isinstance(value, list):
        return value
    if isinstance(value, (tuple, set, frozenset)):
        return list(value)
    return [value]


def _clean_candidate(candidate):
    """Copy the forwarded fields of one candidate, filling in defaults."""
    cleaned = {}
    for field in _ASSESSMENT_FIELDS:
        if field in candidate:
            value = candidate[field]
            # A scalar test_type is wrapped so the model always sees a list.
            cleaned[field] = _as_type_list(value) if field == "test_type" else value
        elif field == "test_type":
            cleaned[field] = ["Unknown"]
        elif field == "duration":
            cleaned[field] = None
        else:
            cleaned[field] = "Unknown"
    return cleaned


def _parse_model_json(response_text):
    """Parse the JSON payload out of the model's reply text.

    Handles a reply wrapped in a markdown code fence, and falls back to the
    outermost ``{...}`` span when the reply has prose around the JSON.
    Raises ``json.JSONDecodeError`` when no JSON can be recovered.
    """
    fenced = re.search(r'```(?:json)?\s*(.*?)```', response_text, re.DOTALL)
    if fenced:
        response_text = fenced.group(1)
    response_text = response_text.strip()
    try:
        return json.loads(response_text)
    except json.JSONDecodeError:
        start = response_text.find("{")
        end = response_text.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(response_text[start:end + 1])


def _normalize_assessment(assessment):
    """Ensure one returned assessment has a ``url`` and a list ``test_type``."""
    if "url" not in assessment:
        assessment["url"] = "https://www.shl.com/missing-url"
    if "test_type" not in assessment:
        assessment["test_type"] = ["Unknown"]
    if not isinstance(assessment["test_type"], list):
        assessment["test_type"] = [assessment["test_type"]]
    return assessment


def rerank(query, candidates, *, top_k=10):
    """
    Rerank the candidate assessments using Gemini.

    Args:
        query: The job description
        candidates: List of assessment dictionaries
        top_k: Maximum number of assessments to request from the model and
            to keep in the result (positive integer, default 10)

    Returns:
        Dictionary containing the recommended assessments under the
        "recommended_assessments" key, or a dictionary with a single
        "error" key describing what went wrong. Failures inside the
        reranking step are caught and reported this way, not raised.
    """
    # Ensure we have candidates
    if not candidates:
        return {"error": "No candidate assessments to rerank"}

    try:
        # Debug output; default=str keeps an unusual value type in the
        # sample from aborting the whole call.
        print(f"Reranking {len(candidates)} candidates")
        print(f"Sample candidate: {json.dumps(candidates[0], indent=2, default=str)}")

        cleaned_candidates = [_clean_candidate(c) for c in candidates]
        prompt = _RERANK_PROMPT.format(
            query=query,
            candidates_json=json.dumps(cleaned_candidates, indent=2),
            top_k=top_k,
        )

        response = model.generate_content(prompt)
        result = _parse_model_json(response.text)

        # Tolerate a bare list of assessments in place of the wrapper object.
        if isinstance(result, list):
            result = {"recommended_assessments": result}
        if not isinstance(result, dict) or "recommended_assessments" not in result:
            return {"error": "Invalid response format: missing recommended_assessments key"}

        assessments = result["recommended_assessments"]
        if not isinstance(assessments, list):
            return {"error": "Invalid response format: recommended_assessments is not a list"}

        # Drop malformed (non-dict) entries, enforce the requested limit,
        # then fill in any missing required fields.
        usable = [entry for entry in assessments if isinstance(entry, dict)]
        result["recommended_assessments"] = [
            _normalize_assessment(entry) for entry in usable[:top_k]
        ]
        return result
    except Exception as e:
        # Boundary handler: callers receive an error dictionary, not an
        # exception, for any failure in the steps above.
        error_msg = f"Error in reranking: {str(e)}"
        print(error_msg)
        return {"error": error_msg}