Spaces:
Sleeping
Sleeping
File size: 4,241 Bytes
c0029e4 3120871 c0029e4 9d9d3fa c0029e4 9d9d3fa 06b8d86 c0029e4 9d9d3fa 9967a24 9d9d3fa 9967a24 9d9d3fa 9967a24 9d9d3fa c0029e4 3120871 9967a24 3120871 9967a24 c0029e4 9d9d3fa c0029e4 3120871 9d9d3fa 409518b 9d9d3fa 9967a24 c0029e4 3120871 9d9d3fa c0029e4 9d9d3fa c0029e4 9d9d3fa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
import os
import re
import json
import google.generativeai as genai
# Authenticate the Gemini SDK with the key taken from the environment
# (an empty string when GEMINI_API_KEY is unset), then create the shared
# model handle that rerank() sends its prompts to.
genai.configure(api_key=os.getenv("GEMINI_API_KEY", ""))
model = genai.GenerativeModel("models/gemini-2.0-flash")
# Upper bound on returned recommendations; mirrors the "top 10" rule in the prompt.
_MAX_RECOMMENDATIONS = 10

# Fields forwarded to the model for every candidate assessment.
_CANDIDATE_FIELDS = (
    "url",
    "adaptive_support",
    "remote_support",
    "description",
    "duration",
    "test_type",
)

# Matches a Markdown code fence (optionally tagged "json") and captures its body.
_CODE_FENCE_RE = re.compile(r'```(?:json)?\s*(.*?)```', re.DOTALL)


def _default_for(field):
    """Return the placeholder used when a candidate lacks *field*."""
    if field == "test_type":
        return ["Unknown"]
    if field == "duration":
        return None
    return "Unknown"


def _clean_candidate(candidate):
    """Project *candidate* onto the prompt fields, filling gaps with defaults."""
    cleaned = {}
    for field in _CANDIDATE_FIELDS:
        if field not in candidate:
            cleaned[field] = _default_for(field)
            continue
        value = candidate[field]
        # The prompt promises test_type is always a list, so wrap scalars here
        # the same way the model's reply is normalized further down.
        if field == "test_type" and not isinstance(value, list):
            value = [value]
        cleaned[field] = value
    return cleaned


def _build_prompt(query, cleaned_candidates):
    """Render the reranking prompt; raises if the candidates are not JSON-serializable."""
    # The text is kept flush-left so no source indentation leaks into the prompt.
    return f"""
Given a job description, rank the most relevant SHL assessments based on how well they match the job requirements.
Job description: "{query}"
Candidate SHL assessments: {json.dumps(cleaned_candidates, indent=2)}
Rank the most relevant assessments and return a JSON list in this format:
{{
"recommended_assessments": [
{{
"url": "...",
"adaptive_support": "Yes/No",
"remote_support": "Yes/No",
"description": "...",
"duration": integer or null,
"test_type": ["type1", "type2", ...]
}}
]
}}
CRITICAL INSTRUCTIONS:
1. Return ONLY valid JSON without any markdown code blocks or extra text
2. Preserve the exact URL values from the input - do not modify them
3. Include all fields from the original assessment data
4. Limit to the top {_MAX_RECOMMENDATIONS} most relevant assessments
5. Ensure the JSON is properly formatted with all fields
6. Keep all test_type values as arrays/lists, even if there's only one type
"""


def _extract_json_text(response_text):
    """Return the body of the first Markdown code fence, or the text unchanged."""
    fenced = _CODE_FENCE_RE.search(response_text)
    if fenced:
        return fenced.group(1).strip()
    return response_text


def rerank(query, candidates):
    """
    Rerank the candidate assessments using Gemini.

    Args:
        query: The job description.
        candidates: List of assessment dictionaries. Only the fields in
            ``_CANDIDATE_FIELDS`` are sent to the model; missing ones are
            replaced by placeholders.

    Returns:
        A dictionary. On success it is the parsed model reply, holding a
        "recommended_assessments" list (at most ``_MAX_RECOMMENDATIONS``
        entries, each with a "url" and a list-valued "test_type"). On failure
        it is ``{"error": <message>}``; problems while cleaning the
        candidates, building the prompt, calling the model or parsing its
        reply are reported this way instead of being raised.
    """
    # Ensure we have candidates
    if not candidates:
        return {"error": "No candidate assessments to rerank"}

    # Debug output. default=str keeps this purely informational line from
    # raising on values json cannot encode (sets, numpy scalars, ...).
    print(f"Reranking {len(candidates)} candidates")
    print(f"Sample candidate: {json.dumps(candidates[0], indent=2, default=str)}")

    try:
        # Cleaning and prompt construction live inside the try so that
        # malformed or unserializable candidates yield an error dict, like
        # every other failure of this function.
        cleaned_candidates = [_clean_candidate(candidate) for candidate in candidates]
        prompt = _build_prompt(query, cleaned_candidates)

        response = model.generate_content(prompt)
        # The model sometimes wraps its JSON in a code fence despite the prompt.
        result = json.loads(_extract_json_text(response.text))

        # Validate the response structure explicitly rather than relying on
        # an incidental TypeError further down.
        if not isinstance(result, dict) or "recommended_assessments" not in result:
            return {"error": "Invalid response format: missing recommended_assessments key"}
        assessments = result["recommended_assessments"]
        if not isinstance(assessments, list) or not all(
            isinstance(assessment, dict) for assessment in assessments
        ):
            return {"error": "Invalid response format: recommended_assessments must be a list of objects"}

        # Enforce the limit the prompt asks for; the model may ignore it.
        assessments = assessments[:_MAX_RECOMMENDATIONS]
        result["recommended_assessments"] = assessments

        # Ensure each assessment has the required fields
        for assessment in assessments:
            if "url" not in assessment:
                assessment["url"] = "https://www.shl.com/missing-url"
            if "test_type" not in assessment:
                assessment["test_type"] = ["Unknown"]
            if not isinstance(assessment["test_type"], list):
                assessment["test_type"] = [assessment["test_type"]]
        return result
    except Exception as e:
        # Top-level boundary: SDK, network and parsing errors are all
        # surfaced to the caller as an error dict.
        error_msg = f"Error in reranking: {str(e)}"
        print(error_msg)
        return {"error": error_msg}