# Page header captured along with this file: "Spaces: Sleeping"; file size: 7,246 bytes.
# (The file viewer's per-line commit-hash column and line-number gutter were not source text and are omitted.)
import os
import re
import json
import google.generativeai as genai
# Set up the Gemini client. The API key is read from the GEMINI_API_KEY
# environment variable, falling back to an empty string when it is unset.
genai.configure(api_key=os.getenv("GEMINI_API_KEY", ""))
# Module-level model handle used for content generation requests.
model = genai.GenerativeModel("models/gemini-2.0-flash")
def extract_job_requirements(job_description):
    """
    Extract key job requirements from a job description to improve assessment matching.

    Matching is a case-insensitive substring search. Every known skill keyword
    found in the text yields a "Need for <skill>" entry, and every
    education/experience keyword yields the first sentence (the text is split
    on '.') that contains it.

    Args:
        job_description: Free-text job description. None or an empty string is
            treated as a description with no recognizable requirements.

    Returns:
        List of requirement strings without duplicates: skill entries first,
        then education/experience sentences in keyword order. When nothing
        matches, three generic fallback requirements are returned instead.
    """
    # Common skills and requirements categories to look for
    skill_categories = [
        "technical skills", "soft skills", "communication", "leadership",
        "management", "analytical", "problem-solving", "teamwork", "coding",
        "programming", "data analysis", "project management", "sales",
        "customer service", "administrative", "clerical", "organization",
        "attention to detail", "decision making", "numerical", "verbal",
    ]
    # Education and experience patterns
    education_patterns = [
        "bachelor", "master", "phd", "degree", "diploma", "certification",
        "years of experience", "years experience",
    ]

    # None / empty input simply matches nothing and falls through to the
    # generic requirements below instead of raising on .lower().
    text = job_description or ""
    text_lower = text.lower()

    requirements = [
        f"Need for {skill}" for skill in skill_categories if skill in text_lower
    ]

    # Split into sentences once rather than once per matching pattern.
    sentences = [part.strip() for part in text.split('.')]

    # Several keywords often live in the same sentence ("Bachelor degree ...");
    # track what was already reported so each sentence appears only once.
    seen = set(requirements)
    for pattern in education_patterns:
        # Only the first sentence mentioning the pattern is reported.
        match = next((s for s in sentences if pattern in s.lower()), None)
        if match is not None and match not in seen:
            seen.add(match)
            requirements.append(match)

    # If we couldn't find specific requirements, add some general ones
    if not requirements:
        requirements = [
            "General job aptitude assessment needed",
            "Personality and behavior evaluation",
            "Competency assessment for job fit",
        ]
    return requirements
def _as_type_list(value):
    """Normalize a test_type value to a list; None becomes ["Unknown"]."""
    if value is None:
        return ["Unknown"]
    if isinstance(value, list):
        return value
    if isinstance(value, tuple):
        return list(value)
    return [value]


def _clean_candidates(candidates):
    """Return de-duplicated candidates reduced to the fields sent to the model."""
    fields = ("url", "adaptive_support", "remote_support", "description", "duration", "test_type")
    cleaned = []
    seen_urls = set()
    for candidate in candidates:
        if not isinstance(candidate, dict):
            # Entries that are not dictionaries cannot be described to the model.
            continue
        url = candidate.get("url")
        # De-duplicate only on real URLs; entries without one are all kept
        # because there is nothing reliable to compare them by.
        if url:
            if url in seen_urls:
                continue
            seen_urls.add(url)

        clean_candidate = {}
        for field in fields:
            if field in candidate:
                value = candidate[field]
                # The prompt requires test_type to always be a list.
                clean_candidate[field] = _as_type_list(value) if field == "test_type" else value
            elif field == "test_type":
                clean_candidate[field] = ["Unknown"]
            elif field == "duration":
                clean_candidate[field] = None
            else:
                clean_candidate[field] = "Unknown"
        cleaned.append(clean_candidate)
    return cleaned


def _build_rerank_prompt(query, job_req_str, cleaned_candidates):
    """Build the Gemini prompt from the job text, requirements and candidates."""
    candidates_json = json.dumps(cleaned_candidates, indent=2)
    return f"""
As an SHL assessment expert, your task is to select the most appropriate assessments for a job position.
Job description: "{query}"
Key job requirements identified:
{job_req_str}
Available SHL assessments: {candidates_json}
Rank the 5 most relevant assessments based on how well they match the job requirements.
Focus on these ranking factors:
1. Direct relevance to the job skills required
2. Test types that assess the key job requirements
3. Diversity of assessment methods (include different test types)
4. Practical duration considering the role's seniority level
Return a JSON list in this format:
{{
"recommended_assessments": [
{{
"url": "...",
"adaptive_support": "Yes/No",
"remote_support": "Yes/No",
"description": "...",
"duration": integer or null,
"test_type": ["type1", "type2", ...]
}}
]
}}
CRITICAL INSTRUCTIONS:
1. Return ONLY valid JSON without any markdown code blocks or extra text
2. Preserve the exact URL values from the input - do not modify them
3. Include all fields from the original assessment data exactly as provided
4. Provide exactly 5 unique assessments with different URLs
5. Ensure the result has diverse test types to comprehensively evaluate candidates
6. Do not include duplicate assessments with the same URL
7. Keep all test_type values as arrays/lists, even if there's only one type
"""


def rerank(query, candidates):
    """
    Rerank the candidate assessments using Gemini with improved instructions
    for relevance and diversity.

    The function never raises for model or parsing failures; every failure is
    reported as a dictionary with a single "error" key.

    Args:
        query: The job description (None is treated as an empty description)
        candidates: List of assessment dictionaries

    Returns:
        Dictionary containing the recommended assessments under the
        "recommended_assessments" key, or {"error": "<message>"} on failure.
    """
    # Ensure we have candidates
    if not candidates:
        return {"error": "No candidate assessments to rerank"}

    job_text = query or ""

    # Print debugging info. default=str keeps this diagnostic dump from
    # raising when a candidate holds a value json cannot serialize natively.
    print(f"Reranking {len(candidates)} candidates")
    print(f"Sample candidate: {json.dumps(candidates[0], indent=2, default=str)}")

    # Extract key job requirements to improve matching
    job_requirements = extract_job_requirements(job_text)
    job_req_str = "\n".join(f"- {req}" for req in job_requirements)
    print(f"Extracted job requirements: {len(job_requirements)} items")

    cleaned_candidates = _clean_candidates(candidates)
    if not cleaned_candidates:
        return {"error": "No candidate assessments to rerank"}

    try:
        # Prompt construction serializes the candidates, so it lives inside
        # the try: a serialization failure becomes an error dict, not a crash.
        prompt = _build_rerank_prompt(job_text, job_req_str, cleaned_candidates)
        response = model.generate_content(prompt)
        response_text = response.text

        # Try to extract JSON from possible markdown code blocks
        json_match = re.search(r'```(?:json)?\s*(.*?)```', response_text, re.DOTALL)
        if json_match:
            response_text = json_match.group(1).strip()

        result = json.loads(response_text)

        # Validate the response structure
        if not isinstance(result, dict) or "recommended_assessments" not in result:
            return {"error": "Invalid response format: missing recommended_assessments key"}
        assessments = result["recommended_assessments"]
        if not isinstance(assessments, list):
            return {"error": "Invalid response format: recommended_assessments is not a list"}

        # Keep only well-formed entries and make sure each has the required fields
        valid_assessments = []
        for assessment in assessments:
            if not isinstance(assessment, dict):
                continue
            if "url" not in assessment:
                assessment["url"] = "https://www.shl.com/missing-url"
            assessment["test_type"] = _as_type_list(assessment.get("test_type"))
            valid_assessments.append(assessment)
        result["recommended_assessments"] = valid_assessments
        return result
    except Exception as e:
        # Deliberately broad: this is the boundary to the model call and the
        # parser, and callers rely on getting an error dict rather than an
        # exception.
        error_msg = f"Error in reranking: {str(e)}"
        print(error_msg)
        return {"error": error_msg}