# NOTE: "Spaces: / Sleeping / Sleeping" below is residual Hugging Face Spaces
# status text captured by the page scrape; it is not part of the program.
# Spaces: Sleeping Sleeping
import os
import re
import json

import google.generativeai as genai

# Configure the Gemini client from the environment. An empty key does not fail
# here; authentication errors surface only when the model is actually called.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY", ""))

# Shared model handle used by rerank().
model = genai.GenerativeModel("models/gemini-2.0-flash")
def extract_job_requirements(job_description):
    """
    Extract key job requirements from a free-text job description.

    Scans the description for known skill keywords and for education /
    experience phrases, collecting human-readable requirement strings.

    Args:
        job_description: Free-text job description.

    Returns:
        List of requirement strings. When nothing recognizable is found,
        a generic fallback list is returned so callers always get content.
    """
    # Common skills and requirements categories to look for.
    skill_categories = [
        "technical skills", "soft skills", "communication", "leadership",
        "management", "analytical", "problem-solving", "teamwork", "coding",
        "programming", "data analysis", "project management", "sales",
        "customer service", "administrative", "clerical", "organization",
        "attention to detail", "decision making", "numerical", "verbal",
    ]
    # Education/experience phrases whose surrounding sentence is captured.
    education_patterns = [
        "bachelor", "master", "phd", "degree", "diploma", "certification",
        "years of experience", "years experience",
    ]

    requirements = []
    job_desc_lower = job_description.lower()

    # Record each skill keyword that appears anywhere in the description.
    for skill in skill_categories:
        if skill in job_desc_lower:
            requirements.append(f"Need for {skill}")

    # Split into sentences once — hoisted out of the pattern loop, where the
    # original recomputed it per pattern — and capture the first sentence
    # containing each education/experience phrase.
    sentences = job_description.split('.')
    for pattern in education_patterns:
        if pattern in job_desc_lower:
            for sentence in sentences:
                if pattern in sentence.lower():
                    clean_sentence = sentence.strip()
                    # Dedupe: several patterns (e.g. "bachelor" and "degree")
                    # often land on the same sentence.
                    if clean_sentence and clean_sentence not in requirements:
                        requirements.append(clean_sentence)
                    break

    # Fall back to generic requirements when nothing specific was detected.
    if not requirements:
        requirements = [
            "General job aptitude assessment needed",
            "Personality and behavior evaluation",
            "Competency assessment for job fit",
        ]
    return requirements
def rerank(query, candidates):
    """
    Rerank candidate assessments using Gemini, with instructions that favor
    relevance to the job description and diversity of test types.

    Args:
        query: The job description.
        candidates: List of assessment dictionaries.

    Returns:
        Dict with a "recommended_assessments" list on success, or a dict
        with an "error" key on failure.
    """
    # Guard: nothing to rank.
    if not candidates:
        return {"error": "No candidate assessments to rerank"}

    # Debugging info.
    print(f"Reranking {len(candidates)} candidates")
    print(f"Sample candidate: {json.dumps(candidates[0], indent=2)}")

    # Extract key job requirements to improve matching.
    job_requirements = extract_job_requirements(query)
    job_req_str = "\n".join(f"- {req}" for req in job_requirements)
    print(f"Extracted job requirements: {len(job_requirements)} items")

    # Clean up candidate data for the API, dropping duplicates by URL.
    required_fields = ["url", "adaptive_support", "remote_support",
                       "description", "duration", "test_type"]
    cleaned_candidates = []
    seen_urls = set()
    for candidate in candidates:
        url = candidate.get('url')
        # Dedupe only on a real URL. The original checked membership with
        # get('url') (None when missing) but added get('url', '') ('' when
        # missing), so missing-URL candidates were never deduped — that
        # behavior is preserved here with one consistent key.
        if url is not None:
            if url in seen_urls:
                continue
            seen_urls.add(url)
        # Copy required fields, backfilling defaults for missing ones.
        # (The original had an if/else on test_type whose branches were
        # identical — collapsed to a plain copy.)
        clean_candidate = {}
        for field in required_fields:
            if field in candidate:
                clean_candidate[field] = candidate[field]
            elif field == "test_type":
                clean_candidate[field] = ["Unknown"]
            elif field == "duration":
                clean_candidate[field] = None
            else:
                clean_candidate[field] = "Unknown"
        cleaned_candidates.append(clean_candidate)

    # Enhanced prompt for Gemini.
    prompt = f"""
As an SHL assessment expert, your task is to select the most appropriate assessments for a job position.
Job description: "{query}"
Key job requirements identified:
{job_req_str}
Available SHL assessments: {json.dumps(cleaned_candidates, indent=2)}
Rank the 5 most relevant assessments based on how well they match the job requirements.
Focus on these ranking factors:
1. Direct relevance to the job skills required
2. Test types that assess the key job requirements
3. Diversity of assessment methods (include different test types)
4. Practical duration considering the role's seniority level
Return a JSON list in this format:
{{
"recommended_assessments": [
{{
"url": "...",
"adaptive_support": "Yes/No",
"remote_support": "Yes/No",
"description": "...",
"duration": integer or null,
"test_type": ["type1", "type2", ...]
}}
]
}}
CRITICAL INSTRUCTIONS:
1. Return ONLY valid JSON without any markdown code blocks or extra text
2. Preserve the exact URL values from the input - do not modify them
3. Include all fields from the original assessment data exactly as provided
4. Provide exactly 5 unique assessments with different URLs
5. Ensure the result has diverse test types to comprehensively evaluate candidates
6. Do not include duplicate assessments with the same URL
7. Keep all test_type values as arrays/lists, even if there's only one type
"""

    try:
        response = model.generate_content(prompt)
        response_text = response.text
        # Strip a possible markdown code fence around the JSON payload.
        json_match = re.search(r'```(?:json)?\s*(.*?)```', response_text, re.DOTALL)
        if json_match:
            response_text = json_match.group(1).strip()
        result = json.loads(response_text)
        # Validate the response structure.
        if "recommended_assessments" not in result:
            return {"error": "Invalid response format: missing recommended_assessments key"}
        # Backfill required fields and normalize test_type to a list.
        for assessment in result["recommended_assessments"]:
            assessment.setdefault("url", "https://www.shl.com/missing-url")
            assessment.setdefault("test_type", ["Unknown"])
            if not isinstance(assessment["test_type"], list):
                assessment["test_type"] = [assessment["test_type"]]
        return result
    except Exception as e:
        error_msg = f"Error in reranking: {str(e)}"
        print(error_msg)
        return {"error": error_msg}