File size: 7,246 Bytes
c0029e4
3120871
c0029e4
9d9d3fa
c0029e4
9d9d3fa
 
06b8d86
c0029e4
2a13208
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c0029e4
9d9d3fa
2a13208
 
9d9d3fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2a13208
 
 
 
 
9d9d3fa
 
2a13208
 
9d9d3fa
2a13208
 
 
 
 
 
9d9d3fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2a13208
9d9d3fa
2a13208
9d9d3fa
c0029e4
3120871
2a13208
 
 
 
 
 
 
 
 
 
 
3120871
2a13208
c0029e4
9d9d3fa
 
 
 
 
 
 
 
 
 
c0029e4
3120871
9d9d3fa
409518b
9d9d3fa
2a13208
 
 
 
 
c0029e4
3120871
9d9d3fa
c0029e4
9d9d3fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c0029e4
9d9d3fa
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import os
import re
import json
import google.generativeai as genai

# Configure the Gemini API at import time. The key is read from the
# GEMINI_API_KEY environment variable and falls back to an empty string when
# the variable is unset.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY", ""))
# Module-level model handle; rerank() sends its prompt through this object.
model = genai.GenerativeModel("models/gemini-2.0-flash")

def extract_job_requirements(job_description):
    """
    Extract key job requirements from the job description to improve assessment matching.

    Matching is a case-insensitive substring search. Every skill keyword found
    yields a "Need for <skill>" entry; every education/experience keyword found
    yields the first sentence (text between '.' characters) that contains it.

    Args:
        job_description: Free-text job description. ``None`` or an empty
            string is treated as "nothing to extract".

    Returns:
        List of unique requirement strings in discovery order. When nothing
        matches, a fixed list of three generic requirements is returned.
    """
    # Generic requirements used when nothing specific can be extracted
    general_requirements = [
        "General job aptitude assessment needed",
        "Personality and behavior evaluation",
        "Competency assessment for job fit"
    ]

    # Nothing to scan: avoid calling .lower() on None
    if not job_description:
        return general_requirements

    # Common skills and requirements categories to look for
    skill_categories = [
        "technical skills", "soft skills", "communication", "leadership",
        "management", "analytical", "problem-solving", "teamwork", "coding",
        "programming", "data analysis", "project management", "sales",
        "customer service", "administrative", "clerical", "organization",
        "attention to detail", "decision making", "numerical", "verbal"
    ]

    # Education and experience patterns
    education_patterns = [
        "bachelor", "master", "phd", "degree", "diploma", "certification",
        "years of experience", "years experience"
    ]

    job_desc_lower = job_description.lower()

    # Check for skill categories
    requirements = [
        f"Need for {skill}" for skill in skill_categories if skill in job_desc_lower
    ]

    # Split into sentences once; the split does not depend on the pattern
    sentences = [sentence.strip() for sentence in job_description.split('.')]
    sentences_lower = [sentence.lower() for sentence in sentences]

    # Check for education and experience: keep the first sentence per pattern
    for pattern in education_patterns:
        for sentence, sentence_lower in zip(sentences, sentences_lower):
            if pattern in sentence_lower:
                if sentence:
                    requirements.append(sentence)
                break

    # One sentence can match several patterns (e.g. "bachelor" and "degree");
    # drop the repeats while preserving first-seen order.
    requirements = list(dict.fromkeys(requirements))

    # If we couldn't find specific requirements, fall back to general ones
    if not requirements:
        return general_requirements

    return requirements

def _clean_candidates(candidates):
    """Return de-duplicated, prompt-ready copies of the candidate assessments."""
    fields = ("url", "adaptive_support", "remote_support", "description", "duration", "test_type")
    cleaned_candidates = []
    seen_urls = set()  # Track URLs to avoid duplicates

    for candidate in candidates:
        url = candidate.get("url")
        # De-duplicate only on real URLs; candidates without one cannot be
        # identified as the same assessment, so they are all kept.
        if url:
            if url in seen_urls:
                continue
            seen_urls.add(url)

        clean_candidate = {}
        for field in fields:
            if field in candidate:
                value = candidate[field]
                # The prompt requires test_type to be a list, so wrap scalars
                if field == "test_type" and not isinstance(value, list):
                    value = list(value) if isinstance(value, tuple) else [value]
                clean_candidate[field] = value
            elif field == "test_type":
                clean_candidate[field] = ["Unknown"]
            elif field == "duration":
                clean_candidate[field] = None
            else:
                clean_candidate[field] = "Unknown"

        cleaned_candidates.append(clean_candidate)

    return cleaned_candidates


def _parse_rerank_response(response_text):
    """Parse the model reply into a validated result dict, or an error dict."""
    # Try to extract JSON from possible markdown code blocks
    json_match = re.search(r'```(?:json)?\s*(.*?)```', response_text, re.DOTALL)
    if json_match:
        response_text = json_match.group(1).strip()

    result = json.loads(response_text)

    # Only a JSON object carrying the expected key is acceptable
    if not isinstance(result, dict) or "recommended_assessments" not in result:
        return {"error": "Invalid response format: missing recommended_assessments key"}

    # Ensure each assessment has the required fields
    for assessment in result["recommended_assessments"]:
        if "url" not in assessment:
            assessment["url"] = "https://www.shl.com/missing-url"
        if "test_type" not in assessment:
            assessment["test_type"] = ["Unknown"]
        if not isinstance(assessment["test_type"], list):
            assessment["test_type"] = [assessment["test_type"]]

    return result


def rerank(query, candidates):
    """
    Rerank the candidate assessments using Gemini with improved instructions
    for relevance and diversity.

    Candidates are de-duplicated by URL and reduced to the fields the prompt
    needs, with defaults for missing fields. The model's reply is parsed as
    JSON (optionally wrapped in a markdown code fence) and each returned
    assessment is given a ``url`` and a list-valued ``test_type`` if the model
    omitted or mis-typed them.

    Args:
        query: The job description
        candidates: List of assessment dictionaries

    Returns:
        Dictionary containing the recommended assessments under the
        ``recommended_assessments`` key, or ``{"error": "<message>"}`` when
        there are no candidates, the candidates cannot be serialized, the
        model call fails, or the reply is not valid JSON of the expected shape.
    """
    # Ensure we have candidates
    if not candidates:
        return {"error": "No candidate assessments to rerank"}

    # Print debugging info; default=str keeps this dump from crashing on
    # values that JSON cannot encode.
    print(f"Reranking {len(candidates)} candidates")
    print(f"Sample candidate: {json.dumps(candidates[0], indent=2, default=str)}")

    # Extract key job requirements to improve matching
    job_requirements = extract_job_requirements(query)
    job_req_str = "\n".join([f"- {req}" for req in job_requirements])
    print(f"Extracted job requirements: {len(job_requirements)} items")

    # Clean up candidates data for API
    cleaned_candidates = _clean_candidates(candidates)

    # Serialization problems are reported like every other failure of this
    # function instead of escaping as an exception.
    try:
        candidates_json = json.dumps(cleaned_candidates, indent=2)
    except (TypeError, ValueError) as e:
        error_msg = f"Error in reranking: {str(e)}"
        print(error_msg)
        return {"error": error_msg}

    # Create the enhanced prompt for Gemini
    prompt = f"""
    As an SHL assessment expert, your task is to select the most appropriate assessments for a job position.
    
    Job description: "{query}"
    
    Key job requirements identified:
    {job_req_str}
    
    Available SHL assessments: {candidates_json}
    
    Rank the 5 most relevant assessments based on how well they match the job requirements. 
    Focus on these ranking factors:
    1. Direct relevance to the job skills required
    2. Test types that assess the key job requirements
    3. Diversity of assessment methods (include different test types)
    4. Practical duration considering the role's seniority level
    
    Return a JSON list in this format:
    {{
        "recommended_assessments": [
            {{
                "url": "...",
                "adaptive_support": "Yes/No",
                "remote_support": "Yes/No",
                "description": "...",
                "duration": integer or null,
                "test_type": ["type1", "type2", ...]
            }}
        ]
    }}
    
    CRITICAL INSTRUCTIONS:
    1. Return ONLY valid JSON without any markdown code blocks or extra text
    2. Preserve the exact URL values from the input - do not modify them
    3. Include all fields from the original assessment data exactly as provided
    4. Provide exactly 5 unique assessments with different URLs
    5. Ensure the result has diverse test types to comprehensively evaluate candidates
    6. Do not include duplicate assessments with the same URL
    7. Keep all test_type values as arrays/lists, even if there's only one type
    """

    # Generate response. This is the API boundary: any failure from the model
    # call or from parsing its reply is logged and returned as an error dict.
    try:
        response = model.generate_content(prompt)
        return _parse_rerank_response(response.text)
    except Exception as e:
        error_msg = f"Error in reranking: {str(e)}"
        print(error_msg)
        return {"error": error_msg}