AnshulS committed on
Commit
2a13208
·
verified ·
1 Parent(s): 071304e

Update reranker.py

Browse files
Files changed (1) hide show
  1. reranker.py +84 -9
reranker.py CHANGED
@@ -7,9 +7,60 @@ import google.generativeai as genai
7
  genai.configure(api_key=os.environ.get("GEMINI_API_KEY", ""))
8
  model = genai.GenerativeModel("models/gemini-2.0-flash")
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def rerank(query, candidates):
11
  """
12
- Rerank the candidate assessments using Gemini.
 
13
 
14
  Args:
15
  query: The job description
@@ -26,9 +77,22 @@ def rerank(query, candidates):
26
  print(f"Reranking {len(candidates)} candidates")
27
  print(f"Sample candidate: {json.dumps(candidates[0], indent=2)}")
28
 
 
 
 
 
 
29
  # Clean up candidates data for API
30
  cleaned_candidates = []
 
 
31
  for candidate in candidates:
 
 
 
 
 
 
32
  # Create a clean copy
33
  clean_candidate = {}
34
 
@@ -51,15 +115,25 @@ def rerank(query, candidates):
51
 
52
  cleaned_candidates.append(clean_candidate)
53
 
54
- # Create the prompt for Gemini
55
  prompt = f"""
56
- Given a job description, rank the most relevant SHL assessments based on how well they match the job requirements.
57
 
58
  Job description: "{query}"
59
 
60
- Candidate SHL assessments: {json.dumps(cleaned_candidates, indent=2)}
 
 
 
 
 
 
 
 
 
 
61
 
62
- Rank the most relevant assessments and return a JSON list in this format:
63
  {{
64
  "recommended_assessments": [
65
  {{
@@ -76,10 +150,11 @@ def rerank(query, candidates):
76
  CRITICAL INSTRUCTIONS:
77
  1. Return ONLY valid JSON without any markdown code blocks or extra text
78
  2. Preserve the exact URL values from the input - do not modify them
79
- 3. Include all fields from the original assessment data
80
- 4. Limit to the top 10 most relevant assessments
81
- 5. Ensure the JSON is properly formatted with all fields
82
- 6. Keep all test_type values as arrays/lists, even if there's only one type
 
83
  """
84
 
85
  # Generate response
 
7
  genai.configure(api_key=os.environ.get("GEMINI_API_KEY", ""))
8
  model = genai.GenerativeModel("models/gemini-2.0-flash")
9
 
10
def extract_job_requirements(job_description):
    """
    Extract key job requirements from the job description to improve assessment matching.

    The description is scanned (case-insensitively, by plain substring match)
    for a fixed list of skill keywords and education/experience keywords.

    Args:
        job_description: Free-text job description. ``None`` or an empty
            string is treated as a description with no detectable requirements.

    Returns:
        A list of unique requirement strings, in discovery order:
        ``"Need for <skill>"`` for every skill keyword found, followed by the
        first sentence containing each education/experience keyword. If
        nothing is found, three generic fallback requirements are returned.
    """
    # Common skills and requirements categories to look for
    skill_categories = [
        "technical skills", "soft skills", "communication", "leadership",
        "management", "analytical", "problem-solving", "teamwork", "coding",
        "programming", "data analysis", "project management", "sales",
        "customer service", "administrative", "clerical", "organization",
        "attention to detail", "decision making", "numerical", "verbal",
    ]

    # Education and experience patterns
    education_patterns = [
        "bachelor", "master", "phd", "degree", "diploma", "certification",
        "years of experience", "years experience",
    ]

    # Guard against a missing description so the fallback list is returned
    # instead of raising AttributeError on None.
    text = job_description or ""
    text_lower = text.lower()

    requirements = []
    seen = set()  # Prevents the same requirement from being listed twice

    def add_requirement(item):
        if item not in seen:
            seen.add(item)
            requirements.append(item)

    # Check for skill categories
    for skill in skill_categories:
        if skill in text_lower:
            add_requirement(f"Need for {skill}")

    # Split into sentences once; the split does not depend on the pattern.
    sentences = [
        (sentence.lower(), sentence.strip()) for sentence in text.split('.')
    ]

    # Check for education and experience: keep the first sentence that
    # mentions each pattern. Several patterns often hit the same sentence
    # (e.g. "bachelor" and "degree"), hence the de-duplication above.
    for pattern in education_patterns:
        for sentence_lower, clean_sentence in sentences:
            if pattern in sentence_lower and clean_sentence:
                add_requirement(clean_sentence)
                break

    # If we couldn't find specific requirements, add some general ones
    if not requirements:
        requirements = [
            "General job aptitude assessment needed",
            "Personality and behavior evaluation",
            "Competency assessment for job fit",
        ]

    return requirements
59
+
60
  def rerank(query, candidates):
61
  """
62
+ Rerank the candidate assessments using Gemini with improved instructions
63
+ for relevance and diversity.
64
 
65
  Args:
66
  query: The job description
 
77
  print(f"Reranking {len(candidates)} candidates")
78
  print(f"Sample candidate: {json.dumps(candidates[0], indent=2)}")
79
 
80
+ # Extract key job requirements to improve matching
81
+ job_requirements = extract_job_requirements(query)
82
+ job_req_str = "\n".join([f"- {req}" for req in job_requirements])
83
+ print(f"Extracted job requirements: {len(job_requirements)} items")
84
+
85
  # Clean up candidates data for API
86
  cleaned_candidates = []
87
+ unique_urls = set() # Track URLs to avoid duplicates
88
+
89
  for candidate in candidates:
90
+ # Skip if we've already seen this URL
91
+ if candidate.get('url') in unique_urls:
92
+ continue
93
+
94
+ unique_urls.add(candidate.get('url', ''))
95
+
96
  # Create a clean copy
97
  clean_candidate = {}
98
 
 
115
 
116
  cleaned_candidates.append(clean_candidate)
117
 
118
+ # Create the enhanced prompt for Gemini
119
  prompt = f"""
120
+ As an SHL assessment expert, your task is to select the most appropriate assessments for a job position.
121
 
122
  Job description: "{query}"
123
 
124
+ Key job requirements identified:
125
+ {job_req_str}
126
+
127
+ Available SHL assessments: {json.dumps(cleaned_candidates, indent=2)}
128
+
129
+ Rank the 5 most relevant assessments based on how well they match the job requirements.
130
+ Focus on these ranking factors:
131
+ 1. Direct relevance to the job skills required
132
+ 2. Test types that assess the key job requirements
133
+ 3. Diversity of assessment methods (include different test types)
134
+ 4. Practical duration considering the role's seniority level
135
 
136
+ Return a JSON list in this format:
137
  {{
138
  "recommended_assessments": [
139
  {{
 
150
  CRITICAL INSTRUCTIONS:
151
  1. Return ONLY valid JSON without any markdown code blocks or extra text
152
  2. Preserve the exact URL values from the input - do not modify them
153
+ 3. Include all fields from the original assessment data exactly as provided
154
+ 4. Provide exactly 5 unique assessments with different URLs
155
+ 5. Ensure the result has diverse test types to comprehensively evaluate candidates
156
+ 6. Do not include duplicate assessments with the same URL
157
+ 7. Keep all test_type values as arrays/lists, even if there's only one type
158
  """
159
 
160
  # Generate response