AnshulS committed on
Commit
9967a24
·
verified ·
1 Parent(s): 831f81c

Update reranker.py

Browse files
Files changed (1) hide show
  1. reranker.py +9 -84
reranker.py CHANGED
@@ -7,60 +7,9 @@ import google.generativeai as genai
7
  genai.configure(api_key=os.environ.get("GEMINI_API_KEY", ""))
8
  model = genai.GenerativeModel("models/gemini-2.0-flash")
9
 
10
- def extract_job_requirements(job_description):
11
- """
12
- Extract key job requirements from the job description to improve assessment matching.
13
- """
14
- # Common skills and requirements categories to look for
15
- skill_categories = [
16
- "technical skills", "soft skills", "communication", "leadership",
17
- "management", "analytical", "problem-solving", "teamwork", "coding",
18
- "programming", "data analysis", "project management", "sales",
19
- "customer service", "administrative", "clerical", "organization",
20
- "attention to detail", "decision making", "numerical", "verbal"
21
- ]
22
-
23
- # Education and experience patterns
24
- education_patterns = [
25
- "bachelor", "master", "phd", "degree", "diploma", "certification",
26
- "years of experience", "years experience"
27
- ]
28
-
29
- # Extract requirements from the job description
30
- requirements = []
31
- job_desc_lower = job_description.lower()
32
-
33
- # Check for skill categories
34
- for skill in skill_categories:
35
- if skill in job_desc_lower:
36
- requirements.append(f"Need for {skill}")
37
-
38
- # Check for education and experience
39
- for pattern in education_patterns:
40
- if pattern in job_desc_lower:
41
- # Try to find the sentence containing this pattern
42
- sentences = job_description.split('.')
43
- for sentence in sentences:
44
- if pattern in sentence.lower():
45
- clean_sentence = sentence.strip()
46
- if clean_sentence:
47
- requirements.append(clean_sentence)
48
- break
49
-
50
- # If we couldn't find specific requirements, add some general ones
51
- if not requirements:
52
- requirements = [
53
- "General job aptitude assessment needed",
54
- "Personality and behavior evaluation",
55
- "Competency assessment for job fit"
56
- ]
57
-
58
- return requirements
59
-
60
  def rerank(query, candidates):
61
  """
62
- Rerank the candidate assessments using Gemini with improved instructions
63
- for relevance and diversity.
64
 
65
  Args:
66
  query: The job description
@@ -77,22 +26,9 @@ def rerank(query, candidates):
77
  print(f"Reranking {len(candidates)} candidates")
78
  print(f"Sample candidate: {json.dumps(candidates[0], indent=2)}")
79
 
80
- # Extract key job requirements to improve matching
81
- job_requirements = extract_job_requirements(query)
82
- job_req_str = "\n".join([f"- {req}" for req in job_requirements])
83
- print(f"Extracted job requirements: {len(job_requirements)} items")
84
-
85
  # Clean up candidates data for API
86
  cleaned_candidates = []
87
- unique_urls = set() # Track URLs to avoid duplicates
88
-
89
  for candidate in candidates:
90
- # Skip if we've already seen this URL
91
- if candidate.get('url') in unique_urls:
92
- continue
93
-
94
- unique_urls.add(candidate.get('url', ''))
95
-
96
  # Create a clean copy
97
  clean_candidate = {}
98
 
@@ -115,25 +51,15 @@ def rerank(query, candidates):
115
 
116
  cleaned_candidates.append(clean_candidate)
117
 
118
- # Create the enhanced prompt for Gemini
119
  prompt = f"""
120
- As an SHL assessment expert, your task is to select the most appropriate assessments for a job position.
121
 
122
  Job description: "{query}"
123
 
124
- Key job requirements identified:
125
- {job_req_str}
126
-
127
- Available SHL assessments: {json.dumps(cleaned_candidates, indent=2)}
128
-
129
- Rank the 5 most relevant assessments based on how well they match the job requirements.
130
- Focus on these ranking factors:
131
- 1. Direct relevance to the job skills required
132
- 2. Test types that assess the key job requirements
133
- 3. Diversity of assessment methods (include different test types)
134
- 4. Practical duration considering the role's seniority level
135
 
136
- Return a JSON list in this format:
137
  {{
138
  "recommended_assessments": [
139
  {{
@@ -150,11 +76,10 @@ def rerank(query, candidates):
150
  CRITICAL INSTRUCTIONS:
151
  1. Return ONLY valid JSON without any markdown code blocks or extra text
152
  2. Preserve the exact URL values from the input - do not modify them
153
- 3. Include all fields from the original assessment data exactly as provided
154
- 4. Provide exactly 5 unique assessments with different URLs
155
- 5. Ensure the result has diverse test types to comprehensively evaluate candidates
156
- 6. Do not include duplicate assessments with the same URL
157
- 7. Keep all test_type values as arrays/lists, even if there's only one type
158
  """
159
 
160
  # Generate response
 
7
  genai.configure(api_key=os.environ.get("GEMINI_API_KEY", ""))
8
  model = genai.GenerativeModel("models/gemini-2.0-flash")
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def rerank(query, candidates):
11
  """
12
+ Rerank the candidate assessments using Gemini.
 
13
 
14
  Args:
15
  query: The job description
 
26
  print(f"Reranking {len(candidates)} candidates")
27
  print(f"Sample candidate: {json.dumps(candidates[0], indent=2)}")
28
 
 
 
 
 
 
29
  # Clean up candidates data for API
30
  cleaned_candidates = []
 
 
31
  for candidate in candidates:
 
 
 
 
 
 
32
  # Create a clean copy
33
  clean_candidate = {}
34
 
 
51
 
52
  cleaned_candidates.append(clean_candidate)
53
 
54
+ # Create the prompt for Gemini
55
  prompt = f"""
56
+ Given a job description, rank the most relevant SHL assessments based on how well they match the job requirements.
57
 
58
  Job description: "{query}"
59
 
60
+ Candidate SHL assessments: {json.dumps(cleaned_candidates, indent=2)}
 
 
 
 
 
 
 
 
 
 
61
 
62
+ Rank the most relevant assessments and return a JSON list in this format:
63
  {{
64
  "recommended_assessments": [
65
  {{
 
76
  CRITICAL INSTRUCTIONS:
77
  1. Return ONLY valid JSON without any markdown code blocks or extra text
78
  2. Preserve the exact URL values from the input - do not modify them
79
+ 3. Include all fields from the original assessment data
80
+ 4. Limit to the top 10 most relevant assessments
81
+ 5. Ensure the JSON is properly formatted with all fields
82
+ 6. Keep all test_type values as arrays/lists, even if there's only one type
 
83
  """
84
 
85
  # Generate response