Spaces:

AnshulS
/

endpointwebappshl

Sleeping

App Files Files Community

AnshulS committed on May 9

Commit

2a13208

verified ·

1 Parent(s): 071304e

Update reranker.py

Browse files

Files changed (1) hide show

reranker.py +84 -9

reranker.py CHANGED Viewed

@@ -7,9 +7,60 @@ import google.generativeai as genai
 genai.configure(api_key=os.environ.get("GEMINI_API_KEY", ""))
 model = genai.GenerativeModel("models/gemini-2.0-flash")
 def rerank(query, candidates):
     """
-    Rerank the candidate assessments using Gemini.
     Args:
         query: The job description
@@ -26,9 +77,22 @@ def rerank(query, candidates):
     print(f"Reranking {len(candidates)} candidates")
     print(f"Sample candidate: {json.dumps(candidates[0], indent=2)}")
     # Clean up candidates data for API
     cleaned_candidates = []
     for candidate in candidates:
         # Create a clean copy
         clean_candidate = {}
@@ -51,15 +115,25 @@ def rerank(query, candidates):
         cleaned_candidates.append(clean_candidate)
-    # Create the prompt for Gemini
     prompt = f"""
-    Given a job description, rank the most relevant SHL assessments based on how well they match the job requirements.
     Job description: "{query}"
-    Candidate SHL assessments: {json.dumps(cleaned_candidates, indent=2)}
-    Rank the most relevant assessments and return a JSON list in this format:
     {{
         "recommended_assessments": [
             {{
@@ -76,10 +150,11 @@ def rerank(query, candidates):
     CRITICAL INSTRUCTIONS:
     1. Return ONLY valid JSON without any markdown code blocks or extra text
     2. Preserve the exact URL values from the input - do not modify them
-    3. Include all fields from the original assessment data
-    4. Limit to the top 10 most relevant assessments
-    5. Ensure the JSON is properly formatted with all fields
-    6. Keep all test_type values as arrays/lists, even if there's only one type
     """
     # Generate response

 genai.configure(api_key=os.environ.get("GEMINI_API_KEY", ""))
 model = genai.GenerativeModel("models/gemini-2.0-flash")
def extract_job_requirements(job_description):
    """
    Extract key job requirements from the job description to improve assessment matching.

    Matching is a case-insensitive substring search against two fixed keyword
    lists (skill categories and education/experience phrases).

    Args:
        job_description: Free-text job description. ``None`` or an empty
            string is treated as a description with no recognisable keywords.

    Returns:
        A list of requirement strings, in this order:
        * ``"Need for <skill>"`` for every skill category found in the text;
        * for every education/experience phrase found, the first sentence
          (split on ``'.'``) that contains it, with each sentence listed at
          most once even when it matches several phrases.
        If nothing matches, a fixed list of three generic requirements is
        returned instead.
    """
    # Common skills and requirements categories to look for
    skill_categories = (
        "technical skills", "soft skills", "communication", "leadership",
        "management", "analytical", "problem-solving", "teamwork", "coding",
        "programming", "data analysis", "project management", "sales",
        "customer service", "administrative", "clerical", "organization",
        "attention to detail", "decision making", "numerical", "verbal",
    )
    # Education and experience patterns
    education_patterns = (
        "bachelor", "master", "phd", "degree", "diploma", "certification",
        "years of experience", "years experience",
    )

    # Tolerate a missing description instead of failing on ``None.lower()``.
    text = job_description or ""
    job_desc_lower = text.lower()

    # Check for skill categories
    requirements = [
        f"Need for {skill}"
        for skill in skill_categories
        if skill in job_desc_lower
    ]

    # Split into sentences once; the split does not depend on the pattern.
    sentences = [sentence.strip() for sentence in text.split('.')]
    lowered_sentences = [sentence.lower() for sentence in sentences]

    # Check for education and experience
    for pattern in education_patterns:
        # First sentence mentioning this pattern, if any.
        match = next(
            (
                sentence
                for sentence, lowered in zip(sentences, lowered_sentences)
                if pattern in lowered
            ),
            None,
        )
        # One sentence often matches several patterns ("bachelor" and
        # "degree"); record it only once so the prompt is not padded with
        # repeated requirements.
        if match and match not in requirements:
            requirements.append(match)

    # If we couldn't find specific requirements, add some general ones
    if not requirements:
        requirements = [
            "General job aptitude assessment needed",
            "Personality and behavior evaluation",
            "Competency assessment for job fit",
        ]
    return requirements
 def rerank(query, candidates):
     """
+    Rerank the candidate assessments using Gemini with improved instructions
+    for relevance and diversity.
     Args:
         query: The job description
     print(f"Reranking {len(candidates)} candidates")
     print(f"Sample candidate: {json.dumps(candidates[0], indent=2)}")
+    # Extract key job requirements to improve matching
+    job_requirements = extract_job_requirements(query)
+    job_req_str = "\n".join([f"- {req}" for req in job_requirements])
+    print(f"Extracted job requirements: {len(job_requirements)} items")
     # Clean up candidates data for API
     cleaned_candidates = []
+    unique_urls = set()  # Track URLs to avoid duplicates
     for candidate in candidates:
+        # Skip if we've already seen this URL
+        if candidate.get('url') in unique_urls:
+            continue
+        unique_urls.add(candidate.get('url', ''))
         # Create a clean copy
         clean_candidate = {}
         cleaned_candidates.append(clean_candidate)
+    # Create the enhanced prompt for Gemini
     prompt = f"""
+    As an SHL assessment expert, your task is to select the most appropriate assessments for a job position.
     Job description: "{query}"
+    Key job requirements identified:
+    {job_req_str}
+    Available SHL assessments: {json.dumps(cleaned_candidates, indent=2)}
+    Rank the 5 most relevant assessments based on how well they match the job requirements.
+    Focus on these ranking factors:
+    1. Direct relevance to the job skills required
+    2. Test types that assess the key job requirements
+    3. Diversity of assessment methods (include different test types)
+    4. Practical duration considering the role's seniority level
+    Return a JSON list in this format:
     {{
         "recommended_assessments": [
             {{
     CRITICAL INSTRUCTIONS:
     1. Return ONLY valid JSON without any markdown code blocks or extra text
     2. Preserve the exact URL values from the input - do not modify them
+    3. Include all fields from the original assessment data exactly as provided
+    4. Provide exactly 5 unique assessments with different URLs
+    5. Ensure the result has diverse test types to comprehensively evaluate candidates
+    6. Do not include duplicate assessments with the same URL
+    7. Keep all test_type values as arrays/lists, even if there's only one type
     """
     # Generate response