Spaces:

AnshulS
/

endpointwebappshl

Sleeping

App Files Community

AnshulS committed on May 9

Commit

9967a24

verified ·

1 Parent(s): 831f81c

Update reranker.py

Browse files

Files changed (1) hide show

reranker.py +9 -84

reranker.py CHANGED Viewed

@@ -7,60 +7,9 @@ import google.generativeai as genai
 genai.configure(api_key=os.environ.get("GEMINI_API_KEY", ""))
 model = genai.GenerativeModel("models/gemini-2.0-flash")
-def extract_job_requirements(job_description):
-    """
-    Extract key job requirements from the job description to improve assessment matching.
-    """
-    # Common skills and requirements categories to look for
-    skill_categories = [
-        "technical skills", "soft skills", "communication", "leadership",
-        "management", "analytical", "problem-solving", "teamwork", "coding",
-        "programming", "data analysis", "project management", "sales",
-        "customer service", "administrative", "clerical", "organization",
-        "attention to detail", "decision making", "numerical", "verbal"
-    ]
-    # Education and experience patterns
-    education_patterns = [
-        "bachelor", "master", "phd", "degree", "diploma", "certification",
-        "years of experience", "years experience"
-    ]
-    # Extract requirements from the job description
-    requirements = []
-    job_desc_lower = job_description.lower()
-    # Check for skill categories
-    for skill in skill_categories:
-        if skill in job_desc_lower:
-            requirements.append(f"Need for {skill}")
-    # Check for education and experience
-    for pattern in education_patterns:
-        if pattern in job_desc_lower:
-            # Try to find the sentence containing this pattern
-            sentences = job_description.split('.')
-            for sentence in sentences:
-                if pattern in sentence.lower():
-                    clean_sentence = sentence.strip()
-                    if clean_sentence:
-                        requirements.append(clean_sentence)
-                    break
-    # If we couldn't find specific requirements, add some general ones
-    if not requirements:
-        requirements = [
-            "General job aptitude assessment needed",
-            "Personality and behavior evaluation",
-            "Competency assessment for job fit"
-        ]
-    return requirements
 def rerank(query, candidates):
     """
-    Rerank the candidate assessments using Gemini with improved instructions
-    for relevance and diversity.
     Args:
         query: The job description
@@ -77,22 +26,9 @@ def rerank(query, candidates):
     print(f"Reranking {len(candidates)} candidates")
     print(f"Sample candidate: {json.dumps(candidates[0], indent=2)}")
-    # Extract key job requirements to improve matching
-    job_requirements = extract_job_requirements(query)
-    job_req_str = "\n".join([f"- {req}" for req in job_requirements])
-    print(f"Extracted job requirements: {len(job_requirements)} items")
     # Clean up candidates data for API
     cleaned_candidates = []
-    unique_urls = set()  # Track URLs to avoid duplicates
     for candidate in candidates:
-        # Skip if we've already seen this URL
-        if candidate.get('url') in unique_urls:
-            continue
-        unique_urls.add(candidate.get('url', ''))
         # Create a clean copy
         clean_candidate = {}
@@ -115,25 +51,15 @@ def rerank(query, candidates):
         cleaned_candidates.append(clean_candidate)
-    # Create the enhanced prompt for Gemini
     prompt = f"""
-    As an SHL assessment expert, your task is to select the most appropriate assessments for a job position.
     Job description: "{query}"
-    Key job requirements identified:
-    {job_req_str}
-    Available SHL assessments: {json.dumps(cleaned_candidates, indent=2)}
-    Rank the 5 most relevant assessments based on how well they match the job requirements.
-    Focus on these ranking factors:
-    1. Direct relevance to the job skills required
-    2. Test types that assess the key job requirements
-    3. Diversity of assessment methods (include different test types)
-    4. Practical duration considering the role's seniority level
-    Return a JSON list in this format:
     {{
         "recommended_assessments": [
             {{
@@ -150,11 +76,10 @@ def rerank(query, candidates):
     CRITICAL INSTRUCTIONS:
     1. Return ONLY valid JSON without any markdown code blocks or extra text
     2. Preserve the exact URL values from the input - do not modify them
-    3. Include all fields from the original assessment data exactly as provided
-    4. Provide exactly 5 unique assessments with different URLs
-    5. Ensure the result has diverse test types to comprehensively evaluate candidates
-    6. Do not include duplicate assessments with the same URL
-    7. Keep all test_type values as arrays/lists, even if there's only one type
     """
     # Generate response

 genai.configure(api_key=os.environ.get("GEMINI_API_KEY", ""))
 model = genai.GenerativeModel("models/gemini-2.0-flash")
 def rerank(query, candidates):
     """
+    Rerank the candidate assessments using Gemini.
     Args:
         query: The job description
     print(f"Reranking {len(candidates)} candidates")
     print(f"Sample candidate: {json.dumps(candidates[0], indent=2)}")
     # Clean up candidates data for API
     cleaned_candidates = []
     for candidate in candidates:
         # Create a clean copy
         clean_candidate = {}
         cleaned_candidates.append(clean_candidate)
+    # Create the prompt for Gemini
     prompt = f"""
+    Given a job description, rank the most relevant SHL assessments based on how well they match the job requirements.
     Job description: "{query}"
+    Candidate SHL assessments: {json.dumps(cleaned_candidates, indent=2)}
+    Rank the most relevant assessments and return a JSON list in this format:
     {{
         "recommended_assessments": [
             {{
     CRITICAL INSTRUCTIONS:
     1. Return ONLY valid JSON without any markdown code blocks or extra text
     2. Preserve the exact URL values from the input - do not modify them
+    3. Include all fields from the original assessment data
+    4. Limit to the top 10 most relevant assessments
+    5. Ensure the JSON is properly formatted with all fields
+    6. Keep all test_type values as arrays/lists, even if there's only one type
     """
     # Generate response