File size: 4,241 Bytes
c0029e4
3120871
c0029e4
9d9d3fa
c0029e4
9d9d3fa
 
06b8d86
c0029e4
 
9d9d3fa
9967a24
9d9d3fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9967a24
9d9d3fa
9967a24
9d9d3fa
c0029e4
3120871
9967a24
3120871
9967a24
c0029e4
9d9d3fa
 
 
 
 
 
 
 
 
 
c0029e4
3120871
9d9d3fa
409518b
9d9d3fa
9967a24
 
 
 
c0029e4
3120871
9d9d3fa
c0029e4
9d9d3fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c0029e4
9d9d3fa
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os
import re
import json
import google.generativeai as genai

# Set up the Gemini client once, at import time. The API key is read from the
# GEMINI_API_KEY environment variable; an empty string is used when it is unset.
genai.configure(api_key=os.getenv("GEMINI_API_KEY", ""))
# Shared model handle that rerank() sends its prompt to.
model = genai.GenerativeModel("models/gemini-2.0-flash")

# Upper bound on returned recommendations; also interpolated into the prompt so
# the instruction given to the model and the enforced limit cannot drift apart.
_MAX_RECOMMENDATIONS = 10


def _clean_candidate(candidate):
    """Return a copy of *candidate* limited to the prompt fields, with defaults for missing ones."""
    # Built per call so the mutable ["Unknown"] default is never shared between
    # candidates. Key order matches the order the fields appear in the prompt.
    defaults = {
        "url": "Unknown",
        "adaptive_support": "Unknown",
        "remote_support": "Unknown",
        "description": "Unknown",
        "duration": None,
        "test_type": ["Unknown"],
    }
    return {field: candidate.get(field, fallback) for field, fallback in defaults.items()}


def _build_prompt(query, cleaned_candidates):
    """Build the reranking prompt sent to Gemini for *query* and the cleaned candidates."""
    return f"""
    Given a job description, rank the most relevant SHL assessments based on how well they match the job requirements.
    
    Job description: "{query}"
    
    Candidate SHL assessments: {json.dumps(cleaned_candidates, indent=2)}
    
    Rank the most relevant assessments and return a JSON list in this format:
    {{
        "recommended_assessments": [
            {{
                "url": "...",
                "adaptive_support": "Yes/No",
                "remote_support": "Yes/No",
                "description": "...",
                "duration": integer or null,
                "test_type": ["type1", "type2", ...]
            }}
        ]
    }}
    
    CRITICAL INSTRUCTIONS:
    1. Return ONLY valid JSON without any markdown code blocks or extra text
    2. Preserve the exact URL values from the input - do not modify them
    3. Include all fields from the original assessment data
    4. Limit to the top {_MAX_RECOMMENDATIONS} most relevant assessments
    5. Ensure the JSON is properly formatted with all fields
    6. Keep all test_type values as arrays/lists, even if there's only one type
    """


def _extract_json_text(response_text):
    """Return the contents of the first markdown code fence in *response_text*, or the text itself."""
    # The prompt asks for bare JSON, but the model may still wrap it in ``` fences.
    fenced = re.search(r'```(?:json)?\s*(.*?)```', response_text, re.DOTALL)
    if fenced:
        return fenced.group(1).strip()
    return response_text


def _normalize_result(result):
    """Validate the parsed model output and fill in or repair per-assessment fields."""
    if not isinstance(result, dict) or "recommended_assessments" not in result:
        return {"error": "Invalid response format: missing recommended_assessments key"}

    assessments = result["recommended_assessments"]
    if not isinstance(assessments, list):
        return {"error": "Invalid response format: recommended_assessments is not a list"}

    normalized = []
    for assessment in assessments:
        # Drop malformed entries instead of failing the whole response.
        if not isinstance(assessment, dict):
            continue
        assessment.setdefault("url", "https://www.shl.com/missing-url")
        test_type = assessment.setdefault("test_type", ["Unknown"])
        if not isinstance(test_type, list):
            assessment["test_type"] = [test_type]
        normalized.append(assessment)

    # Enforce the limit requested in the prompt; the model's ranking order is kept.
    result["recommended_assessments"] = normalized[:_MAX_RECOMMENDATIONS]
    return result


def rerank(query, candidates):
    """
    Rerank the candidate assessments using Gemini.

    The candidates are reduced to a fixed set of fields (missing ones get
    defaults), embedded in a prompt together with the job description, and
    sent to the module-level ``model``. The reply is parsed as JSON, with an
    optional markdown code fence stripped first, then validated and normalized.

    Args:
        query: The job description
        candidates: List of assessment dictionaries

    Returns:
        Dictionary containing the recommended assessments under the
        "recommended_assessments" key (at most 10 entries, each with a "url"
        and a list-valued "test_type"), or a dictionary with a single "error"
        key describing what went wrong. Failures are reported through the
        "error" key rather than raised.
    """
    # Ensure we have candidates
    if not candidates:
        return {"error": "No candidate assessments to rerank"}

    # Everything below runs inside one try so that any failure -- malformed or
    # non-serializable candidates, an API error, or unparsable model output --
    # is reported through the same {"error": ...} contract.
    try:
        # Print debugging info
        print(f"Reranking {len(candidates)} candidates")
        print(f"Sample candidate: {json.dumps(candidates[0], indent=2)}")

        cleaned_candidates = [_clean_candidate(candidate) for candidate in candidates]
        prompt = _build_prompt(query, cleaned_candidates)

        response = model.generate_content(prompt)
        result = json.loads(_extract_json_text(response.text))
        return _normalize_result(result)
    except Exception as e:
        error_msg = f"Error in reranking: {str(e)}"
        print(error_msg)
        return {"error": error_msg}