File size: 4,240 Bytes
c0029e4
3120871
c0029e4
9d9d3fa
c0029e4
9d9d3fa
 
06b8d86
c0029e4
 
9d9d3fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c0029e4
3120871
9d9d3fa
3120871
409518b
c0029e4
9d9d3fa
 
 
 
 
 
 
 
 
 
c0029e4
3120871
9d9d3fa
409518b
9d9d3fa
 
 
 
 
c0029e4
3120871
9d9d3fa
c0029e4
9d9d3fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c0029e4
9d9d3fa
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os
import re
import json
import google.generativeai as genai

# Configure the Gemini client at import time. The API key is read from the
# GEMINI_API_KEY environment variable; when the variable is unset an empty
# string is passed to genai.configure.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY", ""))
# Module-level model handle; rerank() calls model.generate_content on it.
model = genai.GenerativeModel("models/gemini-2.0-flash")

# Fields copied from each candidate into the prompt, in output order.
_CANDIDATE_FIELDS = (
    "url",
    "adaptive_support",
    "remote_support",
    "description",
    "duration",
    "test_type",
)


def _as_type_list(value):
    """Coerce a ``test_type`` value to a list; None becomes ``["Unknown"]``."""
    if value is None:
        return ["Unknown"]
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def _clean_candidate(candidate):
    """Return a new dict holding only the prompt fields, with defaults filled in."""
    cleaned = {}
    for field in _CANDIDATE_FIELDS:
        if field == "test_type":
            # Always send a list so the model sees a consistent shape.
            cleaned[field] = _as_type_list(candidate.get(field))
        elif field == "duration":
            cleaned[field] = candidate.get(field)
        else:
            cleaned[field] = candidate.get(field, "Unknown")
    return cleaned


def _build_prompt(query, cleaned_candidates):
    """Build the Gemini prompt text for *query* and the cleaned candidates."""
    return f"""
    Given a job description, rank the most relevant SHL assessments based on how well they match the job requirements.
    
    Job description: "{query}"
    
    Candidate SHL assessments: {json.dumps(cleaned_candidates, indent=2)}
    
    Rank the most relevant assessments and return a JSON list in this format:
    {{
        "recommended_assessments": [
            {{
                "url": "...",
                "adaptive_support": "Yes/No",
                "remote_support": "Yes/No",
                "description": "...",
                "duration": integer or null,
                "test_type": ["type1", "type2", ...]
            }}
        ]
    }}
    
    CRITICAL INSTRUCTIONS:
    1. Return ONLY valid JSON without any markdown code blocks or extra text
    2. Preserve the exact URL values from the input - do not modify them
    3. Include all fields from the original assessment data
    4. Limit to the top 5 most relevant assessments
    5. Ensure the JSON is properly formatted with all fields
    6. Keep all test_type values as arrays/lists, even if there's only one type
    """


def _extract_json_text(response_text):
    """Strip a surrounding markdown code fence from *response_text*, if any."""
    fenced = re.search(r'```(?:json)?\s*(.*?)```', response_text, re.DOTALL)
    if fenced:
        return fenced.group(1).strip()
    return response_text


def rerank(query, candidates):
    """
    Rerank the candidate assessments using Gemini.

    Args:
        query: The job description
        candidates: List of assessment dictionaries

    Returns:
        Dictionary containing the recommended assessments under the
        "recommended_assessments" key, or a dictionary with a single
        "error" key describing what went wrong. Failures inside the
        function (serialisation, API call, JSON parsing, validation) are
        reported through the "error" dictionary rather than raised.
    """
    # Ensure we have candidates
    if not candidates:
        return {"error": "No candidate assessments to rerank"}

    try:
        # Debugging output. default=str is used only for this dump so that an
        # unusual value in the raw candidate cannot break the debug print.
        print(f"Reranking {len(candidates)} candidates")
        sample = json.dumps(candidates[0], indent=2, default=str)
        print(f"Sample candidate: {sample}")

        # Serialisation happens inside the try so that a non-JSON-serialisable
        # candidate yields an error dict like every other failure.
        cleaned_candidates = [_clean_candidate(c) for c in candidates]
        prompt = _build_prompt(query, cleaned_candidates)

        response = model.generate_content(prompt)
        result = json.loads(_extract_json_text(response.text))

        # Validate the response structure
        if not isinstance(result, dict) or "recommended_assessments" not in result:
            return {"error": "Invalid response format: missing recommended_assessments key"}

        assessments = result["recommended_assessments"]
        if not isinstance(assessments, list):
            return {"error": "Invalid response format: recommended_assessments is not a list"}

        # Ensure each assessment has the required fields
        for assessment in assessments:
            if not isinstance(assessment, dict):
                return {"error": "Invalid response format: assessment entry is not an object"}
            assessment.setdefault("url", "https://www.shl.com/missing-url")
            assessment["test_type"] = _as_type_list(assessment.get("test_type"))

        return result

    except Exception as e:
        # Top-level boundary: report any failure to the caller as an error dict.
        error_msg = f"Error in reranking: {str(e)}"
        print(error_msg)
        return {"error": error_msg}