AnshulS committed on
Commit
9d9d3fa
·
verified ·
1 Parent(s): 7cf2b90

Update reranker.py

Browse files
Files changed (1) hide show
  1. reranker.py +97 -28
reranker.py CHANGED
@@ -1,47 +1,116 @@
1
  import os
2
  import re
3
- import google.generativeai as genai
4
  import json
 
5
 
6
- genai.configure(api_key=os.environ["GEMINI_API_KEY"])
 
7
  model = genai.GenerativeModel("models/gemini-2.0-flash")
8
 
9
  def rerank(query, candidates):
10
- prompt = f"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  Job description: "{query}"
12
 
13
- Candidate SHL assessments: {json.dumps(candidates, indent=2)}
14
 
15
  Rank the most relevant assessments and return a JSON list in this format:
16
  {{
17
- "recommended_assessments": [
18
- {{
19
- "url": ...,
20
- "adaptive_support": ...,
21
- "remote_support": ...,
22
- "description": ...,
23
- "duration": ...,
24
- "test_type": [...]
25
- }}
26
- ]
27
  }}
28
 
29
- Important instructions:
30
  1. Return ONLY valid JSON without any markdown code blocks or extra text
31
- 2. The url field MUST be copied exactly as provided in the candidates, do not modify it
32
- 3. Make sure to include the full URL value as provided in the input candidates
33
- 4. Preserve all original data fields and their exact values (especially URLs)
 
 
34
  """
35
 
36
- response = model.generate_content(prompt)
37
- response_text = response.text
38
-
39
- # Try to extract JSON from possible markdown code blocks
40
- json_match = re.search(r'```(?:json)?\s*(.*?)```', response_text, re.DOTALL)
41
- if json_match:
42
- response_text = json_match.group(1).strip()
43
-
44
  try:
45
- return json.loads(response_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  except Exception as e:
47
- return {"error": str(e), "raw_response": response_text}
 
 
 
1
  import os
2
  import re
 
3
  import json
4
+ import google.generativeai as genai
5
 
6
+ # Configure the Gemini API
7
+ genai.configure(api_key=os.environ.get("GEMINI_API_KEY", ""))
8
  model = genai.GenerativeModel("models/gemini-2.0-flash")
9
 
10
  def rerank(query, candidates):
11
+ """
12
+ Rerank the candidate assessments using Gemini.
13
+
14
+ Args:
15
+ query: The job description
16
+ candidates: List of assessment dictionaries
17
+
18
+ Returns:
19
+ Dictionary containing the recommended assessments
20
+ """
21
+ # Ensure we have candidates
22
+ if not candidates:
23
+ return {"error": "No candidate assessments to rerank"}
24
+
25
+ # Print debugging info
26
+ print(f"Reranking {len(candidates)} candidates")
27
+ print(f"Sample candidate: {json.dumps(candidates[0], indent=2)}")
28
+
29
+ # Clean up candidates data for API
30
+ cleaned_candidates = []
31
+ for candidate in candidates:
32
+ # Create a clean copy
33
+ clean_candidate = {}
34
+
35
+ # Copy required fields
36
+ for field in ["url", "adaptive_support", "remote_support", "description", "duration", "test_type"]:
37
+ if field in candidate:
38
+ # Special handling for test_type
39
+ if field == "test_type" and isinstance(candidate[field], list):
40
+ clean_candidate[field] = candidate[field]
41
+ else:
42
+ clean_candidate[field] = candidate[field]
43
+ else:
44
+ # Default values for missing fields
45
+ if field == "test_type":
46
+ clean_candidate[field] = ["Unknown"]
47
+ elif field == "duration":
48
+ clean_candidate[field] = None
49
+ else:
50
+ clean_candidate[field] = "Unknown"
51
+
52
+ cleaned_candidates.append(clean_candidate)
53
+
54
+ # Create the prompt for Gemini
55
+ prompt = f"""
56
+ Given a job description, rank the most relevant SHL assessments based on how well they match the job requirements.
57
+
58
  Job description: "{query}"
59
 
60
+ Candidate SHL assessments: {json.dumps(cleaned_candidates, indent=2)}
61
 
62
  Rank the most relevant assessments and return a JSON list in this format:
63
  {{
64
+ "recommended_assessments": [
65
+ {{
66
+ "url": "...",
67
+ "adaptive_support": "Yes/No",
68
+ "remote_support": "Yes/No",
69
+ "description": "...",
70
+ "duration": integer or null,
71
+ "test_type": ["type1", "type2", ...]
72
+ }}
73
+ ]
74
  }}
75
 
76
+ CRITICAL INSTRUCTIONS:
77
  1. Return ONLY valid JSON without any markdown code blocks or extra text
78
+ 2. Preserve the exact URL values from the input - do not modify them
79
+ 3. Include all fields from the original assessment data
80
+ 4. Limit to the top 5 most relevant assessments
81
+ 5. Ensure the JSON is properly formatted with all fields
82
+ 6. Keep all test_type values as arrays/lists, even if there's only one type
83
  """
84
 
85
+ # Generate response
 
 
 
 
 
 
 
86
  try:
87
+ response = model.generate_content(prompt)
88
+ response_text = response.text
89
+
90
+ # Try to extract JSON from possible markdown code blocks
91
+ json_match = re.search(r'```(?:json)?\s*(.*?)```', response_text, re.DOTALL)
92
+ if json_match:
93
+ response_text = json_match.group(1).strip()
94
+
95
+ # Parse the JSON
96
+ result = json.loads(response_text)
97
+
98
+ # Validate the response structure
99
+ if "recommended_assessments" not in result:
100
+ return {"error": "Invalid response format: missing recommended_assessments key"}
101
+
102
+ # Ensure each assessment has the required fields
103
+ for assessment in result["recommended_assessments"]:
104
+ if "url" not in assessment:
105
+ assessment["url"] = "https://www.shl.com/missing-url"
106
+ if "test_type" not in assessment:
107
+ assessment["test_type"] = ["Unknown"]
108
+ if not isinstance(assessment["test_type"], list):
109
+ assessment["test_type"] = [assessment["test_type"]]
110
+
111
+ return result
112
+
113
  except Exception as e:
114
+ error_msg = f"Error in reranking: {str(e)}"
115
+ print(error_msg)
116
+ return {"error": error_msg}