Spaces:

AnshulS
/

endpointwebappshl

Sleeping

App Files Files Community

endpointwebappshl / reranker.py

AnshulS

Update reranker.py

9967a24 verified about 1 month ago

raw

history blame contribute delete

4.24 kB

	import os
	import re
	import json
	import google.generativeai as genai

	# Gemini client setup: the key is taken from the GEMINI_API_KEY environment
	# variable, with an empty string passed when the variable is unset.
	genai.configure(api_key=os.getenv("GEMINI_API_KEY", ""))
	model = genai.GenerativeModel("models/gemini-2.0-flash")

	# Fields every candidate is reduced to before being sent to Gemini.
	_CANDIDATE_FIELDS = (
	    "url",
	    "adaptive_support",
	    "remote_support",
	    "description",
	    "duration",
	    "test_type",
	)

	# Matches a markdown code fence (optionally tagged "json") and captures its
	# body. The lazy ``.*?`` together with DOTALL lets the body span many lines
	# while stopping at the first closing fence.
	_FENCED_BLOCK_RE = re.compile(r"```(?:json)?\s*(.*?)```", re.DOTALL)


	def _default_for(field):
	    """Return the placeholder stored when a candidate lacks *field*."""
	    if field == "test_type":
	        return ["Unknown"]
	    if field == "duration":
	        return None
	    return "Unknown"


	def _clean_candidate(candidate):
	    """Reduce one candidate to the known fields, filling gaps with placeholders."""
	    return {
	        field: candidate[field] if field in candidate else _default_for(field)
	        for field in _CANDIDATE_FIELDS
	    }


	def _build_prompt(query, cleaned_candidates):
	    """Build the ranking prompt embedding the job description and candidates."""
	    return f"""
	Given a job description, rank the most relevant SHL assessments based on how well they match the job requirements.

	Job description: "{query}"

	Candidate SHL assessments: {json.dumps(cleaned_candidates, indent=2)}

	Rank the most relevant assessments and return a JSON list in this format:
	{{
	"recommended_assessments": [
	{{
	"url": "...",
	"adaptive_support": "Yes/No",
	"remote_support": "Yes/No",
	"description": "...",
	"duration": integer or null,
	"test_type": ["type1", "type2", ...]
	}}
	]
	}}

	CRITICAL INSTRUCTIONS:
	1. Return ONLY valid JSON without any markdown code blocks or extra text
	2. Preserve the exact URL values from the input - do not modify them
	3. Include all fields from the original assessment data
	4. Limit to the top 10 most relevant assessments
	5. Ensure the JSON is properly formatted with all fields
	6. Keep all test_type values as arrays/lists, even if there's only one type
	"""


	def _extract_json_text(response_text):
	    """Return the JSON payload of a reply, unwrapping a markdown fence if present."""
	    fenced = _FENCED_BLOCK_RE.search(response_text)
	    if fenced:
	        return fenced.group(1).strip()
	    return response_text


	def _fill_required_fields(assessments):
	    """Ensure each assessment has a ``url`` and a list-valued ``test_type`` (in place)."""
	    for assessment in assessments:
	        assessment.setdefault("url", "https://www.shl.com/missing-url")
	        test_type = assessment.setdefault("test_type", ["Unknown"])
	        if not isinstance(test_type, list):
	            assessment["test_type"] = [test_type]


	def rerank(query, candidates):
	    """
	    Rerank the candidate assessments using Gemini.

	    Each candidate is reduced to a fixed set of fields (missing ones get
	    placeholder values), embedded in a prompt together with the job
	    description, and sent to the module-level ``model``. The reply is parsed
	    as JSON, after unwrapping a markdown code fence if the model added one.

	    Args:
	        query: The job description
	        candidates: List of assessment dictionaries

	    Returns:
	        The parsed reply, a dictionary with a "recommended_assessments" key,
	        where every entry has a "url" and a list-valued "test_type".
	        On any failure a dictionary of the form {"error": "<message>"} is
	        returned instead: no candidates, a reply that is not a JSON object
	        with that key, or an exception raised while calling the model or
	        processing its reply.

	    Raises:
	        TypeError: If a candidate holds a value ``json.dumps`` cannot
	            serialize; serialization happens before the guarded section.
	    """
	    # Ensure we have candidates
	    if not candidates:
	        return {"error": "No candidate assessments to rerank"}

	    # Print debugging info
	    print(f"Reranking {len(candidates)} candidates")
	    print(f"Sample candidate: {json.dumps(candidates[0], indent=2)}")

	    cleaned_candidates = [_clean_candidate(candidate) for candidate in candidates]
	    prompt = _build_prompt(query, cleaned_candidates)

	    try:
	        response = model.generate_content(prompt)
	        result = json.loads(_extract_json_text(response.text))

	        # A reply that is valid JSON but not an object (list, number, ...)
	        # cannot carry the expected key, so report it the same way.
	        if not isinstance(result, dict) or "recommended_assessments" not in result:
	            return {"error": "Invalid response format: missing recommended_assessments key"}

	        _fill_required_fields(result["recommended_assessments"])
	        return result

	    except Exception as e:
	        # Boundary with the external service: callers receive an error
	        # dictionary rather than an exception, whatever went wrong.
	        error_msg = f"Error in reranking: {str(e)}"
	        print(error_msg)
	        return {"error": error_msg}