Spaces:

AnshulS
/

endpointwebappshl

Sleeping

App Files Files Community

endpointwebappshl / reranker.py

AnshulS

Update reranker.py

9d9d3fa verified about 2 months ago

raw

history blame

4.24 kB

	import os
	import re
	import json
	import google.generativeai as genai

	# Authenticate the Gemini client from the environment; an empty key is
	# passed through when GEMINI_API_KEY is not set.
	genai.configure(api_key=os.getenv("GEMINI_API_KEY", ""))
	# Module-level model handle that rerank() sends its prompt to.
	model = genai.GenerativeModel(model_name="models/gemini-2.0-flash")

	def rerank(query, candidates):
	    """
	    Rerank the candidate assessments for a job description using Gemini.

	    Each candidate is reduced to the six fields the prompt needs (missing
	    fields get placeholder defaults), the list is embedded in a ranking
	    prompt sent to the module-level ``model``, and the reply is parsed as
	    JSON. Failures of the model call or of reply parsing are not raised;
	    they are reported through an ``"error"`` key instead.

	    Args:
	        query: The job description
	        candidates: List of assessment dictionaries

	    Returns:
	        Dictionary containing the recommended assessments under the
	        "recommended_assessments" key, or a dictionary with a single
	        "error" key when there is nothing to rerank, the model call fails,
	        or the reply is not the expected JSON object.
	    """
	    # Ensure we have candidates
	    if not candidates:
	        return {"error": "No candidate assessments to rerank"}

	    # Print debugging info
	    print(f"Reranking {len(candidates)} candidates")
	    print(f"Sample candidate: {json.dumps(candidates[0], indent=2)}")

	    # Clean up candidates data for API
	    cleaned_candidates = [_clean_candidate(candidate) for candidate in candidates]

	    # Create the prompt for Gemini
	    prompt = f"""
	Given a job description, rank the most relevant SHL assessments based on how well they match the job requirements.

	Job description: "{query}"

	Candidate SHL assessments: {json.dumps(cleaned_candidates, indent=2)}

	Rank the most relevant assessments and return a JSON list in this format:
	{{
	"recommended_assessments": [
	{{
	"url": "...",
	"adaptive_support": "Yes/No",
	"remote_support": "Yes/No",
	"description": "...",
	"duration": integer or null,
	"test_type": ["type1", "type2", ...]
	}}
	]
	}}

	CRITICAL INSTRUCTIONS:
	1. Return ONLY valid JSON without any markdown code blocks or extra text
	2. Preserve the exact URL values from the input - do not modify them
	3. Include all fields from the original assessment data
	4. Limit to the top 5 most relevant assessments
	5. Ensure the JSON is properly formatted with all fields
	6. Keep all test_type values as arrays/lists, even if there's only one type
	"""

	    # Generate response
	    try:
	        response = model.generate_content(prompt)

	        # The model may wrap its answer in a Markdown fence despite the
	        # instructions, so unwrap it before parsing.
	        result = json.loads(_extract_json_text(response.text))

	        # Validate the response structure (a bare list or scalar is not
	        # the object the prompt asked for).
	        if (
	            not isinstance(result, dict)
	            or "recommended_assessments" not in result
	        ):
	            return {"error": "Invalid response format: missing recommended_assessments key"}

	        # Ensure each assessment has the required fields
	        for assessment in result["recommended_assessments"]:
	            assessment.setdefault("url", "https://www.shl.com/missing-url")
	            test_type = assessment.setdefault("test_type", ["Unknown"])
	            if not isinstance(test_type, list):
	                assessment["test_type"] = [test_type]

	        return result

	    except Exception as e:
	        # Boundary handler: any failure in the model call, JSON parsing or
	        # normalization is reported to the caller as an error dictionary.
	        error_msg = f"Error in reranking: {str(e)}"
	        print(error_msg)
	        return {"error": error_msg}


	# Fields of each candidate that are forwarded to Gemini, in prompt order.
	_PROMPT_FIELDS = (
	    "url",
	    "adaptive_support",
	    "remote_support",
	    "description",
	    "duration",
	    "test_type",
	)

	# Matches a Markdown code fence (optionally tagged "json") and captures its
	# contents; DOTALL lets the capture span multiple lines.
	_FENCED_BLOCK_RE = re.compile(
	    r"```(?:json)?\s*(.*?)```", re.DOTALL | re.IGNORECASE
	)


	def _clean_candidate(candidate):
	    """Return a new dict with only the prompt fields, defaulting missing ones."""
	    cleaned = {}
	    for field in _PROMPT_FIELDS:
	        if field in candidate:
	            cleaned[field] = candidate[field]
	        elif field == "test_type":
	            cleaned[field] = ["Unknown"]
	        elif field == "duration":
	            cleaned[field] = None
	        else:
	            cleaned[field] = "Unknown"
	    return cleaned


	def _extract_json_text(response_text):
	    """Return the contents of the first Markdown fence, else the text as-is."""
	    fenced = _FENCED_BLOCK_RE.search(response_text)
	    if fenced:
	        return fenced.group(1).strip()
	    return response_text