Spaces:

AnshulS
/

endpointwebappshl

Sleeping

App Files Files Community

endpointwebappshl / reranker.py

AnshulS

Update reranker.py

26b070d verified about 2 months ago

raw

history blame

2.86 kB

	import os
	import re
	import google.generativeai as genai
	import json

	# Identifier of the Gemini model used for reranking.
	_GEMINI_MODEL_NAME = "models/gemini-2.0-flash"

	# The API key is read from the environment at import time; a missing
	# GEMINI_API_KEY raises KeyError here, before any request is made.
	genai.configure(api_key=os.environ["GEMINI_API_KEY"])

	# Module-level client shared by every rerank() call.
	model = genai.GenerativeModel(_GEMINI_MODEL_NAME)

	# Fields every candidate sent to the model must carry.
	_CANDIDATE_FIELDS = (
	    "url",
	    "adaptive_support",
	    "remote_support",
	    "description",
	    "duration",
	    "test_type",
	)

	# Fields every assessment returned to the caller must carry.
	_ASSESSMENT_FIELDS = (
	    "assessment_name",
	    "url",
	    "remote_support",
	    "adaptive_support",
	    "duration",
	    "test_type",
	)

	# The prompt asks for at most this many assessments; enforced on the reply too.
	_MAX_RECOMMENDATIONS = 10

	# Matches a markdown code fence (optionally tagged "json") and captures its body.
	_FENCED_BLOCK_RE = re.compile(r"```(?:json)?\s*(.*?)```", re.DOTALL | re.IGNORECASE)


	def _normalize_candidates(candidates):
	    """Return copies of the candidates with missing/None fields defaulted."""
	    normalized = []
	    for candidate in candidates:
	        # Copy so the caller's dictionaries are never modified.
	        filled = dict(candidate)
	        for field in _CANDIDATE_FIELDS:
	            if filled.get(field) is None:
	                filled[field] = [] if field == "test_type" else "N/A"
	        normalized.append(filled)
	    return normalized


	def _build_prompt(query, candidates):
	    """Build the reranking prompt for the job description and candidates."""
	    return f"""
	Job description: "{query}"

	Candidate SHL assessments: {json.dumps(candidates, indent=2)}

	Rank the most relevant assessments for this job description and return a JSON object with the top 10 (or fewer if there aren't 10 good matches).

	Your response must be ONLY valid JSON in this exact format:
	{{
	"recommended_assessments": [
	{{
	"assessment_name": "Name of the assessment",
	"url": "https://www.shl.com/...",
	"remote_support": "Yes/No",
	"adaptive_support": "Yes/No",
	"duration": "duration value",
	"test_type": "test type value"
	}},
	...more assessments...
	]
	}}

	IMPORTANT RULES:
	1. Return ONLY valid JSON without any markdown, explanations, or code blocks
	2. Copy the URL exactly as provided in the input - do not modify URLs
	3. Include at most 10 assessments, ranked by relevance to the job description
	4. If a field is missing in the input, use "N/A" as the value
	5. Make sure each assessment has all required fields
	"""


	def _extract_json_text(response_text):
	    """Return the body of a markdown code fence if present, else the stripped text."""
	    fenced = _FENCED_BLOCK_RE.search(response_text)
	    if fenced:
	        return fenced.group(1).strip()
	    return response_text.strip()


	def _validate_result(result):
	    """Coerce parsed model output into the {"recommended_assessments": [...]} shape."""
	    if not isinstance(result, dict):
	        return {"recommended_assessments": []}

	    assessments = result.get("recommended_assessments")
	    if not isinstance(assessments, list):
	        assessments = []

	    # Drop malformed entries, then enforce the limit the prompt asked for.
	    cleaned = [item for item in assessments if isinstance(item, dict)]
	    cleaned = cleaned[:_MAX_RECOMMENDATIONS]
	    for assessment in cleaned:
	        for field in _ASSESSMENT_FIELDS:
	            # Treat an explicit null the same as a missing field.
	            if assessment.get(field) is None:
	                assessment[field] = "N/A"

	    result["recommended_assessments"] = cleaned
	    return result


	def rerank(query, candidates):
	    """Ask the Gemini model to rank candidate assessments for a job description.

	    Args:
	        query: Job description text interpolated into the prompt.
	        candidates: Iterable of dicts describing assessments. Missing or None
	            fields are defaulted on copies; the caller's dicts are not
	            modified. None or an empty iterable short-circuits without
	            calling the model.

	    Returns:
	        A dict with a "recommended_assessments" list of at most 10 dicts,
	        each carrying assessment_name, url, remote_support,
	        adaptive_support, duration and test_type. If the model call fails or
	        its reply is not valid JSON, the list is empty and an "error" key
	        holds the exception message.
	    """
	    candidates = list(candidates or [])

	    # Add debug print
	    print(f"Reranking {len(candidates)} candidates")
	    if not candidates:
	        # Nothing to rank: skip the model call entirely.
	        return {"recommended_assessments": []}
	    print(f"First candidate keys: {candidates[0].keys()}")

	    prompt = _build_prompt(query, _normalize_candidates(candidates))

	    try:
	        # The SDK may raise here (e.g. request failure, or when reading
	        # .text from a reply without usable content); report it through
	        # the same error shape used for parse failures.
	        response_text = model.generate_content(prompt).text
	    except Exception as e:
	        print(f"Error calling model: {e}")
	        return {"error": str(e), "recommended_assessments": []}

	    # Try to extract JSON from possible markdown code blocks
	    response_text = _extract_json_text(response_text)

	    try:
	        result = json.loads(response_text)
	    except (TypeError, ValueError) as e:
	        print(f"Error parsing JSON: {e}")
	        print(f"Raw response: {response_text}")
	        return {"error": str(e), "recommended_assessments": []}

	    return _validate_result(result)