Spaces:
Sleeping
Sleeping
File size: 4,379 Bytes
8b6aa48 9e9d5ee 8b6aa48 9e9d5ee bef0a51 d56d4c8 bef0a51 d56d4c8 bef0a51 d56d4c8 bef0a51 d56d4c8 bef0a51 d56d4c8 bef0a51 d56d4c8 8b6aa48 bef0a51 8b6aa48 cbb8b01 9e9d5ee bef0a51 cbb8b01 d56d4c8 cbb8b01 bef0a51 cbb8b01 d56d4c8 cbb8b01 bef0a51 cbb8b01 bef0a51 cbb8b01 bef0a51 cbb8b01 bef0a51 8b6aa48 d56d4c8 8b6aa48 9e9d5ee 8b6aa48 9e9d5ee 8b6aa48 bef0a51 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
import pandas as pd
import gradio as gr
from retriever import get_relevant_passages
from reranker import rerank
# Load and clean CSV
def clean_df(df: pd.DataFrame) -> pd.DataFrame:
    """Return a normalized copy of the raw assessment catalogue.

    The input frame is not modified. The following raw columns are read:
    "Pre-packaged Job Solutions", "Remote Testing", "Adaptive/IRT",
    "Test Type", "Description" and "Assessment_Length".

    Args:
        df: Raw catalogue as loaded from the CSV file.

    Returns:
        A frame with exactly these columns, in this order:
        assessment_name, url, remote_support, adaptive_support,
        description, duration, test_type.

    Raises:
        KeyError: If one of the raw columns listed above is missing.
    """
    base_url = "https://www.shl.com"
    cleaned = df.copy()
    paths = cleaned["Pre-packaged Job Solutions"]

    # The assessment name is the last path segment of the catalogue link.
    # Trailing slashes are stripped first so that both ".../slug/" and
    # ".../slug" resolve to "slug" (indexing [-2] only works for the former).
    slug = paths.str.rstrip("/").str.split("/").str[-1]
    cleaned["assessment_name"] = slug.str.replace("-", " ", regex=False).str.title()

    # Join base and path with exactly one slash, whether or not the stored
    # path carries a leading "/".
    cleaned["url"] = base_url + "/" + paths.str.lstrip("/")

    def yes_no(flag):
        # Only the literal "T" counts as supported; anything else is "No".
        return "Yes" if flag == "T" else "No"

    cleaned["remote_support"] = cleaned["Remote Testing"].map(yes_no)
    cleaned["adaptive_support"] = cleaned["Adaptive/IRT"].map(yes_no)

    # Passed through unchanged (presumably a list rendered as a string;
    # verify against the CSV).
    cleaned["test_type"] = cleaned["Test Type"]
    cleaned["description"] = cleaned["Description"]

    # First run of digits in the length text, or "N/A" when there is none.
    # expand=False yields a Series rather than a one-column DataFrame.
    cleaned["duration"] = (
        cleaned["Assessment_Length"].str.extract(r"(\d+)", expand=False).fillna("N/A")
    )

    return cleaned[
        [
            "assessment_name",
            "url",
            "remote_support",
            "adaptive_support",
            "description",
            "duration",
            "test_type",
        ]
    ]
# Load the catalogue once at import time. Any failure (missing file, missing
# column, parse error) is reported and replaced by an empty frame so the app
# can still start; recommend() checks for the empty frame.
# NOTE(review): the file name is spelled "assesments.csv" (sic); kept as-is
# because it must match the file on disk -- confirm before renaming.
try:
    df = pd.read_csv("assesments.csv")
    df_clean = clean_df(df)
except Exception as e:
    print(f"Error loading or cleaning data: {e}")
    # Fallback mirrors the exact column layout returned by clean_df(),
    # including "assessment_name", so downstream code sees one schema.
    df_clean = pd.DataFrame(
        columns=[
            "assessment_name",
            "url",
            "remote_support",
            "adaptive_support",
            "description",
            "duration",
            "test_type",
        ]
    )
def validate_and_fix_urls(candidates):
    """Normalize the "url" entry of every candidate, in place.

    Rules, applied in order to each candidate dict:

    * missing or falsy "url"          -> "https://www.shl.com/missing-url"
    * all digits, or the SHL base
      immediately followed by digits  -> "https://www.shl.com/<digits>"
    * already "http://" / "https://"  -> left untouched
    * protocol-relative ("//host/x")  -> "https:" is prepended
    * site-relative ("/path")         -> resolved against the SHL base
    * anything else                   -> "https://" is prepended

    Args:
        candidates: Iterable of dicts; each is mutated in place.

    Returns:
        The same ``candidates`` object that was passed in.
    """
    base = "https://www.shl.com"
    for candidate in candidates:
        raw = candidate.get("url")
        if not raw:
            candidate["url"] = f"{base}/missing-url"
            continue

        url = str(raw)

        # A bare numeric id, optionally glued straight onto the base.
        tail = url[len(base):] if url.startswith(base) else url
        if tail.isdigit():
            candidate["url"] = f"{base}/{tail}"
            continue

        if url.startswith(("http://", "https://")):
            continue

        if url.startswith("//"):
            # Protocol-relative URL: only the scheme is missing.
            candidate["url"] = f"https:{url}"
        elif url.startswith("/"):
            # Site-relative path: prefixing just "https://" would produce
            # "https:///path", so resolve it against the SHL host instead.
            candidate["url"] = f"{base}/{url.lstrip('/')}"
        else:
            candidate["url"] = f"https://{url}"
    return candidates
def recommend(query):
    """Return reranked assessment recommendations for a job description.

    Retrieves the top 20 rows of ``df_clean`` for the query, makes their URLs
    absolute, passes them to ``rerank`` and normalizes the URLs of the
    returned "recommended_assessments" list.

    Args:
        query: Free-text job description. ``None``, empty and
            whitespace-only values are rejected.

    Returns:
        Whatever ``rerank`` returns (URLs normalized when it is a dict with a
        "recommended_assessments" key), or ``{"error": <message>}`` when the
        query is blank, no data is loaded, or any step raises.
    """
    # Guard against None as well as blank text: calling .strip() on None
    # would raise before the try block below could report it.
    if not query or not query.strip():
        return {"error": "Please enter a job description"}

    try:
        # Debug output for the server log.
        print(f"Processing query: {query[:50]}...")
        print(f"DataFrame shape: {df_clean.shape}")
        print(f"DataFrame columns: {df_clean.columns.tolist()}")

        if df_clean.empty:
            return {"error": "No assessment data available"}

        print("Sample row:")
        print(df_clean.iloc[0].to_dict())

        top_k_df = get_relevant_passages(query, df_clean, top_k=20)

        print(f"Retrieved {len(top_k_df)} assessments")
        if not top_k_df.empty:
            print(f"Sample URLs from retrieval: {top_k_df['url'].iloc[:3].tolist()}")

        candidates = top_k_df.to_dict(orient="records")

        # Make relative URLs absolute before they reach the reranker.
        for candidate in candidates:
            if "url" not in candidate:
                continue
            url_text = str(candidate["url"])
            if not url_text.startswith(("http://", "https://")):
                candidate["url"] = f"https://www.shl.com/{url_text.lstrip('/')}"

        result = rerank(query, candidates)

        # The reranker's output may carry its own URLs; normalize those too.
        # Only dict results are inspected, anything else is returned as-is.
        if isinstance(result, dict) and "recommended_assessments" in result:
            result["recommended_assessments"] = validate_and_fix_urls(
                result["recommended_assessments"]
            )

        return result
    except Exception as e:
        # Top-level UI boundary: log the full traceback, return a short
        # message to the caller instead of propagating the exception.
        import traceback

        error_details = traceback.format_exc()
        print(f"Error: {str(e)}\n{error_details}")
        return {"error": f"Error processing request: {str(e)}"}
# Gradio UI: a four-line textbox feeds recommend(), and whatever it returns
# is shown through the "json" output component.
iface = gr.Interface(
    title="SHL Assessment Recommender",
    description="Paste a job description to get the most relevant SHL assessments.",
    fn=recommend,
    inputs=gr.Textbox(lines=4, label="Enter Job Description"),
    outputs="json",
)
# Start the Gradio server only when this file is run as a script.
# (A stray trailing "|" after launch() was removed: it left a dangling
# binary operator, which is a syntax error.)
if __name__ == "__main__":
    iface.launch()