import ast

import pandas as pd
import gradio as gr

from retriever import get_relevant_passages
from reranker import rerank

# === Load and Clean CSV ===
def clean_df(df):
    """Normalize the raw assessments CSV into the columns the app needs."""
    df = df.copy()
    # Column 2 holds either a full URL or a site-relative path.
    url_col = df.iloc[:, 2].astype(str)
    if url_col.str.contains('http').any() or url_col.str.contains('www').any():
        df["url"] = url_col
    else:
        # Prepend the SHL domain, adding a leading slash where it is missing.
        df["url"] = "https://www.shl.com" + url_col.str.replace(r'^(?!/)', '/', regex=True)
    # Columns 3 and 4 are "T"/"F" flags.
    df["remote_support"] = df.iloc[:, 3].map(lambda x: "Yes" if x == "T" else "No")
    df["adaptive_support"] = df.iloc[:, 4].map(lambda x: "Yes" if x == "T" else "No")
    # Column 5 may store the test types as a stringified list; parse it safely.
    df["test_type"] = df.iloc[:, 5].apply(
        lambda x: ast.literal_eval(x) if isinstance(x, str) and x.startswith('[') else x
    )
    df["description"] = df.iloc[:, 6]
    # Column 9 is duration text; keep only the first number it contains.
    df["duration"] = pd.to_numeric(df.iloc[:, 9].astype(str).str.extract(r'(\d+)')[0], errors='coerce')
    return df[["url", "adaptive_support", "remote_support", "description", "duration", "test_type"]]
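
# Assumed layout of assesments.csv, inferred from the positional indices used in
# clean_df above (the header names in the file itself may differ):
#   col 2 -> assessment URL or site-relative path
#   col 3 -> remote support flag ("T"/"F")
#   col 4 -> adaptive support flag ("T"/"F")
#   col 5 -> test types, possibly as a stringified list
#   col 6 -> description text
#   col 9 -> duration text containing the numeric duration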

# Load the assessment catalogue once at startup; fall back to an empty frame so the
# UI still starts if the CSV is missing or malformed.
try:
    df = pd.read_csv("assesments.csv", encoding='utf-8')
    df_clean = clean_df(df)
except Exception as e:
    print(f"Error loading data: {e}")
    df_clean = pd.DataFrame(columns=["url", "adaptive_support", "remote_support", "description", "duration", "test_type"])

def validate_and_fix_urls(candidates):
    """Ensure every candidate dict carries an absolute SHL URL."""
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        # Missing or empty URL: point at a placeholder page.
        if 'url' not in candidate or not candidate['url']:
            candidate['url'] = 'https://www.shl.com/missing-url'
            continue
        url = str(candidate['url'])
        # A bare numeric ID becomes a path on the SHL domain.
        if url.isdigit():
            candidate['url'] = f"https://www.shl.com/{url}"
            continue
        # Relative paths get the domain prepended.
        if not url.startswith(('http://', 'https://')):
            candidate['url'] = f"https://www.shl.com{url}" if url.startswith('/') else f"https://www.shl.com/{url}"
    return candidates
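
# Illustrative behaviour of validate_and_fix_urls (paths below are hypothetical):
#   {"url": ""}                      -> "https://www.shl.com/missing-url"
#   {"url": "123"}                   -> "https://www.shl.com/123"
#   {"url": "/some-assessment"}      -> "https://www.shl.com/some-assessment"
#   {"url": "https://www.shl.com/x"} is left unchanged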

def recommend(query):
    """Retrieve, normalise, and rerank assessments for a job-description query."""
    if not query.strip():
        return {"error": "Please enter a job description"}
    try:
        # Retrieve a generous candidate pool before reranking.
        top_k_df = get_relevant_passages(query, df_clean, top_k=20)
        if top_k_df.empty:
            return {"error": "No matching assessments found"}
        # Coerce test_type into a list, parsing stringified lists safely.
        top_k_df['test_type'] = top_k_df['test_type'].apply(
            lambda x: x if isinstance(x, list) else
            (ast.literal_eval(x) if isinstance(x, str) and x.startswith('[') else [str(x)])
        )
        # JSON-friendly durations: plain ints, with None for missing values.
        top_k_df['duration'] = top_k_df['duration'].apply(
            lambda d: int(d) if pd.notna(d) else None
        )
        candidates = top_k_df.to_dict(orient="records")
        candidates = validate_and_fix_urls(candidates)
        result = rerank(query, candidates)
        if 'recommended_assessments' in result:
            result['recommended_assessments'] = validate_and_fix_urls(result['recommended_assessments'])
        return result
    except Exception as e:
        import traceback
        print(traceback.format_exc())
        return {"error": f"Error processing request: {str(e)}"}
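
# Assumed shape of a successful response, based on the fields prepared above and the
# 'recommended_assessments' key checked after rerank(); values are illustrative only:
# {
#     "recommended_assessments": [
#         {
#             "url": "https://www.shl.com/...",
#             "adaptive_support": "Yes",
#             "remote_support": "No",
#             "description": "...",
#             "duration": 45,
#             "test_type": ["..."]
#         }
#     ]
# }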

# === Gradio UI ===
gr.Interface(
    fn=recommend,
    inputs=gr.Textbox(label="Enter Job Description", lines=4),
    outputs="json",
    title="SHL Assessment Recommender",
    description="Paste a job description to get the most relevant SHL assessments."
).launch()
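
# Deployment note (a sketch, not taken from this repo): when hosting inside a
# container or behind a reverse proxy, the same interface can be launched with an
# explicit host and port, e.g.
#   gr.Interface(...).launch(server_name="0.0.0.0", server_port=7860)
# server_name and server_port are standard Gradio launch() parameters; the values
# shown are illustrative.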