Spaces:

AnshulS
/

endpointwebappshl

Sleeping

File size: 2,269 Bytes

8b6aa48
 
9e9d5ee
 
8b6aa48
 
9e9d5ee
 
bef0a51
 
 
 
 
 
 
 
 
 
9e9d5ee
 
bef0a51
 
9e9d5ee
bef0a51
9e9d5ee
bef0a51
 
 
 
 
 
 
9e9d5ee
8b6aa48
bef0a51
 
 
 
 
 
 
 
8b6aa48
9e9d5ee
bef0a51
 
 
 
 
 
 
 
 
 
8b6aa48
 
9e9d5ee
8b6aa48
 
9e9d5ee
 
8b6aa48
 
 
bef0a51

import pandas as pd
import gradio as gr
from retriever import get_relevant_passages
from reranker import rerank

# Load and clean CSV
def clean_df(df):
    """Project the raw assessments table onto the recommender's six columns.

    Source columns are addressed by position, not by header name:
    column 1 feeds ``url``, 2 feeds ``remote_support``, 3 feeds
    ``adaptive_support``, 4 feeds ``test_type``, 5 feeds ``description``
    and 8 feeds ``duration``.

    Args:
        df: Raw DataFrame with at least nine columns. It is not modified.

    Returns:
        A new DataFrame, indexed like ``df``, with the columns ``url``,
        ``adaptive_support``, ``remote_support``, ``description``,
        ``duration`` and ``test_type`` in that order.

    Raises:
        IndexError: If ``df`` has fewer than nine columns.
    """
    # Read every source column before producing any output, and write the
    # output into a separate frame. This keeps a positional read from ever
    # picking up a column that was just written under the same header name
    # (e.g. an input whose column 3 is already called "remote_support").
    link = df.iloc[:, 1].astype(str)
    remote_flag = df.iloc[:, 2]
    adaptive_flag = df.iloc[:, 3]
    type_text = df.iloc[:, 4].astype(str)
    description = df.iloc[:, 5]
    duration_text = df.iloc[:, 8].astype(str)

    cleaned = pd.DataFrame(index=df.index)

    # The URL check is column-wide: a single value containing "http" or "www"
    # means the whole column is taken as-is; otherwise every value is treated
    # as a site-relative path and prefixed with the SHL host.
    if link.str.contains('http').any() or link.str.contains('www').any():
        cleaned["url"] = link
    else:
        # Strip leading whitespace/slashes so the join never yields "//".
        cleaned["url"] = "https://www.shl.com/" + link.str.replace(r'^[\s/]*', '', regex=True)

    def _yes_no(flag):
        # Only the exact string "T" counts as yes; anything else (including
        # missing values) becomes "No".
        return "Yes" if flag == "T" else "No"

    cleaned["remote_support"] = remote_flag.map(_yes_no)
    cleaned["adaptive_support"] = adaptive_flag.map(_yes_no)

    # NOTE(review): the pattern is the two characters backslash + "n". pandas
    # documents that a multi-character pattern is treated as a regex when
    # `regex` is left unset, so this should split on real newlines -- confirm
    # against the CSV. Missing values become the one-element list ["nan"].
    cleaned["test_type"] = type_text.str.split("\\n")

    cleaned["description"] = description

    # First run of digits in the duration text; NaN when there is none.
    cleaned["duration"] = pd.to_numeric(
        duration_text.str.extract(r'(\d+)')[0],
        errors='coerce'
    )

    return cleaned[["url", "adaptive_support", "remote_support", "description", "duration", "test_type"]]

# Build the module-level table once at import time. Any failure while reading
# or cleaning the CSV is reported on stdout and replaced by an empty frame
# with the expected columns, so the module still imports.
try:
    df = pd.read_csv("assesments.csv")
    df_clean = clean_df(df)
except Exception as e:
    print(f"Error loading or cleaning data: {e}")
    df_clean = pd.DataFrame(
        columns=[
            "url",
            "adaptive_support",
            "remote_support",
            "description",
            "duration",
            "test_type",
        ]
    )

def recommend(query):
    """Return reranked assessment recommendations for a job description.

    Args:
        query: Free-text job description.

    Returns:
        The value returned by ``rerank`` on success. Otherwise a dict with a
        single ``"error"`` key: a prompt when ``query`` is missing, blank or
        not a string, or a message describing the exception raised while
        retrieving/reranking.
    """
    # Guard before calling .strip(): a None or other non-string input would
    # otherwise raise AttributeError outside the try block below.
    if not isinstance(query, str) or not query.strip():
        return {"error": "Please enter a job description"}

    try:
        shortlist = get_relevant_passages(query, df_clean, top_k=20)
        candidates = shortlist.to_dict(orient="records")
        return rerank(query, candidates)
    except Exception as exc:
        # UI boundary: report the failure in the JSON output instead of
        # letting the exception propagate to the caller.
        return {"error": f"Error processing request: {exc}"}

# Gradio front end: one multi-line textbox in, the result of `recommend`
# rendered as JSON out.
iface = gr.Interface(
    title="SHL Assessment Recommender",
    description="Paste a job description to get the most relevant SHL assessments.",
    fn=recommend,
    inputs=gr.Textbox(lines=4, label="Enter Job Description"),
    outputs="json",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()