Spaces:
Sleeping
Sleeping
File size: 4,514 Bytes
d93bcf7 c8d23f0 306d267 b0d04b3 306d267 c8d23f0 306d267 fdb3da7 c8d23f0 d93bcf7 fdb3da7 d93bcf7 306d267 c8d23f0 d93bcf7 306d267 d93bcf7 c8d23f0 3ed9ca7 fdb3da7 c8d23f0 d93bcf7 c8d23f0 306d267 d93bcf7 fdb3da7 d93bcf7 fdb3da7 d93bcf7 08dabce d93bcf7 c8d23f0 d93bcf7 08dabce d93bcf7 c8d23f0 306d267 c8d23f0 fdb3da7 c8d23f0 fdb3da7 306d267 354f738 fdb3da7 354f738 c8d23f0 fdb3da7 306d267 fdb3da7 354f738 fdb3da7 354f738 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
import pandas as pd
import gradio as gr
import json
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from gradio.routes import mount_gradio_app
from retriever import get_relevant_passages
from reranker import rerank
# --- Initialize FastAPI ---
# Single ASGI application object for this module; the HTTP routes and the
# Gradio UI defined further down are all attached to this instance.
app = FastAPI()
# --- Load and clean CSV ---
def clean_df(df):
    """Normalize the raw assessment table into the columns the app uses.

    Source columns are read by position, not by name: index 2 holds the URL
    (or a URL path), 3 and 4 hold the remote/adaptive flags ("T" means yes),
    5 holds the test types (a list, or a string containing a Python list
    literal), 6 holds the description and 9 holds a duration text from which
    the first run of digits is extracted.

    Args:
        df: Raw DataFrame as loaded from the CSV. It is copied, not modified.

    Returns:
        A new DataFrame with exactly the columns ``url``,
        ``adaptive_support``, ``remote_support``, ``description``,
        ``duration`` and ``test_type``.

    Raises:
        IndexError: If ``df`` has fewer than ten columns.
    """
    import ast

    base_url = "https://www.shl.com"

    def build_url(raw):
        # Decide per row: a value that already looks like a full address is
        # kept verbatim, anything else is treated as a path on the SHL site.
        # (Deciding once for the whole column left relative paths unprefixed
        # as soon as a single row carried an absolute URL.)
        text = str(raw)
        if "http" in text or "www" in text:
            return text
        return base_url + (text if text.startswith("/") else "/" + text)

    def parse_test_type(value):
        # The CSV is external data, so never eval() it. literal_eval only
        # accepts Python literals; anything it rejects is kept as the raw
        # value instead of aborting the whole load.
        if not isinstance(value, str):
            return value
        try:
            return ast.literal_eval(value)
        except (ValueError, SyntaxError, TypeError):
            return value

    def yes_no(flag):
        return "Yes" if flag == "T" else "No"

    df = df.copy()
    df["url"] = df.iloc[:, 2].map(build_url)
    df["remote_support"] = df.iloc[:, 3].map(yes_no)
    df["adaptive_support"] = df.iloc[:, 4].map(yes_no)
    df["test_type"] = df.iloc[:, 5].apply(parse_test_type)
    df["description"] = df.iloc[:, 6]

    # First run of digits in the duration text; rows without digits become NaN.
    duration_digits = df.iloc[:, 9].astype(str).str.extract(r'(\d+)')[0]
    df["duration"] = pd.to_numeric(duration_digits, errors='coerce')

    return df[["url", "adaptive_support", "remote_support", "description", "duration", "test_type"]]
# Load the catalogue once at import time. Any failure (missing file, bad
# encoding, unexpected layout) is reported on stdout and replaced by an empty
# table with the expected columns so the module still imports.
try:
    df = pd.read_csv("assesments.csv", encoding='utf-8')
    df_clean = clean_df(df)
except Exception as load_error:
    print(f"Error loading data: {load_error}")
    df_clean = pd.DataFrame(
        columns=[
            "url",
            "adaptive_support",
            "remote_support",
            "description",
            "duration",
            "test_type",
        ]
    )
# --- Fix URLs ---
def validate_and_fix_urls(candidates):
    """Ensure every candidate dict carries an absolute ``url``.

    The list is modified in place and also returned. Entries that are not
    dicts are left untouched. For each dict:

    * a missing, empty or whitespace-only ``url`` becomes
      ``https://www.shl.com/missing-url``;
    * a value that already starts with ``http://`` or ``https://`` is kept
      (with surrounding whitespace removed);
    * a scheme-less value starting with ``www.`` gets ``https://`` prepended;
    * anything else (including purely numeric values) is treated as a path
      on ``https://www.shl.com``.

    Args:
        candidates: Iterable of candidate records, normally a list of dicts.

    Returns:
        The same ``candidates`` object that was passed in.
    """
    base_url = "https://www.shl.com"

    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue

        raw = candidate.get('url')
        url = str(raw).strip() if raw else ''

        if not url:
            candidate['url'] = f"{base_url}/missing-url"
        elif url.startswith(('http://', 'https://')):
            candidate['url'] = url
        elif url.startswith('www.'):
            # A bare host name is not a path on the SHL site; gluing it onto
            # the base URL would yield "https://www.shl.com/www....".
            candidate['url'] = f"https://{url}"
        elif url.startswith('/'):
            candidate['url'] = f"{base_url}{url}"
        else:
            candidate['url'] = f"{base_url}/{url}"

    return candidates
# --- Recommendation Logic ---
def _coerce_test_types(value):
    """Return ``value`` as a list of test types without evaluating code."""
    import ast

    if isinstance(value, list):
        return value
    if isinstance(value, str) and value.startswith('['):
        # The text comes from data files, so parse it as a literal only.
        try:
            parsed = ast.literal_eval(value)
        except (ValueError, SyntaxError, TypeError):
            return [value]
        return parsed if isinstance(parsed, list) else [value]
    return [str(value)]


def _duration_or_none(value):
    """Return ``value`` as an ``int``, or ``None`` when it is missing/NaN."""
    if value is None or pd.isna(value):
        return None
    return int(value)


def recommend(query):
    """Return assessment recommendations for a job description.

    The query is passed to ``get_relevant_passages`` together with the
    cleaned catalogue (``top_k=20``); the resulting rows are converted to
    dicts, normalized, and handed to ``rerank``. URLs are fixed up both
    before reranking and on the ``recommended_assessments`` entry of the
    result, when present.

    Args:
        query: Job description text.

    Returns:
        The object returned by ``rerank``, or a dict of the form
        ``{"error": <message>}`` when the query is empty or not a string,
        nothing matched, or any step raised an exception.
    """
    if not isinstance(query, str) or not query.strip():
        return {"error": "Please enter a job description"}
    try:
        top_k_df = get_relevant_passages(query, df_clean, top_k=20)
        if top_k_df.empty:
            return {"error": "No matching assessments found"}

        # Normalize on the plain records rather than on the DataFrame:
        # writing None into an integer column makes pandas fall back to
        # float/NaN, which is not valid JSON, and writing into the frame
        # could also touch data owned by the retriever.
        candidates = top_k_df.to_dict(orient="records")
        for candidate in candidates:
            candidate['test_type'] = _coerce_test_types(candidate['test_type'])
            candidate['duration'] = _duration_or_none(candidate['duration'])

        candidates = validate_and_fix_urls(candidates)
        result = rerank(query, candidates)
        if 'recommended_assessments' in result:
            result['recommended_assessments'] = validate_and_fix_urls(result['recommended_assessments'])
        return result
    except Exception as e:
        # Boundary handler: the UI and API expect a dict, never an exception.
        import traceback
        print(traceback.format_exc())
        return {"error": f"Error processing request: {str(e)}"}
# --- Gradio Interface ---
iface = gr.Interface(
    fn=recommend,
    inputs=gr.Textbox(label="Enter Job Description", lines=4),
    outputs="json",
    title="SHL Assessment Recommender",
    description="Paste a job description to get the most relevant SHL assessments.",
)


def _json_response(content, status_code):
    """Build a JSON response with an explicit media type and status code."""
    return JSONResponse(
        content=content,
        media_type="application/json",
        status_code=status_code,
    )


# --- Health Endpoint ---
@app.get("/health")
async def health_check():
    """Report that the service is up."""
    return _json_response({"status": "healthy"}, 200)


# --- API Recommendation Endpoint ---
@app.post("/recommend")
async def recommend_api(request: Request):
    """Return recommendations for the ``query`` field of a JSON request body.

    Responds with 400 when the body is not valid JSON, is not a JSON object,
    or has no non-empty string ``query``; with 500 on unexpected errors; and
    with 200 plus the output of ``recommend`` otherwise.
    """
    try:
        body = await request.json()
    except ValueError:
        # JSON decoding and text decoding errors are both ValueErrors; a
        # malformed body is the client's fault, not a server failure.
        return _json_response({"error": "Request body must be valid JSON"}, 400)
    except Exception as e:
        return _json_response({"error": str(e)}, 500)

    query = body.get("query") if isinstance(body, dict) else None
    if not isinstance(query, str) or not query.strip():
        return _json_response({"error": "Missing 'query' in request body"}, 400)

    try:
        result = recommend(query.strip())
    except Exception as e:
        return _json_response({"error": str(e)}, 500)
    return _json_response(result, 200)


# --- Mount Gradio UI ---
# Keep this after the route definitions above: routes are matched in
# registration order and a mount at "/" matches every path, so mounting
# first would hide /health and /recommend behind the Gradio app.
mount_gradio_app(app, iface, path="/")
|