AnshulS's picture
Update app.py
9729849 verified
raw
history blame
4.38 kB
import pandas as pd
import gradio as gr
import json
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from gradio.routes import mount_gradio_app
from retriever import get_relevant_passages
from reranker import rerank
# FastAPI application instance: the JSON endpoints below are registered on it
# and the Gradio UI is mounted onto it via mount_gradio_app.
app = FastAPI()
# --- CSV Loading and Cleaning ---
def clean_df(df):
    """Normalize the raw assessments table into the columns used downstream.

    Columns are read by position, so the input must have at least ten
    columns: index 2 holds the URL or path, 3 and 4 hold "T"/other flags,
    5 holds the test-type value, 6 the description and 9 a duration text.

    Args:
        df: Raw DataFrame as loaded from the CSV. It is not modified; the
            function works on a copy.

    Returns:
        A DataFrame with exactly the columns ``url``, ``adaptive_support``,
        ``remote_support``, ``description``, ``duration`` and ``test_type``.
    """
    import ast  # local import keeps this block self-contained

    def _parse_test_type(value):
        # SECURITY: the original used eval() on CSV text, which executes
        # arbitrary code. literal_eval only accepts Python literals; a cell
        # that is not a literal is kept as its raw string instead of
        # aborting the whole load.
        if not isinstance(value, str):
            return value
        try:
            return ast.literal_eval(value)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            return value

    df = df.copy()

    url_col = df.iloc[:, 2].astype(str)
    # The decision is made for the column as a whole: if any row looks like
    # a full URL, every row is taken verbatim.
    if url_col.str.contains('http').any() or url_col.str.contains('www').any():
        df["url"] = url_col
    else:
        # Ensure exactly the paths lacking a leading slash get one.
        rooted = url_col.where(url_col.str.startswith('/'), '/' + url_col)
        df["url"] = "https://www.shl.com" + rooted

    df["remote_support"] = df.iloc[:, 3].map(lambda x: "Yes" if x == "T" else "No")
    df["adaptive_support"] = df.iloc[:, 4].map(lambda x: "Yes" if x == "T" else "No")
    df["test_type"] = df.iloc[:, 5].apply(_parse_test_type)
    df["description"] = df.iloc[:, 6]
    # First run of digits in the duration text; NaN when there is none.
    df["duration"] = pd.to_numeric(
        df.iloc[:, 9].astype(str).str.extract(r'(\d+)')[0],
        errors='coerce'
    )
    return df[["url", "adaptive_support", "remote_support", "description", "duration", "test_type"]]
# Start from an empty frame with the expected columns so the app can still
# start when loading fails; it is overwritten when the CSV loads and cleans
# successfully.
df_clean = pd.DataFrame(
    columns=["url", "adaptive_support", "remote_support", "description", "duration", "test_type"]
)
try:
    df = pd.read_csv("assesments.csv", encoding='utf-8')
    df_clean = clean_df(df)
except Exception as load_error:
    # Best-effort startup: report the problem and keep the empty fallback.
    print(f"Error loading data: {load_error}")
# --- Utility ---
def validate_and_fix_urls(candidates):
    """Ensure every candidate dict carries an absolute ``url``.

    The dicts are updated in place and the same list is returned. Entries
    that are not dicts are left untouched.

    Rules, applied per candidate:
      * missing, falsy, NaN or blank ``url`` -> the "missing-url" placeholder
      * already starts with ``http://`` or ``https://`` -> kept (whitespace
        stripped)
      * starts with ``www.`` -> treated as a host and given ``https://``
        (previously it was appended to the SHL base URL, which produced
        ``https://www.shl.com/www...``)
      * anything else (including purely numeric ids) -> joined onto
        ``https://www.shl.com`` with exactly one separating slash added when
        the value has no leading slash

    Args:
        candidates: Iterable of candidate records (normally dicts).

    Returns:
        The ``candidates`` object that was passed in.
    """
    base = "https://www.shl.com"
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue

        raw = candidate.get('url')
        # NaN is truthy, so it needs an explicit check (NaN != NaN).
        is_nan = isinstance(raw, float) and raw != raw
        url = "" if (not raw or is_nan) else str(raw).strip()

        if not url:
            candidate['url'] = 'https://www.shl.com/missing-url'
        elif url.startswith(('http://', 'https://')):
            candidate['url'] = url
        elif url.startswith('www.'):
            candidate['url'] = f"https://{url}"
        elif url.startswith('/'):
            candidate['url'] = f"{base}{url}"
        else:
            candidate['url'] = f"{base}/{url}"
    return candidates
# --- Core Recommend Logic ---
def recommend(query):
    """Return reranked assessment recommendations for a job description.

    Retrieves up to 20 candidate rows for ``query`` from ``df_clean``,
    normalizes them into plain records, fixes their URLs and passes them to
    ``rerank``. If the reranker's result contains a
    ``recommended_assessments`` entry, its URLs are fixed as well.

    Args:
        query: Job description text.

    Returns:
        The reranker's result, or ``{"error": <message>}`` when the query is
        empty or not a string, nothing matches, or any step raises.
    """
    import ast  # local import keeps this block self-contained

    def _as_type_list(value):
        # Always hand the reranker a list of test types.
        if isinstance(value, list):
            return value
        if isinstance(value, str) and value.startswith('['):
            # SECURITY: the original used eval() here; literal_eval parses
            # list literals without executing code.
            try:
                parsed = ast.literal_eval(value)
            except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
                return [value]
            return parsed if isinstance(parsed, list) else [value]
        return [str(value)]

    def _as_duration(value):
        # Missing durations become None, everything else a plain int.
        if value is None or pd.isna(value):
            return None
        return int(value)

    # Guard against None / non-string input as well as blank text.
    if not isinstance(query, str) or not query.strip():
        return {"error": "Please enter a job description"}

    try:
        top_k_df = get_relevant_passages(query, df_clean, top_k=20)
        if top_k_df.empty:
            return {"error": "No matching assessments found"}

        # Normalize on the record dicts rather than on the frame: writing
        # None into a numeric pandas column is coerced back to NaN, and
        # working on the dicts also leaves the retriever's frame unmodified.
        candidates = top_k_df.to_dict(orient="records")
        for candidate in candidates:
            candidate['test_type'] = _as_type_list(candidate['test_type'])
            candidate['duration'] = _as_duration(candidate['duration'])

        candidates = validate_and_fix_urls(candidates)
        result = rerank(query, candidates)
        if 'recommended_assessments' in result:
            result['recommended_assessments'] = validate_and_fix_urls(result['recommended_assessments'])
        return result
    except Exception as e:
        # Top-level boundary for the UI/API: log the traceback and report
        # the failure as a regular error payload.
        import traceback
        print(traceback.format_exc())
        return {"error": f"Error processing request: {str(e)}"}
# --- Gradio Interface ---
# Gradio UI: a single text box whose content is passed to recommend(); the
# returned dict is shown as JSON.
iface = gr.Interface(
    fn=recommend,
    inputs=gr.Textbox(label="Enter Job Description", lines=4),
    outputs="json",
    title="SHL Assessment Recommender",
    description="Paste a job description to get the most relevant SHL assessments."
)


# --- /health Endpoint ---
@app.get("/health", response_class=JSONResponse)
async def health_check():
    """Liveness probe: always answers 200 with ``{"status": "healthy"}``."""
    return JSONResponse(
        status_code=200,
        content={"status": "healthy"},
        media_type="application/json"
    )


# --- /recommend Endpoint ---
@app.post("/recommend", response_class=JSONResponse)
async def recommend_api(request: Request):
    """JSON API wrapper around ``recommend``.

    Expects a JSON object with a non-empty string ``query``. Responds with
    400 when the body is not valid JSON or ``query`` is missing/blank, and
    otherwise with 200 and whatever ``recommend`` returned.
    """
    try:
        body = await request.json()
    except ValueError:
        # Covers malformed JSON and undecodable bytes; previously this
        # surfaced as an unhandled 500.
        return JSONResponse(
            status_code=400,
            content={"error": "Request body must be valid JSON"},
            media_type="application/json"
        )

    # The body may legally be a JSON array/scalar and "query" may be a
    # non-string; treat both like a missing query instead of crashing.
    query = body.get("query") if isinstance(body, dict) else None
    if not isinstance(query, str) or not query.strip():
        return JSONResponse(
            status_code=400,
            content={"error": "Missing 'query' in request body"},
            media_type="application/json"
        )

    result = recommend(query.strip())
    return JSONResponse(
        status_code=200,
        content=result,
        media_type="application/json"
    )


# --- Mount Gradio at "/" ---
# This must come after the API routes above: routes are matched in
# registration order and a mount at "/" matches every path, so mounting
# first would shadow /health and /recommend.
mount_gradio_app(app, iface, path="/")