File size: 4,375 Bytes
d93bcf7
 
c8d23f0
306d267
b0d04b3
306d267
 
c8d23f0
 
306d267
c8d23f0
 
d93bcf7
c8d23f0
d93bcf7
 
306d267
c8d23f0
d93bcf7
306d267
d93bcf7
 
c8d23f0
3ed9ca7
 
 
 
c8d23f0
 
 
 
 
 
d93bcf7
 
 
 
 
 
c8d23f0
306d267
d93bcf7
c8d23f0
d93bcf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8d23f0
d93bcf7
 
 
 
08dabce
d93bcf7
 
 
c8d23f0
 
d93bcf7
 
 
 
 
 
 
08dabce
d93bcf7
 
 
c8d23f0
 
306d267
c8d23f0
 
 
 
 
 
 
 
 
9729849
c8d23f0
9729849
c8d23f0
 
 
306d267
354f738
 
 
 
 
c8d23f0
 
 
306d267
 
 
 
354f738
 
 
 
 
306d267
354f738
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import pandas as pd
import gradio as gr
import json
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from gradio.routes import mount_gradio_app

from retriever import get_relevant_passages
from reranker import rerank

# FastAPI application instance; the Gradio UI is mounted on it and the
# /health and /recommend routes are registered on it further down this file.
app = FastAPI()

# --- CSV Loading and Cleaning ---
def clean_df(df):
    """Normalize the raw assessment catalogue into the columns the app uses.

    Columns are read by position, not by name: index 2 holds the URL (or URL
    path), 3 and 4 hold flags where ``"T"`` means "Yes", 5 holds the test
    types, 6 the description and 9 a string containing the duration.

    Args:
        df: Raw catalogue DataFrame with at least ten columns.

    Returns:
        A new DataFrame with the columns ``url``, ``adaptive_support``,
        ``remote_support``, ``description``, ``duration`` and ``test_type``.
        The input frame is not modified.

    Raises:
        IndexError: If ``df`` has fewer than ten columns.
    """
    # Local import (the file already imports locally elsewhere) so this block
    # does not depend on a change to the top-of-file import list.
    import ast

    def _parse_test_type(value):
        # NOTE: this used to be eval(value), which executes arbitrary code found
        # in the CSV. literal_eval only accepts Python literals. Cells that are
        # not valid literals are kept as the raw string instead of aborting the
        # whole load; recommend() already wraps plain strings into a list.
        if not isinstance(value, str):
            return value
        try:
            return ast.literal_eval(value)
        except (ValueError, TypeError, SyntaxError):
            return value

    out = df.copy()
    url_col = out.iloc[:, 2].astype(str)

    # If any row already looks like a full address, the whole column is taken
    # as-is; otherwise every row is treated as a path on the SHL site.
    if url_col.str.contains('http').any() or url_col.str.contains('www').any():
        out["url"] = url_col
    else:
        # Ensure exactly the paths lacking a leading slash get one.
        has_slash = url_col.str.startswith('/', na=False)
        out["url"] = "https://www.shl.com" + url_col.where(has_slash, '/' + url_col)

    out["remote_support"] = out.iloc[:, 3].map(lambda flag: "Yes" if flag == "T" else "No")
    out["adaptive_support"] = out.iloc[:, 4].map(lambda flag: "Yes" if flag == "T" else "No")
    out["test_type"] = out.iloc[:, 5].apply(_parse_test_type)
    out["description"] = out.iloc[:, 6]

    # Keep only the first run of digits; cells without digits become NaN.
    out["duration"] = pd.to_numeric(
        out.iloc[:, 9].astype(str).str.extract(r'(\d+)')[0],
        errors='coerce'
    )

    return out[["url", "adaptive_support", "remote_support", "description", "duration", "test_type"]]

# Load the catalogue once at import time. Any failure while reading or
# cleaning it is reported on stdout and the app falls back to an empty frame
# with the expected columns, so the rest of the module can still be set up.
try:
    df = pd.read_csv("assesments.csv", encoding='utf-8')
    df_clean = clean_df(df)
except Exception as load_error:
    print(f"Error loading data: {load_error}")
    df_clean = pd.DataFrame(
        columns=[
            "url",
            "adaptive_support",
            "remote_support",
            "description",
            "duration",
            "test_type",
        ]
    )

# --- Utility ---
def validate_and_fix_urls(candidates):
    """Make every candidate dict carry an absolute URL, editing it in place.

    Entries that are not dicts are left untouched. A missing or falsy ``url``
    becomes the ``missing-url`` placeholder; a value that already starts with
    ``http://`` or ``https://`` is kept as it is; anything else is treated as
    a path on https://www.shl.com, with a slash inserted when it lacks one.

    Args:
        candidates: Iterable of candidate records (normally a list of dicts).

    Returns:
        The same ``candidates`` object that was passed in.
    """
    site = "https://www.shl.com"
    for entry in candidates:
        if not isinstance(entry, dict):
            continue

        raw = entry.get('url')
        if not raw:
            entry['url'] = 'https://www.shl.com/missing-url'
            continue

        text = str(raw)
        if text.startswith(('http://', 'https://')):
            continue

        # Purely numeric ids and relative paths end up in the same shape:
        # site + "/" + value, without doubling an existing leading slash.
        joiner = '' if text.startswith('/') else '/'
        entry['url'] = f"{site}{joiner}{text}"
    return candidates

# --- Core Recommend Logic ---
def _parse_test_types(value):
    """Return *value* as a list of test types without executing it as code."""
    import ast

    if isinstance(value, list):
        return value
    if isinstance(value, str) and value.startswith('['):
        # NOTE: this used to be eval(value); literal_eval accepts list literals
        # only and cannot run code embedded in the catalogue data.
        try:
            parsed = ast.literal_eval(value)
        except (ValueError, TypeError, SyntaxError):
            return [value]
        return parsed if isinstance(parsed, list) else [value]
    return [str(value)]


def _parse_duration(value):
    """Return *value* as an int, or None when it is missing or not usable."""
    import math

    try:
        number = float(value)
    except (TypeError, ValueError):
        return None
    # NaN marks a missing duration; negatives were the old "missing" sentinel.
    if not math.isfinite(number) or number < 0:
        return None
    return int(number)


def recommend(query):
    """Recommend assessments for a job description.

    Retrieves up to 20 candidate rows from the cleaned catalogue, normalizes
    each record (``test_type`` as a list, ``duration`` as an int or None,
    ``url`` as an absolute URL) and hands the records to the reranker.

    Args:
        query: Free-text job description. ``None``, empty or whitespace-only
            input is rejected with an error dict.

    Returns:
        The dict produced by ``rerank`` (with the URLs under
        ``recommended_assessments`` normalized when that key is present), or
        ``{"error": <message>}`` when the input is empty, nothing matches, or
        any step raises.
    """
    if not query or not query.strip():
        return {"error": "Please enter a job description"}
    try:
        top_k_df = get_relevant_passages(query, df_clean, top_k=20)
        if top_k_df.empty:
            return {"error": "No matching assessments found"}

        # Normalize on plain Python records instead of writing back into the
        # frame: assigning None into an integer column turns it back into
        # float/NaN (NaN is not valid JSON), and the frame returned by the
        # retriever is no longer mutated.
        candidates = top_k_df.to_dict(orient="records")
        for candidate in candidates:
            candidate['test_type'] = _parse_test_types(candidate['test_type'])
            candidate['duration'] = _parse_duration(candidate['duration'])

        candidates = validate_and_fix_urls(candidates)
        result = rerank(query, candidates)
        if 'recommended_assessments' in result:
            result['recommended_assessments'] = validate_and_fix_urls(result['recommended_assessments'])
        return result
    except Exception as e:
        # Top-level boundary for the UI/API: log the traceback and report the
        # failure to the caller instead of crashing the request.
        import traceback
        print(traceback.format_exc())
        return {"error": f"Error processing request: {str(e)}"}

# --- Gradio Interface ---
# Single-textbox UI: the submitted text is passed to recommend() and whatever
# it returns is shown through Gradio's "json" output component.
iface = gr.Interface(
    fn=recommend,
    inputs=gr.Textbox(label="Enter Job Description", lines=4),
    outputs="json",
    title="SHL Assessment Recommender",
    description="Paste a job description to get the most relevant SHL assessments."
)

# --- /health Endpoint ---
@app.get("/health", response_class=JSONResponse)
async def health_check():
    """Liveness probe: always answers 200 with ``{"status": "healthy"}``."""
    return JSONResponse(
        status_code=200,
        content={"status": "healthy"},
        media_type="application/json"
    )


# --- /recommend Endpoint ---
@app.post("/recommend", response_class=JSONResponse)
async def recommend_api(request: Request):
    """Run the recommender for a JSON body of the form ``{"query": "<text>"}``.

    Responds with 400 when the body is not valid JSON, is not a JSON object,
    or has no non-empty string ``query``. Otherwise responds with 200 and the
    dict returned by ``recommend`` (which may itself carry an ``error`` key).
    """
    try:
        body = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        return JSONResponse(
            status_code=400,
            content={"error": "Request body must be valid JSON"},
            media_type="application/json"
        )

    query = body.get("query") if isinstance(body, dict) else None
    if not isinstance(query, str) or not query.strip():
        return JSONResponse(
            status_code=400,
            content={"error": "Missing 'query' in request body"},
            media_type="application/json"
        )

    result = recommend(query.strip())
    return JSONResponse(
        status_code=200,
        content=result,
        media_type="application/json"
    )


# --- Mount Gradio at "/" ---
# Routes are matched in registration order and a mount at "/" matches every
# path, so this must come after the API routes above; mounted earlier it
# would shadow /health and /recommend.
app = mount_gradio_app(app, iface, path="/")