File size: 4,311 Bytes
d93bcf7
 
c8d23f0
bb65b65
b0d04b3
c8d23f0
 
306d267
bb65b65
d93bcf7
 
306d267
bb65b65
d93bcf7
306d267
d93bcf7
 
bb65b65
3ed9ca7
 
 
 
fdb3da7
c8d23f0
d93bcf7
 
bb65b65
d93bcf7
 
 
 
c8d23f0
306d267
d93bcf7
bb65b65
d93bcf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb65b65
d93bcf7
 
 
 
08dabce
d93bcf7
 
 
c8d23f0
 
d93bcf7
 
 
 
 
 
 
08dabce
d93bcf7
 
 
c8d23f0
 
306d267
bb65b65
 
 
 
 
 
 
 
 
 
 
 
 
acd9b52
 
 
 
bb65b65
acd9b52
 
 
 
 
 
 
 
 
 
 
 
bb65b65
 
c8d23f0
ba3e546
 
bb65b65
ba3e546
bb65b65
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import ast
import json
import math
import traceback

import gradio as gr
import pandas as pd
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse

from reranker import rerank
from retriever import get_relevant_passages

# Load and clean CSV
def _parse_literal_cell(value):
    """Turn a stringified Python literal (e.g. ``"['A', 'K']"``) into its value.

    Non-string values are returned untouched. Strings that are not valid
    Python literals are returned unchanged instead of raising, so one
    malformed cell does not abort cleaning of the whole frame.
    """
    if not isinstance(value, str):
        return value
    try:
        # SECURITY: this used to be eval(), which executes arbitrary code found
        # in the CSV. literal_eval only accepts plain literals (lists, strings,
        # numbers, ...), which is all a serialized list needs.
        return ast.literal_eval(value)
    except (ValueError, SyntaxError):
        return value


def clean_df(df):
    """Project a raw assessments frame onto the six columns used downstream.

    Columns are read by position from a copy of ``df`` (the input is not
    modified):

    * index 2 -> ``url``: used verbatim if any row contains ``http`` or
      ``www``; otherwise every row is treated as a path and prefixed with
      ``https://www.shl.com`` (a leading ``/`` is added when missing).
    * index 3 / 4 -> ``remote_support`` / ``adaptive_support``: ``"Yes"``
      when the cell equals ``"T"``, ``"No"`` otherwise.
    * index 5 -> ``test_type``: string cells are parsed as Python literals.
    * index 6 -> ``description``.
    * index 9 -> ``duration``: first run of digits in the cell as a number,
      NaN when the cell contains no digits.

    Returns:
        A DataFrame with columns ``url``, ``adaptive_support``,
        ``remote_support``, ``description``, ``duration``, ``test_type``.
    """
    df = df.copy()
    link_col = df.iloc[:, 2].astype(str)

    # The decision is frame-wide: one absolute link means all rows are kept as-is.
    if link_col.str.contains('http').any() or link_col.str.contains('www').any():
        df["url"] = link_col
    else:
        # The negative lookahead inserts "/" only where the path lacks one.
        df["url"] = "https://www.shl.com" + link_col.str.replace(r'^(?!/)', '/', regex=True)

    df["remote_support"] = df.iloc[:, 3].map(lambda flag: "Yes" if flag == "T" else "No")
    df["adaptive_support"] = df.iloc[:, 4].map(lambda flag: "Yes" if flag == "T" else "No")
    df["test_type"] = df.iloc[:, 5].apply(_parse_literal_cell)
    df["description"] = df.iloc[:, 6]
    df["duration"] = pd.to_numeric(
        df.iloc[:, 9].astype(str).str.extract(r'(\d+)')[0], errors='coerce'
    )

    return df[["url", "adaptive_support", "remote_support", "description", "duration", "test_type"]]

# Build the cleaned catalogue once at import time. Any failure (missing file,
# unexpected layout, ...) is reported on stdout and replaced by an empty frame
# with the expected columns, so importing this module never raises here.
try:
    df = pd.read_csv("assesments.csv", encoding='utf-8')
    df_clean = clean_df(df)
except Exception as load_error:
    print(f"Error loading data: {load_error}")
    df_clean = pd.DataFrame(
        columns=[
            "url",
            "adaptive_support",
            "remote_support",
            "description",
            "duration",
            "test_type",
        ]
    )

# Fix URLs
def validate_and_fix_urls(candidates):
    """Ensure every candidate dict carries an absolute ``url``.

    The list is modified in place and also returned. Entries that are not
    dicts are skipped. For each dict:

    * a missing, falsy, NaN or whitespace-only ``url`` becomes
      ``https://www.shl.com/missing-url``;
    * a value already starting with ``http://`` or ``https://`` is kept
      (with surrounding whitespace removed);
    * anything else is treated as a path on ``https://www.shl.com``.

    Args:
        candidates: iterable of candidate records (dicts with a ``url`` key).

    Returns:
        The same ``candidates`` object.
    """
    base = "https://www.shl.com"
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue

        raw = candidate.get('url')
        # NaN is truthy and str(nan) == "nan", so without this check a missing
        # value would turn into the bogus link ".../nan".
        is_nan = isinstance(raw, float) and math.isnan(raw)
        url = '' if (not raw or is_nan) else str(raw).strip()

        if not url:
            candidate['url'] = f"{base}/missing-url"
        elif url.startswith(('http://', 'https://')):
            candidate['url'] = url
        elif url.startswith('/'):
            candidate['url'] = f"{base}{url}"
        else:
            # Covers bare paths and purely numeric identifiers alike.
            candidate['url'] = f"{base}/{url}"
    return candidates

# Recommendation Logic
def _as_type_list(value):
    """Coerce a ``test_type`` cell into a list.

    Lists pass through; strings that look like a list literal are parsed;
    everything else is wrapped as ``[str(value)]``.
    """
    if isinstance(value, list):
        return value
    if isinstance(value, str) and value.startswith('['):
        try:
            # SECURITY: this used to be eval(); literal_eval parses the same
            # list literals without executing arbitrary expressions.
            return ast.literal_eval(value)
        except (ValueError, SyntaxError):
            # Malformed literal: keep the raw text rather than failing the request.
            return [value]
    return [str(value)]


def _duration_or_none(value):
    """Return ``value`` as an int, or ``None`` when it is missing/NaN."""
    if value is None or pd.isna(value):
        return None
    return int(value)


def recommend(query):
    """Return assessment recommendations for a job description.

    Retrieves up to 20 candidate rows from ``df_clean`` with
    ``get_relevant_passages``, normalizes them, and passes them to ``rerank``.

    Args:
        query: free-text job description.

    Returns:
        Whatever ``rerank`` returns (with the URLs inside
        ``recommended_assessments`` fixed up when that key is present), or a
        dict of the form ``{"error": <message>}`` when the query is empty,
        nothing matches, or any step raises.
    """
    # Guard against None / non-string input as well as blank strings.
    if not isinstance(query, str) or not query.strip():
        return {"error": "Please enter a job description"}
    try:
        top_k_df = get_relevant_passages(query, df_clean, top_k=20)
        if top_k_df.empty:
            return {"error": "No matching assessments found"}

        # Normalize on the records rather than on the frame: writing None into
        # an integer column makes pandas upcast it to float NaN, which is not
        # JSON-compliant, and it also avoids mutating the retriever's frame.
        candidates = top_k_df.to_dict(orient="records")
        for candidate in candidates:
            candidate['test_type'] = _as_type_list(candidate['test_type'])
            candidate['duration'] = _duration_or_none(candidate['duration'])

        candidates = validate_and_fix_urls(candidates)
        result = rerank(query, candidates)
        if 'recommended_assessments' in result:
            result['recommended_assessments'] = validate_and_fix_urls(result['recommended_assessments'])
        return result
    except Exception as e:
        # Top-level boundary for the UI/API: log the traceback, report the message.
        print(traceback.format_exc())
        return {"error": f"Error processing request: {str(e)}"}

# Initialize Gradio App
def gradio_interface(query):
    """Gradio callback: hand the submitted text to ``recommend`` and return its result."""
    recommendation = recommend(query)
    return recommendation

# One multi-line textbox in, JSON out; the callback is gradio_interface.
iface = gr.Interface(
    title="SHL Assessment Recommender",
    description="Paste a job description to get the most relevant SHL assessments.",
    fn=gradio_interface,
    inputs=gr.Textbox(lines=4, label="Enter Job Description"),
    outputs="json",
)

# FastAPI application exposing JSON endpoints next to the Gradio UI.
# The app object has to exist before the route decorators below are evaluated;
# creating it afterwards raises NameError as soon as the module is imported.
app = FastAPI()


@app.get("/health")
async def health():
    """Liveness probe: always responds 200 with ``{"status": "healthy"}``."""
    return JSONResponse(content={"status": "healthy"}, status_code=200)


@app.post("/recommend")
async def recommend_api(request: Request):
    """Recommend assessments for the ``query`` field of a JSON request body.

    Responses:
        200: the dict returned by ``recommend`` (including its ``error`` dicts).
        400: body is not valid JSON, is not an object, or has no non-blank
            string ``query``.
        500: any other exception, reported as ``{"error": <message>}``.
    """
    try:
        data = await request.json()
    except ValueError:
        # JSON decoding errors are ValueError subclasses: a client error, not a 500.
        return JSONResponse(content={"error": "Invalid JSON body"}, status_code=400)
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)

    query = data.get("query", "") if isinstance(data, dict) else ""
    if not isinstance(query, str) or not query.strip():
        return JSONResponse(content={"error": "Missing query"}, status_code=400)

    try:
        result = recommend(query.strip())
        return JSONResponse(content=result, status_code=200)
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)


# Serve the Gradio UI from this same ASGI app. Calling iface.launch() from an
# async startup hook starts a second server and blocks the event loop, so the
# API never becomes ready. The routes above were registered first, so they take
# precedence over the UI mounted at "/".
app = gr.mount_gradio_app(app, iface, path="/")