AnshulS committed on
Commit
9e9d5ee
·
verified ·
1 Parent(s): 2133db4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -66
app.py CHANGED
@@ -1,80 +1,34 @@
1
- # app.py
2
- import os
3
- import json
4
  import pandas as pd
5
- import google.generativeai as genai
6
  import gradio as gr
7
-
8
- # Configure Gemini
9
- genai.configure(api_key=os.environ["GEMINI_API_KEY"])
10
- model = genai.GenerativeModel("gemini-2.0-flash")
11
 
12
  # Load and clean CSV
13
- df_raw = pd.read_csv("assesments.csv")
14
-
15
- def preprocess_data(df):
16
- def clean_duration(text):
17
- try:
18
- return int(text.split('=')[-1].strip())
19
- except:
20
- return None
21
-
22
- def clean_support(val):
23
- return "Yes" if val == 'T' else "No"
24
-
25
- def clean_test_type(val):
26
- return [x.strip() for x in str(val).split('\n') if x.strip()]
27
-
28
- df_cleaned = pd.DataFrame({
29
- "url": "https://www.shl.com" + df.iloc[:, 1].astype(str),
30
- "remote_support": df.iloc[:, 2].apply(clean_support),
31
- "adaptive_support": df.iloc[:, 3].apply(clean_support),
32
- "test_type": df.iloc[:, 4].apply(clean_test_type),
33
- "description": df.iloc[:, 5],
34
- "duration": df.iloc[:, 8].apply(clean_duration),
35
- })
36
- return df_cleaned
37
-
38
- assessments = preprocess_data(df_raw)
39
-
40
- def recommend_assessments(query, top_k=10):
41
- prompt = f"""
42
- Given this job description: "{query}", recommend the top {top_k} relevant SHL assessments from the following list.
43
- Return the result as JSON with this format:
44
- {{
45
- "recommended_assessments": [
46
- {{
47
- "url": ...,
48
- "adaptive_support": ...,
49
- "remote_support": ...,
50
- "description": ...,
51
- "duration": ...,
52
- "test_type": [...]
53
- }},
54
- ...
55
- ]
56
- }}
57
 
58
- Data:
59
- {assessments.to_dict(orient='records')}
60
- """
 
 
 
 
 
 
61
 
62
- response = model.generate_content(prompt)
63
- try:
64
- result = json.loads(response.text)
65
- return result
66
- except Exception as e:
67
- return {"error": f"Failed to parse response: {str(e)}\n{response.text}"}
68
 
69
- def predict(query):
70
- return recommend_assessments(query)
 
 
71
 
72
  iface = gr.Interface(
73
- fn=predict,
74
  inputs=gr.Textbox(label="Enter Job Description", lines=4),
75
  outputs="json",
76
- title="SHL Assessment Recommender (Gemini-powered)",
77
- description="Paste a job description and get the most relevant SHL assessments."
78
  )
79
 
80
  if __name__ == "__main__":
 
 
 
 
1
  import pandas as pd
 
2
  import gradio as gr
3
+ from retriever import get_relevant_passages
4
+ from reranker import rerank
 
 
5
 
6
  # Load and clean CSV
7
+ df = pd.read_csv("assessments.csv")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ def clean_df(df):
10
+ df = df.copy()
11
+ df["url"] = "https://www.shl.com" + df.iloc[:, 1]
12
+ df["remote_support"] = df.iloc[:, 2].map(lambda x: "Yes" if x == "T" else "No")
13
+ df["adaptive_support"] = df.iloc[:, 3].map(lambda x: "Yes" if x == "T" else "No")
14
+ df["test_type"] = df.iloc[:, 4].astype(str).str.split("\\n")
15
+ df["description"] = df.iloc[:, 5]
16
+ df["duration"] = df.iloc[:, 8].astype(str).str.extract(r'(\d+)').astype(float)
17
+ return df[["url", "adaptive_support", "remote_support", "description", "duration", "test_type"]]
18
 
19
+ df_clean = clean_df(df)
 
 
 
 
 
20
 
21
+ def recommend(query):
22
+ top_k_df = get_relevant_passages(query, df_clean, top_k=20)
23
+ candidates = top_k_df.to_dict(orient="records")
24
+ return rerank(query, candidates)
25
 
26
  iface = gr.Interface(
27
+ fn=recommend,
28
  inputs=gr.Textbox(label="Enter Job Description", lines=4),
29
  outputs="json",
30
+ title="SHL Assessment Recommender",
31
+ description="Paste a job description to get the most relevant SHL assessments."
32
  )
33
 
34
  if __name__ == "__main__":