AnshulS committed
Commit 306d267 · verified · 1 Parent(s): a14fc94

Update app.py

Files changed (1):
  app.py  +42 -82
app.py CHANGED
@@ -2,139 +2,100 @@ import pandas as pd
 import gradio as gr
 from retriever import get_relevant_passages
 from reranker import rerank
-import gradio as gr
-import json
-from fastapi import FastAPI
+from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
-from your_module import app
-# This is needed for Hugging Face Spaces
+from fastapi.middleware.cors import CORSMiddleware
+from gradio.routes import mount_gradio_app
+import json
+
+# Define FastAPI app
+app = FastAPI()
+
+# Enable CORS for Spaces
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)

 # Load and clean CSV
 def clean_df(df):
     df = df.copy()
-
-    # Get column names for reference
-    print(f"Original columns: {df.columns}")
-
-    # Ensure clean URLs from the second column
-    second_col = df.iloc[:, 2].astype(str)  # Pre-packaged Job Solutions column
-
+    second_col = df.iloc[:, 2].astype(str)
     if second_col.str.contains('http').any() or second_col.str.contains('www').any():
-        df["url"] = second_col  # Already has full URLs
+        df["url"] = second_col
     else:
-        # Create full URLs from IDs
         df["url"] = "https://www.shl.com" + second_col.str.replace(r'^(?!/)', '/', regex=True)
-
-    # Map T/F to Yes/No for remote testing and adaptive support
     df["remote_support"] = df.iloc[:, 3].map(lambda x: "Yes" if x == "T" else "No")
     df["adaptive_support"] = df.iloc[:, 4].map(lambda x: "Yes" if x == "T" else "No")
-
-    # Handle test_type properly - convert string representation of list to actual list
     df["test_type"] = df.iloc[:, 5].apply(lambda x: eval(x) if isinstance(x, str) else x)
-
-    # Get description from column 7
     df["description"] = df.iloc[:, 6]
-
-    # Extract duration with error handling from column 10
-    df["duration"] = pd.to_numeric(
-        df.iloc[:, 9].astype(str).str.extract(r'(\d+)')[0],
-        errors='coerce'
-    )
-
-    # Print sample of cleaned data for debugging
-    print(f"Sample of cleaned data: {df[['url', 'adaptive_support', 'remote_support', 'description', 'duration', 'test_type']].head(2)}")
-
+    df["duration"] = pd.to_numeric(df.iloc[:, 9].astype(str).str.extract(r'(\d+)')[0], errors='coerce')
     return df[["url", "adaptive_support", "remote_support", "description", "duration", "test_type"]]

 try:
-    # Load CSV with explicit encoding
     df = pd.read_csv("assesments.csv", encoding='utf-8')
-    print(f"CSV loaded successfully with {len(df)} rows")
     df_clean = clean_df(df)
 except Exception as e:
-    print(f"Error loading or cleaning data: {e}")
-    # Create an empty DataFrame with required columns as fallback
-    df_clean = pd.DataFrame(columns=["url", "adaptive_support", "remote_support",
-                                     "description", "duration", "test_type"])
+    print(f"Error loading CSV: {e}")
+    df_clean = pd.DataFrame(columns=["url", "adaptive_support", "remote_support", "description", "duration", "test_type"])

 def validate_and_fix_urls(candidates):
-    """Validates and fixes URLs in candidate assessments."""
     for candidate in candidates:
-        # Skip if candidate is not a dictionary
         if not isinstance(candidate, dict):
             continue
-
-        # Ensure URL exists
         if 'url' not in candidate or not candidate['url']:
             candidate['url'] = 'https://www.shl.com/missing-url'
             continue
-
         url = str(candidate['url'])
-
-        # Fix URLs that are just numbers
         if url.isdigit():
             candidate['url'] = f"https://www.shl.com/{url}"
             continue
-
-        # Add protocol if missing
         if not url.startswith(('http://', 'https://')):
             candidate['url'] = f"https://www.shl.com{url}" if url.startswith('/') else f"https://www.shl.com/{url}"
-
     return candidates

 def recommend(query):
     if not query.strip():
         return {"error": "Please enter a job description"}
-
     try:
-        # Print some debug info
-        print(f"Processing query: {query[:50]}...")
-
-        # Get relevant passages
         top_k_df = get_relevant_passages(query, df_clean, top_k=20)
-
-        # Debug: Check if we got any results
-        print(f"Retrieved {len(top_k_df)} assessments")
-
         if top_k_df.empty:
             return {"error": "No matching assessments found"}
-
-        # Convert test_type to list if it's not already
         top_k_df['test_type'] = top_k_df['test_type'].apply(
             lambda x: x if isinstance(x, list) else
             (eval(x) if isinstance(x, str) and x.startswith('[') else [str(x)])
         )
-
-        # Handle nan values for duration
         top_k_df['duration'] = top_k_df['duration'].fillna(-1).astype(int)
         top_k_df.loc[top_k_df['duration'] == -1, 'duration'] = None
-
-        # Convert DataFrame to list of dictionaries
         candidates = top_k_df.to_dict(orient="records")
-
-        # Additional URL validation
         candidates = validate_and_fix_urls(candidates)
-
-        # Print sample of data being sent to reranker
-        if candidates:
-            print(f"Sample candidate being sent to reranker: {candidates[0]}")
-
-        # Get recommendations
         result = rerank(query, candidates)
-
-        # Post-process result
         if 'recommended_assessments' in result:
             result['recommended_assessments'] = validate_and_fix_urls(result['recommended_assessments'])
-            print(f"Returning {len(result['recommended_assessments'])} recommended assessments")
-
         return result
     except Exception as e:
         import traceback
-        error_details = traceback.format_exc()
-        print(f"Error: {str(e)}\n{error_details}")
-        return {"error": f"Error processing request: {str(e)}"}
+        return {"error": f"Error: {str(e)}", "trace": traceback.format_exc()}
+
+# --- API Endpoints ---
+@app.get("/health")
+async def health_check():
+    return JSONResponse(status_code=200, content={"status": "healthy"})

-iface = gr.Interface(
+@app.post("/recommend")
+async def recommend_api(request: Request):
+    body = await request.json()
+    query = body.get("query", "").strip()
+    if not query:
+        return JSONResponse(status_code=400, content={"error": "Missing 'query' in request body"})
+    result = recommend(query)
+    return JSONResponse(status_code=200, content=result)
+
+# --- Gradio UI ---
+gradio_iface = gr.Interface(
     fn=recommend,
     inputs=gr.Textbox(label="Enter Job Description", lines=4),
     outputs="json",
@@ -142,8 +103,7 @@
     description="Paste a job description to get the most relevant SHL assessments."
 )

-if __name__ == "__main__":
-
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)
-    iface.launch()
+# Mount Gradio app at root
+app = mount_gradio_app(app, gradio_iface, path="/")
+
+# Hugging Face Spaces runs `app` object, so no need for __main__
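
For context on the endpoints this commit adds, here is a minimal client-side sketch (not part of the commit). It assumes the app is reachable at a placeholder BASE_URL, for example a local run via `uvicorn app:app --host 0.0.0.0 --port 7860`, and that the `requests` package is installed; the sample query text is made up.

import requests

BASE_URL = "http://localhost:7860"  # placeholder; substitute the actual Space URL

# GET /health should return {"status": "healthy"}
print(requests.get(f"{BASE_URL}/health").json())

# POST /recommend expects a JSON body with a "query" field
payload = {"query": "Hiring a backend engineer with Python and SQL experience"}
resp = requests.post(f"{BASE_URL}/recommend", json=payload)
print(resp.status_code)
print(resp.json())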