AnshulS committed on
Commit
cbb8b01
·
verified ·
1 Parent(s): bef0a51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -0
app.py CHANGED
@@ -41,16 +41,61 @@ except Exception as e:
41
  df_clean = pd.DataFrame(columns=["url", "adaptive_support", "remote_support",
42
  "description", "duration", "test_type"])
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def recommend(query):
    """Return reranked assessment recommendations for a job description.

    Args:
        query: Free-text job description. ``None``, non-string and
            blank/whitespace-only values are rejected with an error dict.

    Returns:
        Whatever ``rerank`` returns on success, or a dict with a single
        ``"error"`` key when the query is empty or processing fails.
    """
    # Check the type before calling .strip(): this guard sits outside the
    # try block, so a None query would otherwise raise AttributeError to
    # the caller instead of producing the friendly error message.
    if not isinstance(query, str) or not query.strip():
        return {"error": "Please enter a job description"}

    try:
        top_k_df = get_relevant_passages(query, df_clean, top_k=20)
        candidates = top_k_df.to_dict(orient="records")
        return rerank(query, candidates)
    except Exception as e:
        # Top-level boundary: report the failure as data rather than raising.
        return {"error": f"Error processing request: {str(e)}"}
55
 
56
  iface = gr.Interface(
 
41
  df_clean = pd.DataFrame(columns=["url", "adaptive_support", "remote_support",
42
  "description", "duration", "test_type"])
43
 
def validate_and_fix_urls(candidates):
    """Normalize the ``url`` field of each candidate assessment in place.

    Rules, applied in order to every candidate dict:

    * missing, empty, ``None`` or NaN URL -> ``https://www.shl.com/missing-url``
    * all-digit URL, or the SHL base immediately followed by digits
      -> ``https://www.shl.com/<digits>``
    * URL that already has an http(s) scheme (any letter case) -> kept
    * protocol-relative URL (``//host/path``) -> ``https:`` is prepended
    * relative path (leading ``/`` or no dot in the first segment)
      -> joined onto ``https://www.shl.com/``
    * anything else is assumed to be ``host/path`` -> ``https://`` is prepended

    Args:
        candidates: Iterable of dicts, each optionally carrying a ``url`` key.
            A falsy value (``None`` or an empty list) is returned unchanged.

    Returns:
        The same ``candidates`` object, with ``url`` values rewritten.
    """
    base_url = 'https://www.shl.com'
    missing_url = f'{base_url}/missing-url'

    if not candidates:
        return candidates

    for candidate in candidates:
        raw = candidate.get('url')

        # NaN is truthy, so it needs its own check; NaN is the only value
        # that compares unequal to itself.
        is_nan = isinstance(raw, float) and raw != raw
        url = '' if (not raw or is_nan) else str(raw).strip()
        if not url:
            candidate['url'] = missing_url
            continue

        # Bare numeric identifiers, with or without the base glued on front.
        if url.isdigit():
            candidate['url'] = f'{base_url}/{url}'
            continue
        suffix = url[len(base_url):]
        if url.startswith(base_url) and suffix.isdigit():
            candidate['url'] = f'{base_url}/{suffix}'
            continue

        # Already absolute: compare case-insensitively so "HTTPS://..." does
        # not get a second scheme prepended.
        if url.lower().startswith(('http://', 'https://')):
            candidate['url'] = url
            continue

        if url.startswith('//'):
            # Protocol-relative URL: only the scheme is missing.
            candidate['url'] = f'https:{url}'
        elif url.startswith('/') or '.' not in url.split('/', 1)[0]:
            # Relative path: prefixing just "https://" would yield an
            # invalid "https:///path", so anchor it on the SHL site instead.
            candidate['url'] = f"{base_url}/{url.lstrip('/')}"
        else:
            # First segment looks like a hostname (contains a dot).
            candidate['url'] = f'https://{url}'

    return candidates
64
+
def recommend(query):
    """Return reranked assessment recommendations for a job description.

    Retrieves the top 20 passages for ``query`` from ``df_clean``, makes
    their URLs absolute, passes them to ``rerank`` and normalizes the URLs
    in the reranked ``recommended_assessments`` list.

    Args:
        query: Free-text job description. ``None``, non-string and
            blank/whitespace-only values are rejected with an error dict.

    Returns:
        Whatever ``rerank`` returns (with ``recommended_assessments`` URLs
        normalized when that key is present), or a dict with a single
        ``"error"`` key when the query is empty or processing fails.
    """
    # Check the type before calling .strip(): this guard sits outside the
    # try block, so a None query would otherwise raise AttributeError to
    # the caller instead of producing the friendly error message.
    if not isinstance(query, str) or not query.strip():
        return {"error": "Please enter a job description"}

    try:
        # Debug output: truncated query, retrieval size and a URL sample.
        print(f"Processing query: {query[:50]}...")

        top_k_df = get_relevant_passages(query, df_clean, top_k=20)

        print(f"Retrieved {len(top_k_df)} assessments")
        if not top_k_df.empty:
            print(f"Sample URLs from retrieval: {top_k_df['url'].iloc[:3].tolist()}")

        candidates = top_k_df.to_dict(orient="records")

        # Make relative URLs absolute before they reach the reranker.
        for c in candidates:
            raw_url = c.get('url')
            # Leave missing values (None / NaN) untouched so they are not
            # turned into bogus links such as ".../None" or ".../nan".
            if raw_url is None or (isinstance(raw_url, float) and raw_url != raw_url):
                continue
            url_text = str(raw_url)
            if not url_text.startswith(('http://', 'https://')):
                c['url'] = f"https://www.shl.com/{url_text.lstrip('/')}"

        result = rerank(query, candidates)

        # Only post-process dict results: on a string `in` would be a
        # substring test, and on None it would raise.
        if isinstance(result, dict) and 'recommended_assessments' in result:
            result['recommended_assessments'] = validate_and_fix_urls(result['recommended_assessments'])

        return result
    except Exception as e:
        # Top-level boundary: print the full traceback for debugging, and
        # give the caller a short error message instead of raising.
        import traceback
        error_details = traceback.format_exc()
        print(f"Error: {str(e)}\n{error_details}")
        return {"error": f"Error processing request: {str(e)}"}
100
 
101
  iface = gr.Interface(