Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -41,16 +41,61 @@ except Exception as e:
|
|
41 |
df_clean = pd.DataFrame(columns=["url", "adaptive_support", "remote_support",
|
42 |
"description", "duration", "test_type"])
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
def recommend(query):
    """Return reranked assessment recommendations for a job description.

    Args:
        query: Free-text job description entered by the user.

    Returns:
        Whatever ``rerank`` returns for the retrieved candidates, or a dict
        of the form ``{"error": <message>}`` when the query is blank or any
        step raises.
    """
    # A missing or non-string query (e.g. None) is treated like a blank one;
    # calling .strip() on it would otherwise raise outside the try block and
    # surface as an unhandled exception instead of the error payload.
    if not isinstance(query, str) or not query.strip():
        return {"error": "Please enter a job description"}

    try:
        # Retrieve candidate rows from df_clean, then hand them to the
        # reranker as a list of per-row dicts.
        top_k_df = get_relevant_passages(query, df_clean, top_k=20)
        candidates = top_k_df.to_dict(orient="records")
        result = rerank(query, candidates)
        return result
    except Exception as e:
        # UI boundary: report the failure to the caller instead of crashing.
        return {"error": f"Error processing request: {str(e)}"}
|
55 |
|
56 |
iface = gr.Interface(
|
|
|
41 |
df_clean = pd.DataFrame(columns=["url", "adaptive_support", "remote_support",
|
42 |
"description", "duration", "test_type"])
|
43 |
|
44 |
+
def validate_and_fix_urls(candidates):
    """Validate and repair the ``url`` field of each candidate assessment.

    Each candidate dict is modified in place so that its ``url`` is an
    absolute http(s) URL:

    * missing, empty, whitespace-only or NaN URLs become
      ``https://www.shl.com/missing-url``;
    * purely numeric URLs (optionally glued directly to
      ``https://www.shl.com``) become ``https://www.shl.com/<digits>``;
    * protocol-relative URLs (``//host/path``) get an ``https:`` scheme;
    * site-relative paths (``/path``) are anchored to ``https://www.shl.com``;
    * anything else without a scheme is prefixed with ``https://``.

    Args:
        candidates: Iterable of dicts describing assessments. ``None`` or an
            empty collection is returned unchanged.

    Returns:
        The same ``candidates`` object that was passed in.
    """
    base_url = 'https://www.shl.com'
    missing_url = f'{base_url}/missing-url'

    # Nothing to validate (also covers a reranker that returned None).
    if not candidates:
        return candidates

    for candidate in candidates:
        raw = candidate.get('url')

        # Ensure URL exists. NaN is truthy, so it needs its own check;
        # otherwise str(nan) would end up as "https://nan".
        if not raw or (isinstance(raw, float) and raw != raw):
            candidate['url'] = missing_url
            continue

        url = str(raw).strip()
        if not url:
            # Whitespace-only values carry no usable URL either.
            candidate['url'] = missing_url
            continue

        # Fix URLs that are just numbers, with or without the site prefix
        # glued directly in front of the digits.
        if url.isdigit():
            candidate['url'] = f'{base_url}/{url}'
            continue
        if url.startswith(base_url) and url[len(base_url):].isdigit():
            candidate['url'] = f'{base_url}/{url[len(base_url):]}'
            continue

        # Add protocol if missing. The scheme check is case-insensitive so
        # "HTTPS://..." is not prefixed a second time.
        if not url.lower().startswith(('http://', 'https://')):
            if url.startswith('//'):
                # Protocol-relative URL: only the scheme is missing.
                url = f'https:{url}'
            elif url.startswith('/'):
                # Site-relative path: "https://" + "/path" would yield the
                # invalid "https:///path", so anchor it to the site instead.
                url = f"{base_url}/{url.lstrip('/')}"
            else:
                url = f'https://{url}'

        candidate['url'] = url

    return candidates
|
64 |
+
|
65 |
def recommend(query):
    """Return reranked assessment recommendations for a job description.

    The query is passed to ``get_relevant_passages`` (with ``df_clean`` and
    ``top_k=20``), candidate URLs lacking an http(s) scheme are anchored to
    ``https://www.shl.com``, the candidates are handed to ``rerank``, and any
    ``recommended_assessments`` in the result are run through
    ``validate_and_fix_urls``. Progress and errors are printed for debugging.

    Args:
        query: Free-text job description entered by the user.

    Returns:
        The reranker's result, or a dict of the form ``{"error": <message>}``
        when the query is blank or any step raises.
    """
    # A missing or non-string query (e.g. None) is treated like a blank one;
    # calling .strip() on it would otherwise raise outside the try block and
    # surface as an unhandled exception instead of the error payload.
    if not isinstance(query, str) or not query.strip():
        return {"error": "Please enter a job description"}

    try:
        # Print some debug info
        print(f"Processing query: {query[:50]}...")

        top_k_df = get_relevant_passages(query, df_clean, top_k=20)

        # Debug: Check URLs in retrieved data
        print(f"Retrieved {len(top_k_df)} assessments")
        # Guard on the column as well: a purely diagnostic print must never
        # raise KeyError and fail the whole request.
        if not top_k_df.empty and 'url' in top_k_df.columns:
            print(f"Sample URLs from retrieval: {top_k_df['url'].iloc[:3].tolist()}")

        candidates = top_k_df.to_dict(orient="records")

        # Additional URL validation before sending to reranker
        for c in candidates:
            if 'url' in c:
                url = str(c['url'])
                if not url.startswith(('http://', 'https://')):
                    c['url'] = f"https://www.shl.com/{url.lstrip('/')}"

        result = rerank(query, candidates)

        # Post-process result to ensure URLs are properly formatted. Only a
        # dict result can carry the key; other shapes are returned untouched.
        if isinstance(result, dict) and 'recommended_assessments' in result:
            result['recommended_assessments'] = validate_and_fix_urls(result['recommended_assessments'])

        return result
    except Exception as e:
        # UI boundary: log the full traceback, then report the failure to the
        # caller instead of crashing.
        import traceback
        error_details = traceback.format_exc()
        print(f"Error: {str(e)}\n{error_details}")
        return {"error": f"Error processing request: {str(e)}"}
|
100 |
|
101 |
iface = gr.Interface(
|