Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import pandas as pd
|
2 |
import gradio as gr
|
3 |
-
import numpy as np
|
4 |
from retriever import get_relevant_passages
|
5 |
from reranker import rerank
|
6 |
|
@@ -85,19 +84,15 @@ def recommend(query):
|
|
85 |
# Print some debug info
|
86 |
print(f"Processing query: {query[:50]}...")
|
87 |
|
88 |
-
# Get relevant passages
|
89 |
-
top_k_df = get_relevant_passages(query, df_clean, top_k=
|
90 |
|
91 |
# Debug: Check if we got any results
|
92 |
print(f"Retrieved {len(top_k_df)} assessments")
|
93 |
|
94 |
if top_k_df.empty:
|
95 |
return {"error": "No matching assessments found"}
|
96 |
-
|
97 |
-
# Remove duplicates by URL - keep the first occurrence (highest ranked)
|
98 |
-
top_k_df = top_k_df.drop_duplicates(subset=['url'])
|
99 |
-
print(f"After deduplication: {len(top_k_df)} unique assessments")
|
100 |
-
|
101 |
# Convert test_type to list if it's not already
|
102 |
top_k_df['test_type'] = top_k_df['test_type'].apply(
|
103 |
lambda x: x if isinstance(x, list) else
|
@@ -108,15 +103,6 @@ def recommend(query):
|
|
108 |
top_k_df['duration'] = top_k_df['duration'].fillna(-1).astype(int)
|
109 |
top_k_df.loc[top_k_df['duration'] == -1, 'duration'] = None
|
110 |
|
111 |
-
# Add a diverse set of assessment types - ensure we have different test types represented
|
112 |
-
test_types = set()
|
113 |
-
for _, row in top_k_df.iterrows():
|
114 |
-
if isinstance(row['test_type'], list):
|
115 |
-
for test_type in row['test_type']:
|
116 |
-
test_types.add(test_type)
|
117 |
-
|
118 |
-
print(f"Found assessments covering {len(test_types)} different test types")
|
119 |
-
|
120 |
# Convert DataFrame to list of dictionaries
|
121 |
candidates = top_k_df.to_dict(orient="records")
|
122 |
|
@@ -130,22 +116,10 @@ def recommend(query):
|
|
130 |
# Get recommendations
|
131 |
result = rerank(query, candidates)
|
132 |
|
133 |
-
# Post-process result
|
134 |
if 'recommended_assessments' in result:
|
135 |
-
|
136 |
-
|
137 |
-
# Deduplicate by URL
|
138 |
-
unique_urls = set()
|
139 |
-
unique_recommendations = []
|
140 |
-
|
141 |
-
for rec in recommendations:
|
142 |
-
if rec['url'] not in unique_urls:
|
143 |
-
unique_urls.add(rec['url'])
|
144 |
-
unique_recommendations.append(rec)
|
145 |
-
|
146 |
-
# Validate URLs
|
147 |
-
result['recommended_assessments'] = validate_and_fix_urls(unique_recommendations)
|
148 |
-
print(f"Returning {len(result['recommended_assessments'])} unique recommended assessments")
|
149 |
|
150 |
return result
|
151 |
except Exception as e:
|
|
|
1 |
import pandas as pd
|
2 |
import gradio as gr
|
|
|
3 |
from retriever import get_relevant_passages
|
4 |
from reranker import rerank
|
5 |
|
|
|
84 |
# Print some debug info
|
85 |
print(f"Processing query: {query[:50]}...")
|
86 |
|
87 |
+
# Get relevant passages
|
88 |
+
top_k_df = get_relevant_passages(query, df_clean, top_k=20)
|
89 |
|
90 |
# Debug: Check if we got any results
|
91 |
print(f"Retrieved {len(top_k_df)} assessments")
|
92 |
|
93 |
if top_k_df.empty:
|
94 |
return {"error": "No matching assessments found"}
|
95 |
+
|
|
|
|
|
|
|
|
|
96 |
# Convert test_type to list if it's not already
|
97 |
top_k_df['test_type'] = top_k_df['test_type'].apply(
|
98 |
lambda x: x if isinstance(x, list) else
|
|
|
103 |
top_k_df['duration'] = top_k_df['duration'].fillna(-1).astype(int)
|
104 |
top_k_df.loc[top_k_df['duration'] == -1, 'duration'] = None
|
105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
# Convert DataFrame to list of dictionaries
|
107 |
candidates = top_k_df.to_dict(orient="records")
|
108 |
|
|
|
116 |
# Get recommendations
|
117 |
result = rerank(query, candidates)
|
118 |
|
119 |
+
# Post-process result
|
120 |
if 'recommended_assessments' in result:
|
121 |
+
result['recommended_assessments'] = validate_and_fix_urls(result['recommended_assessments'])
|
122 |
+
print(f"Returning {len(result['recommended_assessments'])} recommended assessments")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
|
124 |
return result
|
125 |
except Exception as e:
|