AnshulS committed on
Commit
08dabce
·
verified ·
1 Parent(s): 3ed9ca7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -32
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import pandas as pd
2
  import gradio as gr
3
- import numpy as np
4
  from retriever import get_relevant_passages
5
  from reranker import rerank
6
 
@@ -85,19 +84,15 @@ def recommend(query):
85
  # Print some debug info
86
  print(f"Processing query: {query[:50]}...")
87
 
88
- # Get relevant passages with increased diversity (more candidates)
89
- top_k_df = get_relevant_passages(query, df_clean, top_k=30)
90
 
91
  # Debug: Check if we got any results
92
  print(f"Retrieved {len(top_k_df)} assessments")
93
 
94
  if top_k_df.empty:
95
  return {"error": "No matching assessments found"}
96
-
97
- # Remove duplicates by URL - keep the first occurrence (highest ranked)
98
- top_k_df = top_k_df.drop_duplicates(subset=['url'])
99
- print(f"After deduplication: {len(top_k_df)} unique assessments")
100
-
101
  # Convert test_type to list if it's not already
102
  top_k_df['test_type'] = top_k_df['test_type'].apply(
103
  lambda x: x if isinstance(x, list) else
@@ -108,15 +103,6 @@ def recommend(query):
108
  top_k_df['duration'] = top_k_df['duration'].fillna(-1).astype(int)
109
  top_k_df.loc[top_k_df['duration'] == -1, 'duration'] = None
110
 
111
- # Add a diverse set of assessment types - ensure we have different test types represented
112
- test_types = set()
113
- for _, row in top_k_df.iterrows():
114
- if isinstance(row['test_type'], list):
115
- for test_type in row['test_type']:
116
- test_types.add(test_type)
117
-
118
- print(f"Found assessments covering {len(test_types)} different test types")
119
-
120
  # Convert DataFrame to list of dictionaries
121
  candidates = top_k_df.to_dict(orient="records")
122
 
@@ -130,22 +116,10 @@ def recommend(query):
130
  # Get recommendations
131
  result = rerank(query, candidates)
132
 
133
- # Post-process result to ensure no duplicates
134
  if 'recommended_assessments' in result:
135
- recommendations = result['recommended_assessments']
136
-
137
- # Deduplicate by URL
138
- unique_urls = set()
139
- unique_recommendations = []
140
-
141
- for rec in recommendations:
142
- if rec['url'] not in unique_urls:
143
- unique_urls.add(rec['url'])
144
- unique_recommendations.append(rec)
145
-
146
- # Validate URLs
147
- result['recommended_assessments'] = validate_and_fix_urls(unique_recommendations)
148
- print(f"Returning {len(result['recommended_assessments'])} unique recommended assessments")
149
 
150
  return result
151
  except Exception as e:
 
1
  import pandas as pd
2
  import gradio as gr
 
3
  from retriever import get_relevant_passages
4
  from reranker import rerank
5
 
 
84
  # Print some debug info
85
  print(f"Processing query: {query[:50]}...")
86
 
87
+ # Get relevant passages
88
+ top_k_df = get_relevant_passages(query, df_clean, top_k=20)
89
 
90
  # Debug: Check if we got any results
91
  print(f"Retrieved {len(top_k_df)} assessments")
92
 
93
  if top_k_df.empty:
94
  return {"error": "No matching assessments found"}
95
+
 
 
 
 
96
  # Convert test_type to list if it's not already
97
  top_k_df['test_type'] = top_k_df['test_type'].apply(
98
  lambda x: x if isinstance(x, list) else
 
103
  top_k_df['duration'] = top_k_df['duration'].fillna(-1).astype(int)
104
  top_k_df.loc[top_k_df['duration'] == -1, 'duration'] = None
105
 
 
 
 
 
 
 
 
 
 
106
  # Convert DataFrame to list of dictionaries
107
  candidates = top_k_df.to_dict(orient="records")
108
 
 
116
  # Get recommendations
117
  result = rerank(query, candidates)
118
 
119
+ # Post-process result
120
  if 'recommended_assessments' in result:
121
+ result['recommended_assessments'] = validate_and_fix_urls(result['recommended_assessments'])
122
+ print(f"Returning {len(result['recommended_assessments'])} recommended assessments")
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
  return result
125
  except Exception as e: