npc0 committed on
Commit
6a99ff4
·
verified ·
1 Parent(s): 3371ce8

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +26 -2
src/streamlit_app.py CHANGED
@@ -237,8 +237,32 @@ def get_clusters_from_r_matrix(r_matrix):
237
  return np.array([]) # Return empty array on error
238
 
239
 
240
- def get_cluster_labels():
241
  r_matrix, user_id_to_index, _ = get_r_matrix_from_votes()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  cluster_labels = get_clusters_from_r_matrix(r_matrix)
243
  if len(cluster_labels) == 0:
244
  cluster_labels = [0] * len(user_id_to_index)
@@ -264,7 +288,7 @@ def get_user_cluster_label(user_id, ttl_hash=None):
264
  if the user is not found or has no cluster label.
265
  """
266
  # get_cluster_labels is already cached, so calling it repeatedly is fine
267
- cluster_labels, user_id_to_index = get_cluster_labels()
268
 
269
  # Create a reverse mapping from index to user_id for easier lookup
270
  index_to_user_id = {index: uid for uid, index in user_id_to_index.items()}
 
237
  return np.array([]) # Return empty array on error
238
 
239
 
240
+ def get_cluster_labels(user_id):
241
  r_matrix, user_id_to_index, _ = get_r_matrix_from_votes()
242
+ # Check if the user_id exists in the matrix index
243
+ if user_id not in user_id_to_index:
244
+ print(f"Warning: User ID '{user_id}' not found in the R matrix. Cannot perform user-specific filtering for clustering.")
245
+ # Return empty results as filtering based on this user is not possible.
246
+ # The downstream function get_user_cluster_label handles the user not being in the index.
247
+ # Returning empty arrays/dict matches the structure of the expected return value.
248
+ return np.array([]), {} # Return empty labels and empty index map
249
+
250
+ # Get the row for the specific user
251
+ user_row = r_matrix.loc[user_id]
252
+
253
+ # Find columns where the user has voted (values are not NaN)
254
+ voted_comment_ids = user_row.dropna().index
255
+
256
+ # Ensure we handle the case where the user hasn't voted on anything
257
+ if voted_comment_ids.empty:
258
+ print(f"Warning: User ID '{user_id}' has not voted on any comments. Cannot perform clustering based on votes.")
259
+ # If no votes, no columns to cluster on. Return empty results.
260
+ return np.array([]), {}
261
+
262
+ # Filter the r_matrix to include only these columns
263
+ # This is the matrix that will be used for clustering in the next step.
264
+ # The subsequent line calling get_clusters_from_r_matrix should use this variable.
265
+ r_matrix_to_cluster = r_matrix[voted_comment_ids]
266
  cluster_labels = get_clusters_from_r_matrix(r_matrix)
267
  if len(cluster_labels) == 0:
268
  cluster_labels = [0] * len(user_id_to_index)
 
288
  if the user is not found or has no cluster label.
289
  """
290
  # get_cluster_labels is already cached, so calling it repeatedly is fine
291
+ cluster_labels, user_id_to_index = get_cluster_labels(user_id)
292
 
293
  # Create a reverse mapping from index to user_id for easier lookup
294
  index_to_user_id = {index: uid for uid, index in user_id_to_index.items()}