Spaces:

npc0
/

SteamPolis

Sleeping

App Files Files

npc0 committed on May 1

Commit

6a99ff4

verified ·

1 Parent(s): 3371ce8

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +26 -2

src/streamlit_app.py CHANGED Viewed

@@ -237,8 +237,32 @@ def get_clusters_from_r_matrix(r_matrix):
         return np.array([]) # Return empty array on error
-def get_cluster_labels():
     r_matrix, user_id_to_index, _ = get_r_matrix_from_votes()
     cluster_labels = get_clusters_from_r_matrix(r_matrix)
     if len(cluster_labels) == 0:
         cluster_labels = [0] * len(user_id_to_index)
@@ -264,7 +288,7 @@ def get_user_cluster_label(user_id, ttl_hash=None):
                          if the user is not found or has no cluster label.
     """
     # get_cluster_labels is already cached, so calling it repeatedly is fine
-    cluster_labels, user_id_to_index = get_cluster_labels()
     # Create a reverse mapping from index to user_id for easier lookup
     index_to_user_id = {index: uid for uid, index in user_id_to_index.items()}

         return np.array([]) # Return empty array on error
+def get_cluster_labels(user_id):
     r_matrix, user_id_to_index, _ = get_r_matrix_from_votes()
+    # Check if the user_id exists in the matrix index
+    if user_id not in user_id_to_index:
+        print(f"Warning: User ID '{user_id}' not found in the R matrix. Cannot perform user-specific filtering for clustering.")
+        # Return empty results as filtering based on this user is not possible.
+        # The downstream function get_user_cluster_label handles the user not being in the index.
+        # Returning empty arrays/dict matches the structure of the expected return value.
+        return np.array([]), {} # Return empty labels and empty index map
+    # Get the row for the specific user
+    user_row = r_matrix.loc[user_id]
+    # Find columns where the user has voted (values are not NaN)
+    voted_comment_ids = user_row.dropna().index
+    # Ensure we handle the case where the user hasn't voted on anything
+    if voted_comment_ids.empty:
+        print(f"Warning: User ID '{user_id}' has not voted on any comments. Cannot perform clustering based on votes.")
+        # If no votes, no columns to cluster on. Return empty results.
+        return np.array([]), {}
+    # Filter the r_matrix to include only these columns
+    # This is the matrix that will be used for clustering in the next step.
+    # The subsequent line calling get_clusters_from_r_matrix should use this variable.
+    r_matrix_to_cluster = r_matrix[voted_comment_ids]
     cluster_labels = get_clusters_from_r_matrix(r_matrix)
     if len(cluster_labels) == 0:
         cluster_labels = [0] * len(user_id_to_index)
                          if the user is not found or has no cluster label.
     """
     # get_cluster_labels is already cached, so calling it repeatedly is fine
+    cluster_labels, user_id_to_index = get_cluster_labels(user_id)
     # Create a reverse mapping from index to user_id for easier lookup
     index_to_user_id = {index: uid for uid, index in user_id_to_index.items()}