Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +26 -2
src/streamlit_app.py
CHANGED
@@ -237,8 +237,32 @@ def get_clusters_from_r_matrix(r_matrix):
|
|
237 |
return np.array([]) # Return empty array on error
|
238 |
|
239 |
|
240 |
-
def get_cluster_labels():
|
241 |
r_matrix, user_id_to_index, _ = get_r_matrix_from_votes()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
242 |
cluster_labels = get_clusters_from_r_matrix(r_matrix)
|
243 |
if len(cluster_labels) == 0:
|
244 |
cluster_labels = [0] * len(user_id_to_index)
|
@@ -264,7 +288,7 @@ def get_user_cluster_label(user_id, ttl_hash=None):
|
|
264 |
if the user is not found or has no cluster label.
|
265 |
"""
|
266 |
# get_cluster_labels is already cached, so calling it repeatedly is fine
|
267 |
-
cluster_labels, user_id_to_index = get_cluster_labels()
|
268 |
|
269 |
# Create a reverse mapping from index to user_id for easier lookup
|
270 |
index_to_user_id = {index: uid for uid, index in user_id_to_index.items()}
|
|
|
237 |
return np.array([]) # Return empty array on error
|
238 |
|
239 |
|
240 |
+
def get_cluster_labels(user_id):
|
241 |
r_matrix, user_id_to_index, _ = get_r_matrix_from_votes()
|
242 |
+
# Check if the user_id exists in the matrix index
|
243 |
+
if user_id not in user_id_to_index:
|
244 |
+
print(f"Warning: User ID '{user_id}' not found in the R matrix. Cannot perform user-specific filtering for clustering.")
|
245 |
+
# Return empty results as filtering based on this user is not possible.
|
246 |
+
# The downstream function get_user_cluster_label handles the user not being in the index.
|
247 |
+
# Returning empty arrays/dict matches the structure of the expected return value.
|
248 |
+
return np.array([]), {} # Return empty labels and empty index map
|
249 |
+
|
250 |
+
# Get the row for the specific user
|
251 |
+
user_row = r_matrix.loc[user_id]
|
252 |
+
|
253 |
+
# Find columns where the user has voted (values are not NaN)
|
254 |
+
voted_comment_ids = user_row.dropna().index
|
255 |
+
|
256 |
+
# Ensure we handle the case where the user hasn't voted on anything
|
257 |
+
if voted_comment_ids.empty:
|
258 |
+
print(f"Warning: User ID '{user_id}' has not voted on any comments. Cannot perform clustering based on votes.")
|
259 |
+
# If no votes, no columns to cluster on. Return empty results.
|
260 |
+
return np.array([]), {}
|
261 |
+
|
262 |
+
# Filter the r_matrix to include only these columns
|
263 |
+
# This is the matrix that will be used for clustering in the next step.
|
264 |
+
# NOTE: the get_clusters_from_r_matrix call below still receives the full r_matrix; pass r_matrix_to_cluster instead for this filter to take effect.
|
265 |
+
r_matrix_to_cluster = r_matrix[voted_comment_ids]
|
266 |
cluster_labels = get_clusters_from_r_matrix(r_matrix)
|
267 |
if len(cluster_labels) == 0:
|
268 |
cluster_labels = [0] * len(user_id_to_index)
|
|
|
288 |
if the user is not found or has no cluster label.
|
289 |
"""
|
290 |
# get_cluster_labels is already cached, so calling it repeatedly is fine
|
291 |
+
cluster_labels, user_id_to_index = get_cluster_labels(user_id)
|
292 |
|
293 |
# Create a reverse mapping from index to user_id for easier lookup
|
294 |
index_to_user_id = {index: uid for uid, index in user_id_to_index.items()}
|