npc0 committed on
Commit
9c00a0d
·
verified ·
1 Parent(s): 1953329

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +6 -1
src/streamlit_app.py CHANGED
@@ -221,7 +221,12 @@ def get_clusters_from_r_matrix(r_matrix):
221
  # Using default parameters for min_cluster_size and min_samples
222
  # These might need tuning based on data characteristics and desired cluster granularity
223
  # allow_single_cluster=True prevents an error if all points form one cluster
224
- clusterer = hdbscan.HDBSCAN(metric=hamming_distance_with_nan, allow_single_cluster=True)
 
 
 
 
 
225
 
226
  # Fit the model directly to the DataFrame values
227
  # HDBSCAN fit expects a numpy array or similar structure
 
221
  # Using default parameters for min_cluster_size and min_samples
222
  # These might need tuning based on data characteristics and desired cluster granularity
223
  # allow_single_cluster=True prevents an error if all points form one cluster
224
+ clusterer = hdbscan.HDBSCAN(
225
+ metric=hamming_distance_with_nan,
226
+ allow_single_cluster=True,
227
+ min_cluster_size=max(int(np.sqrt(len(r_matrix))), 3),
228
+ min_samples=None,
229
+ )
230
 
231
  # Fit the model directly to the DataFrame values
232
  # HDBSCAN fit expects a numpy array or similar structure