Molbap (HF Staff) committed
Commit de07ff6 · 1 Parent(s): e9769e3
Files changed (1):
  1. modular_graph_and_candidates.py +9 -3
modular_graph_and_candidates.py CHANGED
@@ -123,15 +123,21 @@ def embedding_similarity_clusters(models_root: Path, missing: List[str], thr: fl
     names = list(texts)
     all_embeddings = []
 
-    print("Encoding embeddings...")
+    print(f"Encoding embeddings for {len(names)} models...")
     batch_size = 1
+
     for i in tqdm(range(0, len(names), batch_size), desc="Models", leave=False):
+        model_name = names[i]
+        text_len = len(texts[model_name])
+
         try:
+            print(f"Processing {model_name} (text length: {text_len})")
+            batch = [texts[model_name]]
-            batch = [texts[names[i]]]
             emb = model.encode(batch, convert_to_numpy=True, show_progress_bar=False)
             all_embeddings.append(emb)
+            print(f"✓ Completed {model_name}")
         except Exception as e:
-            print(f"⚠️ GPU worker error for {names[i]}: {e}")
+            print(f"⚠️ GPU worker error for {model_name}: {type(e).__name__}: {e}")
             # Create zero embedding as placeholder to maintain consistency
             zero_emb = np.zeros((1, model.get_sentence_embedding_dimension()), dtype=np.float32)
             all_embeddings.append(zero_emb)
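
The hunk only touches the encoding loop, but it shows why the zero-embedding placeholder matters: it keeps row i of the stacked embeddings aligned with names[i] even when a model's text fails to encode. Below is a minimal sketch of the downstream step this loop presumably feeds, assuming the rest of embedding_similarity_clusters stacks all_embeddings and keeps model pairs whose cosine similarity is at least thr; the helper name pairs_above_threshold is hypothetical and not part of the repository.

    # Minimal sketch (assumed downstream usage, not code from this commit).
    import numpy as np

    def pairs_above_threshold(names, all_embeddings, thr):
        """Return (name_a, name_b, score) for model pairs with cosine similarity >= thr."""
        # Stacking works only because the except-branch appends a zero placeholder,
        # keeping row i aligned with names[i] even when encoding failed.
        embs = np.vstack(all_embeddings)                 # shape: (len(names), dim)
        norms = np.linalg.norm(embs, axis=1, keepdims=True)
        norms[norms == 0] = 1.0                          # zero placeholders stay zero vectors
        unit = embs / norms
        sims = unit @ unit.T                             # pairwise cosine similarity
        out = []
        for a in range(len(names)):
            for b in range(a + 1, len(names)):
                if sims[a, b] >= thr:
                    out.append((names[a], names[b], float(sims[a, b])))
        return out

Under this reading, a zero placeholder can never clear the threshold, so models whose encoding raised an exception simply drop out of the candidate pairs instead of shifting every later row, which is what the "maintain consistency" comment in the diff is guarding.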