Molbap HF Staff committed on
Commit
cb81a08
·
1 Parent(s): c0a0c28

recover mess

Browse files
modular_graph_and_candidates.py CHANGED
@@ -45,7 +45,7 @@ import spaces
45
  # ────────────────────────────────────────────────────────────────────────────────
46
  # CONFIG
47
  # ───────────────────────────────────────────────────────────────────────────────
48
- SIM_DEFAULT = 0.78 # Jaccard similarity threshold
49
  PIXEL_MIN_HITS = 0 # multimodal trigger ("pixel_values")
50
  HTML_DEFAULT = "d3_modular_graph.html"
51
 
@@ -126,26 +126,24 @@ def embedding_similarity_clusters(models_root: Path, missing: List[str], thr: fl
126
  texts[name] = code.strip() or " "
127
 
128
  names = list(texts)
129
- names = names[:90] # Limit before processing
130
  all_embeddings = []
131
 
132
  print(f"Encoding embeddings for {len(names)} models...")
133
  batch_size = 8
134
 
135
- for i in tqdm(range(0, len(names), batch_size), desc="Models", leave=False):
136
- model_name = names[i]
137
- text_len = len(texts[model_name])
138
 
139
  try:
140
- print(f"Processing {model_name} (text length: {text_len})")
141
- batch = [texts[model_name]]
142
- emb = model.encode(batch, convert_to_numpy=True, show_progress_bar=False)
143
  all_embeddings.append(emb)
144
- print(f"βœ“ Completed {model_name}")
145
  except Exception as e:
146
- print(f"⚠️ GPU worker error for {model_name}: {type(e).__name__}: {e}")
147
- # Create zero embedding as placeholder to maintain consistency
148
- zero_emb = np.zeros((1, model.get_sentence_embedding_dimension()), dtype=np.float32)
149
  all_embeddings.append(zero_emb)
150
 
151
  embeddings = np.vstack(all_embeddings).astype(np.float32)
@@ -376,7 +374,7 @@ function updateVisibility() {
376
  }
377
  document.getElementById('toggleRed').addEventListener('change', updateVisibility);
378
 
379
- const HF_LOGO_URI = "hf-logo.png";
380
  const graph = __GRAPH_DATA__;
381
  const W = innerWidth, H = innerHeight;
382
  const svg = d3.select('#dependency').call(d3.zoom().on('zoom', e => g.attr('transform', e.transform)));
 
45
  # ────────────────────────────────────────────────────────────────────────────────
46
  # CONFIG
47
  # ───────────────────────────────────────────────────────────────────────────────
48
+ SIM_DEFAULT = 0.5 # similarity threshold
49
  PIXEL_MIN_HITS = 0 # multimodal trigger ("pixel_values")
50
  HTML_DEFAULT = "d3_modular_graph.html"
51
 
 
126
  texts[name] = code.strip() or " "
127
 
128
  names = list(texts)
 
129
  all_embeddings = []
130
 
131
  print(f"Encoding embeddings for {len(names)} models...")
132
  batch_size = 8
133
 
134
+ for i in tqdm(range(0, len(names), batch_size), desc="Batches", leave=False):
135
+ batch_names = names[i:i+batch_size]
136
+ batch_texts = [texts[name] for name in batch_names]
137
 
138
  try:
139
+ print(f"Processing batch: {batch_names}")
140
+ emb = model.encode(batch_texts, convert_to_numpy=True, show_progress_bar=False)
 
141
  all_embeddings.append(emb)
142
+ print(f"βœ“ Completed batch of {len(batch_names)} models")
143
  except Exception as e:
144
+ print(f"⚠️ GPU worker error for batch {batch_names}: {type(e).__name__}: {e}")
145
+ # Create zero embeddings for all models in failed batch
146
+ zero_emb = np.zeros((len(batch_names), model.get_sentence_embedding_dimension()), dtype=np.float32)
147
  all_embeddings.append(zero_emb)
148
 
149
  embeddings = np.vstack(all_embeddings).astype(np.float32)
 
374
  }
375
  document.getElementById('toggleRed').addEventListener('change', updateVisibility);
376
 
377
+ const HF_LOGO_URI = "static/hf-logo.svg";
378
  const graph = __GRAPH_DATA__;
379
  const W = innerWidth, H = innerHeight;
380
  const svg = d3.select('#dependency').call(d3.zoom().on('zoom', e => g.attr('transform', e.transform)));
hf-logo.svg → static/hf-logo.svg RENAMED
File without changes