Molbap HF Staff committed on
Commit
a9aba5d
·
1 Parent(s): 061a198
Files changed (1) hide show
  1. modular_graph_and_candidates.py +26 -0
modular_graph_and_candidates.py CHANGED
@@ -314,6 +314,32 @@ def build_graph_json(
314
  sim_method: str = "jaccard",
315
  ) -> dict:
316
  """Return the {nodes, links} dict that D3 needs."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  models_root = transformers_dir / "src/transformers/models"
318
 
319
  # Get missing models and their data
 
314
  sim_method: str = "jaccard",
315
  ) -> dict:
316
  """Return the {nodes, links} dict that D3 needs."""
317
+
318
+ # Check if we can use cached embeddings only
319
+ embeddings_cache = Path("embeddings_cache.npz")
320
+ if sim_method == "embedding" and embeddings_cache.exists():
321
+ try:
322
+ # Try to compute from cache without accessing repo
323
+ cached_sims = compute_similarities_from_cache(threshold)
324
+ if cached_sims:
325
+ # Create minimal graph with cached data
326
+ cached_data = np.load(embeddings_cache, allow_pickle=True)
327
+ missing = list(cached_data["names"])
328
+
329
+ nodes = []
330
+ for name in missing:
331
+ nodes.append({"id": name, "cls": "cand", "sz": 1})
332
+
333
+ links = []
334
+ for (a, b), s in cached_sims.items():
335
+ links.append({"source": a, "target": b, "label": f"{s*100:.1f}%", "cand": True})
336
+
337
+ print(f"⚡ Built graph from cache: {len(nodes)} nodes, {len(links)} links")
338
+ return {"nodes": nodes, "links": links}
339
+ except Exception as e:
340
+ print(f"⚠️ Cache-only build failed: {e}, falling back to full build")
341
+
342
+ # Full build with repository access
343
  models_root = transformers_dir / "src/transformers/models"
344
 
345
  # Get missing models and their data