Molbap HF Staff committed on
Commit
53c0cc8
·
1 Parent(s): 91140e6
Files changed (2) hide show
  1. app.py +89 -0
  2. modular_graph_and_candidates.py +382 -0
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py – Gradio Space wrapper for modular_graph_and_candidates
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import shutil
7
+ import subprocess
8
+ import tempfile
9
+ from datetime import datetime, timedelta
10
+ from functools import lru_cache
11
+ from pathlib import Path
12
+
13
+ import gradio as gr
14
+
15
+ # —— your refactored helpers ——
16
+ # • build_graph_json(transformers_dir: Path, threshold: float, multimodal: bool, sim_method: str) -> dict
17
+ # • generate_html(graph: dict) -> str (returns full <html>… string)
18
+ from modular_graph_and_candidates import build_graph_json, generate_html
19
+
20
+ HF_MAIN_REPO = "https://github.com/huggingface/transformers"
21
+
22
+
23
def clone_or_cache(repo_url: str) -> Path:
    """Return a local shallow checkout of *repo_url*, re-cloning at most once per 24 h.

    The checkout lives under ``<tmp>/repo_<sha256 prefix>``. A hidden
    ``.cloned_at`` file inside it stores the UTC ISO-8601 timestamp of the last
    successful clone: while that stamp is younger than 24 h the existing
    checkout is reused, otherwise the directory is wiped and cloned afresh.

    Design notes:

    * The directory name is derived from SHA-256, not the built-in ``hash()``:
      string hashes are salted per interpreter process, so a ``hash()``-based
      name can never be found again after the process restarts.
    * The function is intentionally not memoised with ``lru_cache``. A
      memoised result would be returned forever within one process, so the
      24 h refresh would never trigger. Reading the stamp file is cheap.
    * Any leftover directory (stale, or from an interrupted clone that never
      wrote its stamp) is removed before cloning, because ``git clone``
      refuses to write into a non-empty directory.
    * ``--`` is placed before the URL so a user-supplied value starting with
      ``-`` cannot be interpreted by git as a command-line option.

    Args:
        repo_url: URL (or local path) of the repository to clone.

    Returns:
        Path of the directory containing the checkout.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` exits with an error.
    """
    import hashlib
    from datetime import timezone

    max_age = timedelta(days=1)
    digest = hashlib.sha256(repo_url.encode("utf-8")).hexdigest()[:16]
    cache_dir = Path(tempfile.gettempdir()) / f"repo_{digest}"
    stamp = cache_dir / ".cloned_at"

    try:
        last = datetime.fromisoformat(stamp.read_text(encoding="utf-8").strip())
    except (OSError, ValueError):
        # No checkout yet, or an unreadable / malformed stamp: clone again.
        last = None

    if last is not None:
        if last.tzinfo is None:
            # Stamps written without an offset are treated as UTC.
            last = last.replace(tzinfo=timezone.utc)
        if datetime.now(timezone.utc) - last < max_age:
            return cache_dir  # fresh enough

    shutil.rmtree(cache_dir, ignore_errors=True)
    subprocess.check_call(
        ["git", "clone", "--depth", "1", "--", repo_url, str(cache_dir)]
    )
    stamp.write_text(datetime.now(timezone.utc).isoformat(), encoding="utf-8")
    return cache_dir
52
+
53
+
54
def run(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
    """Gradio callback: build the graph for *repo_url* and return its outputs.

    Args:
        repo_url: repository URL handed to ``clone_or_cache``.
        threshold: similarity cutoff forwarded to ``build_graph_json``.
        multimodal: forwarded to ``build_graph_json`` as the multimodal filter.
        sim_method: similarity method name forwarded to ``build_graph_json``.

    Returns:
        ``(html, json_path)``: the rendered HTML string and the path (as a
        ``str``) of a temporary ``.json`` file holding the serialised graph.
    """
    repo_path = clone_or_cache(repo_url)

    graph = build_graph_json(
        transformers_dir=repo_path,
        threshold=threshold,
        multimodal=multimodal,
        sim_method=sim_method,
    )
    html = generate_html(graph)

    # Save the graph JSON so the user can download it. NamedTemporaryFile
    # creates the file atomically; tempfile.mktemp() only returns a name and
    # is deprecated because another process can claim that name first.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".json", delete=False, encoding="utf-8"
    ) as fh:
        json.dump(graph, fh)

    return html, fh.name
71
+
72
+
73
# Gradio UI: one row of inputs, then the rendered graph and a download link.
with gr.Blocks(css="body{background:#fafafa;}") as demo:
    gr.Markdown("## 🔍 Modular-candidate explorer for 🤗 Transformers")

    with gr.Row():
        repo_in = gr.Text(value=HF_MAIN_REPO, label="Repo / fork URL")
        thresh = gr.Slider(0.50, 0.95, value=0.78, step=0.01, label="Similarity ≥")
        multi_cb = gr.Checkbox(label="Only multimodal models")
        sim_radio = gr.Radio(["jaccard", "embedding"], value="jaccard", label="Similarity metric")
        go_btn = gr.Button("Build graph")

    html_out = gr.HTML()
    json_out = gr.File(label="Download graph.json")

    # Inputs are passed to run() positionally, in this order.
    go_btn.click(run, [repo_in, thresh, multi_cb, sim_radio], [html_out, json_out])

if __name__ == "__main__":
    demo.launch()
modular_graph_and_candidates.py ADDED
@@ -0,0 +1,382 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ """
3
+ modular_graph_and_candidates.py
4
+ ================================
5
+ Create **one** rich view that combines
6
+ 1. The *dependency graph* between existing **modular_*.py** implementations in
7
+ 🤗 Transformers (blue/🟡) **and**
8
+ 2. The list of *missing* modular models (full-red nodes) **plus** similarity
9
+ edges (full-red links) between highly-overlapping modelling files – the
10
+ output of *find_modular_candidates.py* – so you can immediately spot good
11
+ refactor opportunities.
12
+
13
+ ––– Usage –––
14
+
15
+ ```bash
16
+ python modular_graph_and_candidates.py /path/to/transformers \
17
+ --multimodal # keep only models whose modelling code mentions
18
+ # "pixel_values" ≥ 3 times
19
+ --sim-threshold 0.5 # Jaccard cutoff (default 0.78)
20
+ --out graph.html # output HTML file name
21
+ ```
22
+
23
+ Colour legend in the generated HTML:
24
+ * 🟡 **base model** — has modular shards *imported* by others but no parent
25
+ * 🔵 **derived modular model** — has a `modular_*.py` and inherits from ≥ 1 model
26
+ * 🔴 **candidate** — no `modular_*.py` yet (and/or very similar to another)
27
+ * red edges = high-Jaccard similarity links (potential to factorise)
28
+ """
29
+ from __future__ import annotations
30
+
31
+ import argparse
32
+ import ast
33
+ import json
34
+ import re
35
+ import tokenize
36
+ from collections import Counter, defaultdict
37
+ from itertools import combinations
38
+ from pathlib import Path
39
+ from typing import Dict, List, Set, Tuple
40
+ from sentence_transformers import SentenceTransformer, util
41
+ from tqdm import tqdm
42
+ import numpy as np
43
+
44
+ # ────────────────────────────────────────────────────────────────────────────────
45
+ # CONFIG
46
+ # ────────────────────────────────────────────────────────────────────────────────
47
# Default similarity cutoff used when the caller supplies no threshold.
SIM_DEFAULT = 0.78
# Minimum number of "pixel_values" occurrences in a model's modelling code for
# the model to pass the multimodal filter. The module docstring and the CLI
# help both document 3; a value of 0 would let every model through and turn
# the filter into a no-op.
PIXEL_MIN_HITS = 3
# Default output file name for the command-line entry point.
HTML_DEFAULT = "d3_modular_graph.html"
50
+
51
+ # ────────────────────────────────────────────────────────────────────────────────
52
+ # 1) Helpers to analyse *modelling* files (for similarity & multimodal filter)
53
+ # ────────────────────────────────────────────────────────────────────────────────
54
+
55
+ def _strip_source(code: str) -> str:
56
+ """Remove doc‑strings, comments and import lines to keep only the core code."""
57
+ code = re.sub(r'("""|\'\'\')(?:.|\n)*?\1', "", code) # doc‑strings
58
+ code = re.sub(r"#.*", "", code) # # comments
59
+ return "\n".join(ln for ln in code.splitlines()
60
+ if not re.match(r"\s*(from|import)\s+", ln))
61
+
62
+ def _tokenise(code: str) -> Set[str]:
63
+ toks: Set[str] = set()
64
+ for tok in tokenize.generate_tokens(iter(code.splitlines(keepends=True)).__next__):
65
+ if tok.type == tokenize.NAME:
66
+ toks.add(tok.string)
67
+ return toks
68
+
69
def build_token_bags(models_root: Path) -> Tuple[Dict[str, List[Set[str]]], Dict[str, int]]:
    """Collect token sets and ``pixel_values`` counts for every model directory.

    Each immediate sub-directory of *models_root* is treated as one model; all
    ``modeling_*.py`` files below it (searched recursively) are read.

    Args:
        models_root: directory whose sub-directories are the model packages.

    Returns:
        ``(bags, pixel_hits)`` where ``bags[model]`` is a list with one NAME
        token set per successfully processed modelling file, and
        ``pixel_hits[model]`` is the total number of occurrences of the text
        ``"pixel_values"`` in that model's files. Both are ``defaultdict``s
        keyed by directory name.
    """
    bags: Dict[str, List[Set[str]]] = defaultdict(list)
    pixel_hits: Dict[str, int] = defaultdict(int)
    for mdl_dir in sorted(p for p in models_root.iterdir() if p.is_dir()):
        for py in mdl_dir.rglob("modeling_*.py"):
            try:
                # Plain ASCII codec name, matching the other readers in this
                # file (a typographic hyphen had slipped into the literal).
                text = py.read_text(encoding="utf-8")
                pixel_hits[mdl_dir.name] += text.count("pixel_values")
                bags[mdl_dir.name].append(_tokenise(_strip_source(text)))
            except Exception as e:
                # Best effort: one unreadable or untokenisable file must not
                # abort the whole scan, but the skip is reported.
                print(f"⚠️ Skipped {py}: {e}")
    return bags, pixel_hits
82
+
83
+ def _jaccard(a: Set[str], b: Set[str]) -> float:
84
+ return 0.0 if (not a or not b) else len(a & b) / len(a | b)
85
+
86
def similarity_clusters(bags: Dict[str, List[Set[str]]], thr: float) -> Dict[Tuple[str, str], float]:
    """Return ``{(model_a, model_b): score}`` for pairs whose Jaccard score is ≥ *thr*.

    Each model is represented by its largest token set; models without any
    token set are ignored. Pairs are formed over the alphabetically sorted
    model names, so ``model_a < model_b`` in every key.
    """
    representative = {}
    for name, token_sets in bags.items():
        if token_sets:
            representative[name] = max(token_sets, key=len)

    scored = (
        (pair, _jaccard(representative[pair[0]], representative[pair[1]]))
        for pair in combinations(sorted(representative), 2)
    )
    return {pair: score for pair, score in scored if score >= thr}
95
+
96
def embedding_similarity_clusters(models_root: Path, missing: List[str], thr: float) -> Dict[Tuple[str, str], float]:
    """Return ``{(model_a, model_b): cosine}`` for candidate pairs scoring ≥ *thr*.

    For every name in *missing*, the stripped source of all ``modeling_*.py``
    files under ``models_root / name`` is concatenated and embedded with the
    ``nomic-ai/nomic-embed-code`` sentence-transformers model; pairs are then
    compared by cosine similarity.

    Args:
        models_root: directory whose sub-directories are the model packages.
        missing: names of the candidate models to compare.
        thr: minimum cosine similarity for a pair to be reported.

    Returns:
        Mapping from name pairs (in the order the names appear in *missing*)
        to their similarity as a plain ``float``. Empty when fewer than two
        candidates are given.
    """
    # Nothing to compare: return before loading the embedding model. This
    # also avoids np.vstack([]) raising on an empty batch list.
    if len(missing) < 2:
        return {}

    model = SentenceTransformer("nomic-ai/nomic-embed-code")
    model.max_seq_length = 4096  # truncate overly long modeling files

    texts = {}
    for name in tqdm(missing, desc="Reading modeling files"):
        chunks = []
        for py in (models_root / name).rglob("modeling_*.py"):
            try:
                chunks.append(_strip_source(py.read_text(encoding="utf-8")))
            except (OSError, UnicodeError):
                # Best effort: unreadable files are left out of the text.
                continue
        # A single space stands in for models with no readable source.
        texts[name] = "\n".join(chunks).strip() or " "

    names = list(texts)
    all_embeddings = []

    print("Encoding embeddings...")
    batch_size = 8  # or 2 if memory is tight
    for i in tqdm(range(0, len(names), batch_size), desc="Batches", leave=False):
        batch = [texts[n] for n in names[i:i + batch_size]]
        emb = model.encode(
            batch,
            convert_to_numpy=True,
            # Unit-length vectors are required for the dot product below to
            # be a cosine similarity; request them explicitly.
            normalize_embeddings=True,
            show_progress_bar=False,
        )
        all_embeddings.append(emb)

    embeddings = np.vstack(all_embeddings)  # [N, D]

    print("Computing pairwise similarities...")
    sims = embeddings @ embeddings.T  # cosine, since rows are unit-length

    out = {}
    for i, j in combinations(range(len(names)), 2):
        s = float(sims[i, j])
        if s >= thr:
            out[(names[i], names[j])] = s
    return out
132
+
133
+
134
+
135
+
136
+ # ────────────────────────────────────────────────────────────────────────────────
137
+ # 2) Scan *modular_*.py* files to build an import-dependency graph
138
+ # – only **modeling_*** imports are considered (skip configuration / processing)
139
+ # ────────────────────────────────────────────────────────────────────────────────
140
+
141
def modular_files(models_root: Path) -> List[Path]:
    """Return every ``modular_*.py`` file found recursively below *models_root*."""
    found = []
    for candidate in models_root.rglob("modular_*.py"):
        if candidate.suffix == ".py":
            found.append(candidate)
    return found
143
+
144
def dependency_graph(modular_files: List[Path], models_root: Path) -> Dict[str, List[Dict[str, str]]]:
    """Return ``{derived_model: [{"source": ..., "imported_class": ...}, ...]}``.

    Every ``from ... import ...`` statement in each modular file is inspected.
    Only imports whose module path contains ``modeling_`` are kept; anything
    from configuration / processing / image-processing modules or from
    ``modeling_attn_mask_utils`` is ignored so the graph focuses on modelling
    code. The source model is the first dotted component of the module path
    that is not empty, ``models`` or ``transformers``; edges are dropped when
    that component is the importing model itself or is not the name of a
    directory under *models_root*.

    Args:
        modular_files: paths of the ``modular_*.py`` files to analyse; the
            name of each file's parent directory is used as the derived model.
        models_root: directory whose sub-directory names are the valid models.

    Returns:
        A plain ``dict`` with one entry per derived model that has at least
        one kept import, one list item per imported name.
    """
    excluded_markers = (
        "configuration_",
        "processing_",
        "image_processing",
        "modeling_attn_mask_utils",
    )
    ignored_parts = {"", "models", "transformers"}

    model_names = {p.name for p in models_root.iterdir() if p.is_dir()}
    deps: Dict[str, List[Dict[str, str]]] = defaultdict(list)
    for fp in modular_files:
        derived = fp.parent.name
        try:
            # Plain ASCII codec name (a typographic hyphen had slipped into
            # the literal).
            tree = ast.parse(fp.read_text(encoding="utf-8"), filename=str(fp))
        except Exception as e:
            # Best effort: report and skip files that cannot be read or parsed.
            print(f"⚠️ AST parse failed for {fp}: {e}")
            continue
        for node in ast.walk(tree):
            if not isinstance(node, ast.ImportFrom) or not node.module:
                continue
            mod = node.module
            # Keep only *modeling_* imports, drop anything else.
            if "modeling_" not in mod or any(marker in mod for marker in excluded_markers):
                continue
            parts = re.split(r"[./]", mod)
            src = next((p for p in parts if p not in ignored_parts), "")
            if not src or src == derived or src not in model_names:
                continue
            for alias in node.names:
                deps[derived].append({"source": src, "imported_class": alias.name})
    return dict(deps)
178
+
179
+
180
+ # modular_graph_and_candidates.py (top-level)
181
+
182
def build_graph_json(
    transformers_dir: Path,
    threshold: float = SIM_DEFAULT,
    multimodal: bool = False,
    sim_method: str = "jaccard",
) -> dict:
    """Return the ``{"nodes": [...], "links": [...]}`` dict consumed by the D3 view.

    Args:
        transformers_dir: root of a transformers checkout; models are read
            from ``src/transformers/models`` below it.
        threshold: minimum similarity for a candidate link.
        multimodal: when true, keep only candidates whose modelling code
            mentions ``pixel_values`` at least ``PIXEL_MIN_HITS`` times.
        sim_method: ``"jaccard"`` for token-set similarity; any other value
            selects the embedding-based similarity.

    Returns:
        ``nodes`` is a list of ``{"id", "cls", "sz"}`` dicts sorted by id,
        where ``cls`` is ``"cand"``, ``"base"`` or ``"derived"`` and ``sz``
        ranges from 1 to 3 with the node's link count. ``links`` is a list of
        ``{"source", "target", "label", "cand"}`` dicts: one per
        (source, derived) import relation, labelled with the number of
        imported names, followed by one per similar candidate pair.
    """
    models_root = Path(transformers_dir) / "src/transformers/models"
    bags, pix_hits = build_token_bags(models_root)

    mod_files = modular_files(models_root)
    deps = dependency_graph(mod_files, models_root)

    models_with_modular = {p.parent.name for p in mod_files}
    missing = [m for m in bags if m not in models_with_modular]
    if multimodal:
        missing = [m for m in missing if pix_hits[m] >= PIXEL_MIN_HITS]

    if sim_method == "jaccard":
        sims = similarity_clusters({m: bags[m] for m in missing}, threshold)
    else:
        sims = embedding_similarity_clusters(models_root, missing, threshold)

    # ---- assemble links ----
    links: List[dict] = []
    for drv, imports in deps.items():
        # One edge per (source, derived) pair, labelled with the number of
        # imported names. Emitting one edge per imported name would create N
        # identical edges and inflate the degree used for node sizing.
        per_source = Counter(d["source"] for d in imports)
        for src, count in per_source.items():
            links.append({
                "source": src,
                "target": drv,
                "label": f"{count} imports",
                "cand": False,
            })

    for (a, b), s in sims.items():
        links.append({"source": a, "target": b, "label": f"{s*100:.1f}%", "cand": True})

    # ---- assemble nodes ----
    nodes: Set[str] = set(missing)
    deg = Counter()
    for lk in links:
        nodes.update((lk["source"], lk["target"]))
        deg[lk["source"]] += 1
        deg[lk["target"]] += 1
    max_deg = max(deg.values(), default=1)

    # Roles are decided by the dependency (non-candidate) links only.
    targets = {lk["target"] for lk in links if not lk["cand"]}
    sources = {lk["source"] for lk in links if not lk["cand"]}
    missing_only = {m for m in missing if m not in sources and m not in targets}

    nodelist = []
    for n in sorted(nodes):
        if n in missing_only:
            cls = "cand"
        elif n in sources and n not in targets:
            cls = "base"
        else:
            cls = "derived"
        nodelist.append({"id": n, "cls": cls, "sz": 1 + 2 * (deg[n] / max_deg)})

    return {"nodes": nodelist, "links": links}
248
+
249
+
250
def generate_html(graph: dict) -> str:
    """Return the full HTML document with inlined CSS, JS and the graph JSON.

    The serialised graph replaces ``__GRAPH_DATA__`` in the ``JS`` template;
    the result and ``CSS`` then replace ``__JS__`` / ``__CSS__`` in ``HTML``.

    Args:
        graph: JSON-serialisable dict as produced by ``build_graph_json``.

    Returns:
        The complete HTML page as a string.
    """
    payload = json.dumps(graph, separators=(",", ":"))
    # The JSON is inlined into a <script> element, and node ids come from
    # directory names of a user-chosen repository. Escaping "<" keeps any
    # "<!--" or "<script" sequence in those names from affecting how the
    # browser parses the script block. "<" can only occur inside JSON
    # strings, where \u003c is an equivalent spelling.
    payload = payload.replace("<", "\\u003c")
    js = JS.replace("__GRAPH_DATA__", payload)
    return HTML.replace("__CSS__", CSS).replace("__JS__", js)
254
+
255
+
256
+
257
+ # ────────────────────────────────────────────────────────────────────────────────
258
+ # 3) HTML (D3.js) boilerplate – CSS + JS templates (unchanged design)
259
+ # ────────────────────────────────────────────────────────────────────────────────
260
# Stylesheet substituted for the __CSS__ placeholder of HTML.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600&display=swap');
:root { --base: 60px; }
body { margin:0; font-family:'Inter',Arial,sans-serif; background:transparent; overflow:hidden; }
svg { width:100vw; height:100vh; }
.link { stroke:#999; stroke-opacity:.6; }
.link.cand { stroke:#e63946; stroke-width:2.5; }
.node-label { fill:#333; pointer-events:none; text-anchor:middle; font-weight:600; }
.link-label { fill:#555; font-size:10px; pointer-events:none; text-anchor:middle; }
.node.base path { fill:#ffbe0b; }
.node.derived circle { fill:#1f77b4; }
.node.cand circle, .node.cand path { fill:#e63946; }
#legend { position:fixed; top:18px; left:18px; background:rgba(255,255,255,.92); padding:18px 28px;
          border-radius:10px; border:1.5px solid #bbb; font-size:18px; box-shadow:0 2px 8px rgba(0,0,0,.08); }
"""

# Script substituted for the __JS__ placeholder of HTML. __GRAPH_DATA__ is
# replaced with the serialised {nodes, links} graph before insertion.
JS = """

function updateVisibility() {
  const show = document.getElementById('toggleRed').checked;
  svg.selectAll('.link.cand').style('display', show ? null : 'none');
  svg.selectAll('.node.cand').style('display', show ? null : 'none');
  svg.selectAll('.link-label')
     .filter(d => d.cand)
     .style('display', show ? null : 'none');
}

document.getElementById('toggleRed').addEventListener('change', updateVisibility);


const graph = __GRAPH_DATA__;
const W = innerWidth, H = innerHeight;
const svg = d3.select('#dependency').call(d3.zoom().on('zoom', e => g.attr('transform', e.transform)));
const g = svg.append('g');

const link = g.selectAll('line')
  .data(graph.links)
  .join('line')
  .attr('class', d => d.cand ? 'link cand' : 'link');

const linkLbl = g.selectAll('text.link-label')
  .data(graph.links)
  .join('text')
  .attr('class', 'link-label')
  .text(d => d.label);

const node = g.selectAll('g.node')
  .data(graph.nodes)
  .join('g')
  .attr('class', d => `node ${d.cls}`)
  .call(d3.drag().on('start', dragStart).on('drag', dragged).on('end', dragEnd));

node.filter(d => d.cls==='base').append('image')
  .attr('xlink:href', 'hf-logo.svg').attr('x', -30).attr('y', -30).attr('width', 60).attr('height', 60);
node.filter(d => d.cls!=='base').append('circle').attr('r', d => 20*d.sz);
node.append('text').attr('class','node-label').attr('dy','-2.4em').text(d => d.id);

const sim = d3.forceSimulation(graph.nodes)
  .force('link', d3.forceLink(graph.links).id(d => d.id).distance(520))
  .force('charge', d3.forceManyBody().strength(-600))
  .force('center', d3.forceCenter(W / 2, H / 2))
  .force('collide', d3.forceCollide(50));


sim.on('tick', () => {
  link.attr('x1', d=>d.source.x).attr('y1', d=>d.source.y)
      .attr('x2', d=>d.target.x).attr('y2', d=>d.target.y);
  linkLbl.attr('x', d=> (d.source.x+d.target.x)/2)
         .attr('y', d=> (d.source.y+d.target.y)/2);
  node.attr('transform', d=>`translate(${d.x},${d.y})`);
});

function dragStart(e,d){ if(!e.active) sim.alphaTarget(.3).restart(); d.fx=d.x; d.fy=d.y; }
function dragged(e,d){ d.fx=e.x; d.fy=e.y; }
function dragEnd(e,d){ if(!e.active) sim.alphaTarget(0); d.fx=d.fy=null; }
"""

# Page skeleton; __CSS__ and __JS__ are filled in by generate_html().
HTML = """
<!DOCTYPE html>
<html lang='en'><head><meta charset='UTF-8'>
<title>Transformers modular graph</title>
<style>__CSS__</style></head><body>
<div id='legend'>
🟡 base<br>🔵 modular<br>🔴 candidate<br>red edge = high embedding similarity<br><br>
<label><input type="checkbox" id="toggleRed" checked> Show candidates edges and nodes</label>
</div>
<svg id='dependency'></svg>
<script src='https://d3js.org/d3.v7.min.js'></script>
<script>__JS__</script></body></html>
"""
350
+
351
+ # ────────────────────────────────────────────────────────────────────────────────
352
+ # HTML writer
353
+ # ────────────────────────────────────────────────────────────────────────────────
354
+
355
def write_html(graph_data: dict, path: Path):
    """Render *graph_data* with ``generate_html`` and write it to *path* as UTF-8."""
    document = generate_html(graph_data)
    path.write_text(document, encoding="utf-8")
357
+
358
+
359
+ # ────────────────────────────────────────────────────────────────────────────────
360
+ # MAIN
361
+ # ────────────────────────────────────────────────────────────────────────────────
362
+
363
def main():
    """Command-line entry point: build the graph and write it as an HTML file.

    Parses the repository path and options from ``sys.argv``, calls
    ``build_graph_json`` and writes the rendered page to the ``--out`` path.
    """
    ap = argparse.ArgumentParser(description="Visualise modular dependencies + candidates")
    ap.add_argument("transformers", help="Path to local 🤗 transformers repo root")
    ap.add_argument(
        "--multimodal",
        action="store_true",
        # Derived from the constant so the help text cannot drift from the
        # value the filter actually uses.
        help=f"filter to models with ≥{PIXEL_MIN_HITS} 'pixel_values'",
    )
    ap.add_argument("--sim-threshold", type=float, default=SIM_DEFAULT)
    ap.add_argument("--out", default=HTML_DEFAULT)
    ap.add_argument(
        "--sim-method",
        choices=["jaccard", "embedding"],
        default="jaccard",
        help="Similarity method: 'jaccard' or 'embedding'",
    )
    args = ap.parse_args()

    graph = build_graph_json(
        transformers_dir=Path(args.transformers).expanduser().resolve(),
        threshold=args.sim_threshold,
        multimodal=args.multimodal,
        sim_method=args.sim_method,
    )
    write_html(graph, Path(args.out).expanduser())


if __name__ == "__main__":
    main()