File size: 3,648 Bytes
53c0cc8
 
 
 
 
 
 
 
 
 
 
 
 
 
c410e03
53c0cc8
 
 
 
c410e03
53c0cc8
 
 
c410e03
53c0cc8
 
 
 
 
 
c410e03
 
53c0cc8
c410e03
53c0cc8
 
 
 
 
 
c410e03
 
 
 
 
 
 
 
 
 
 
 
53c0cc8
 
 
 
 
 
 
 
 
 
 
c410e03
53c0cc8
c410e03
 
 
 
53c0cc8
c410e03
 
 
53c0cc8
c410e03
ceffe7d
 
 
 
 
 
53c0cc8
 
 
 
 
 
 
 
 
df95764
53c0cc8
 
 
 
 
594f222
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# app.py – Gradio Space wrapper for modular_graph_and_candidates

from __future__ import annotations

import json
import shutil
import subprocess
import tempfile
from datetime import datetime, timedelta
from functools import lru_cache
from pathlib import Path

import gradio as gr

# —— refactored helpers ——
from modular_graph_and_candidates import build_graph_json, generate_html

HF_MAIN_REPO = "https://github.com/huggingface/transformers"

# ───────────────────────────── cache repo once per 24 h ───────────────────────────

def clone_or_cache(repo_url: str) -> Path:
    """Shallow-clone *repo_url* into the temp directory and reuse it for 24 h.

    The checkout lives in ``<tempdir>/repo_<digest>``, where the digest is a
    SHA-256 prefix of the URL. A ``.cloned_at`` stamp file inside the checkout
    stores the clone time as an ISO-8601 string; a checkout whose stamp is
    younger than one day is returned as-is, anything else is deleted and
    cloned again.

    Args:
        repo_url: Repository URL handed to ``git clone``.

    Returns:
        Path of the local checkout.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits with a non-zero
            status.
    """
    # Function-scope imports keep this block self-contained.
    import hashlib
    from datetime import timezone

    # NOTE: deliberately not memoised with functools.lru_cache. An in-process
    # memo would skip the freshness check below for the lifetime of the
    # process, so the checkout would never be refreshed after 24 h.

    # A content digest is stable across interpreter restarts; the built-in
    # hash() of a str is salted per process and would orphan the on-disk cache.
    digest = hashlib.sha256(repo_url.encode("utf-8")).hexdigest()[:16]
    cache_dir = Path(tempfile.gettempdir()) / f"repo_{digest}"
    stamp = cache_dir / ".cloned_at"

    try:
        cloned_at = datetime.fromisoformat(stamp.read_text().strip())
    except (OSError, ValueError):
        # Missing, unreadable or malformed stamp -> treat as "no valid cache".
        cloned_at = None
    else:
        if cloned_at.tzinfo is None:
            # Stamps without an offset are interpreted as UTC.
            cloned_at = cloned_at.replace(tzinfo=timezone.utc)

    if cloned_at is not None and datetime.now(timezone.utc) - cloned_at < timedelta(days=1):
        return cache_dir

    # Remove any stale or half-written checkout (including one that has no
    # stamp at all); git refuses to clone into a non-empty directory.
    shutil.rmtree(cache_dir, ignore_errors=True)

    # "--" ends option parsing so a URL starting with "-" cannot be
    # interpreted as a git command-line option.
    subprocess.check_call(
        ["git", "clone", "--depth", "1", "--", repo_url, str(cache_dir)]
    )
    stamp.write_text(datetime.now(timezone.utc).isoformat())
    return cache_dir

# ───────────────────────────── main callback ─────────────────────────────────────

def _escape_srcdoc(text: str) -> str:
    """Return *text* made safe for embedding in an ``<iframe srcdoc="...">`` value.

    The five characters ``&``, ``"``, ``'``, ``<`` and ``>`` are replaced by
    their HTML entities; every other character is passed through unchanged.
    """
    entity_for = {
        "&": "&amp;",
        "\"": "&quot;",
        "'": "&#x27;",
        "<": "&lt;",
        ">": "&gt;",
    }
    # One translation pass: the ampersands introduced by the entities
    # themselves are never escaped a second time.
    return text.translate(str.maketrans(entity_for))


def run(repo_url: str, threshold: float, multimodal: bool, sim_method: str) -> tuple[str, str]:
    """Build the graph for *repo_url* and return the values the UI displays.

    Args:
        repo_url: Repository URL; resolved to a local checkout through
            ``clone_or_cache``.
        threshold: Forwarded to ``build_graph_json`` as ``threshold``.
        multimodal: Forwarded to ``build_graph_json`` as ``multimodal``.
        sim_method: Forwarded to ``build_graph_json`` as ``sim_method``.

    Returns:
        A ``(iframe_html, json_path)`` tuple: an ``<iframe>`` element whose
        ``srcdoc`` holds the escaped page produced by ``generate_html``, and
        the path of a temporary ``.json`` file containing the serialized graph.
    """
    repo_path = clone_or_cache(repo_url)

    graph = build_graph_json(
        transformers_dir=repo_path,
        threshold=threshold,
        multimodal=multimodal,
        sim_method=sim_method,
    )

    raw_html = generate_html(graph)

    # The generated page is embedded through srcdoc, so it must be escaped to
    # survive inside a double-quoted HTML attribute.
    iframe_html = (
        f'<iframe style="width:100%;height:85vh;border:none;" '
        f'srcdoc="{_escape_srcdoc(raw_html)}"></iframe>'
    )

    # NamedTemporaryFile creates the file atomically, unlike the deprecated
    # tempfile.mktemp(), which only returns a name that another process could
    # claim first. delete=False keeps the file on disk after the handle is
    # closed so its path can be returned to the caller.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".json", delete=False, encoding="utf-8"
    ) as handle:
        json.dump(graph, handle)
        json_path = handle.name

    return iframe_html, json_path

# ───────────────────────────── UI ────────────────────────────────────────────────

# Stretch the iframe rendered inside the #graph_html component to full width
# and 85 % of the viewport height.
CUSTOM_CSS = """
#graph_html iframe {height:85vh !important; width:100% !important; border:none;}
"""

with gr.Blocks(css=CUSTOM_CSS) as demo:
    # Heading and slider label previously contained mis-decoded (mojibake)
    # characters; restored to the intended emoji and "≥" sign.
    gr.Markdown("## 🔍 Modular‑candidate explorer for 🤗 Transformers")

    # Input controls, laid out on a single row.
    with gr.Row():
        repo_in   = gr.Text(value=HF_MAIN_REPO, label="Repo / fork URL")
        thresh    = gr.Slider(0.50, 0.95, value=0.78, step=0.01, label="Similarity ≥")
        multi_cb  = gr.Checkbox(label="Only multimodal models")
        sim_radio = gr.Radio(["jaccard", "embedding"], value="jaccard", label="Similarity metric")
        go_btn    = gr.Button("Build graph")

    # Outputs: the embedded graph page and the downloadable JSON file.
    html_out  = gr.HTML(elem_id="graph_html", show_label=False)
    json_out  = gr.File(label="Download graph.json")

    # The component order here must match run()'s parameters and return tuple.
    go_btn.click(run, [repo_in, thresh, multi_cb, sim_radio], [html_out, json_out])

if __name__ == "__main__":
    demo.launch()