# app.py – Gradio Space wrapper for modular_graph_and_candidates from __future__ import annotations import json import shutil import subprocess import tempfile from datetime import datetime, timedelta from functools import lru_cache from pathlib import Path import gradio as gr # —— your refactored helpers —— # • build_graph_json(transformers_dir: Path, threshold: float, multimodal: bool, sim_method: str) -> dict # • generate_html(graph: dict) -> str (returns full … string) from modular_graph_and_candidates import build_graph_json, generate_html HF_MAIN_REPO = "https://github.com/huggingface/transformers" @lru_cache(maxsize=4) def clone_or_cache(repo_url: str) -> Path: """Clone *repo_url* at most **once per 24 h**. The repo is cached under /tmp/. A hidden ``.cloned_at`` file stores the UTC ISO timestamp of the last clone; if that stamp is < 24 h old we reuse the existing checkout, otherwise we wipe the directory and clone afresh. This guarantees deterministic daily snapshots while avoiding repeated network cost within the same day (even across independent Space sessions if the container persists). """ tmp_root = Path(tempfile.gettempdir()) cache_dir = tmp_root / f"repo_{abs(hash(repo_url))}" stamp = cache_dir / ".cloned_at" if cache_dir.exists() and stamp.exists(): try: last = datetime.fromisoformat(stamp.read_text().strip()) if datetime.utcnow() - last < timedelta(days=1): return cache_dir # fresh enough except Exception: # malformed stamp → fall through to re‑clone pass # stale cache → remove dir completely shutil.rmtree(cache_dir, ignore_errors=True) subprocess.check_call(["git", "clone", "--depth", "1", repo_url, str(cache_dir)]) stamp.write_text(datetime.utcnow().isoformat()) return cache_dir def run(repo_url: str, threshold: float, multimodal: bool, sim_method: str): repo_path = clone_or_cache(repo_url) graph = build_graph_json( transformers_dir=repo_path, threshold=threshold, multimodal=multimodal, sim_method=sim_method, ) html = generate_html(graph) # Save graph JSON to a temp file so the user can download it. json_path = Path(tempfile.mktemp(suffix=".json")) json_path.write_text(json.dumps(graph), encoding="utf-8") return html, str(json_path) CUSTOM_CSS = """ #graph_html iframe {height:85vh !important; width:100% !important; border:none;} """ with gr.Blocks(css=CUSTOM_CSS) as demo: gr.Markdown("## 🔍 Modular‑candidate explorer for 🤗 Transformers") with gr.Row(): repo_in = gr.Text(value=HF_MAIN_REPO, label="Repo / fork URL") thresh = gr.Slider(0.50, 0.95, value=0.78, step=0.01, label="Similarity ≥") multi_cb = gr.Checkbox(label="Only multimodal models") sim_radio = gr.Radio(["jaccard", "embedding"], value="jaccard", label="Similarity metric") go_btn = gr.Button("Build graph") html_out = gr.HTML(elem_id="graph_html", sanitize=False, show_label=False) json_out = gr.File(label="Download graph.json") go_btn.click(run, [repo_in, thresh, multi_cb, sim_radio], [html_out, json_out]) if __name__ == "__main__": demo.launch()