Spaces:
Running
on
Zero
Running
on
Zero
File size: 3,349 Bytes
53c0cc8 ceffe7d 53c0cc8 ceffe7d 53c0cc8 ceffe7d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
# app.py – Gradio Space wrapper for modular_graph_and_candidates
from __future__ import annotations
import json
import shutil
import subprocess
import tempfile
from datetime import datetime, timedelta
from functools import lru_cache
from pathlib import Path
import gradio as gr
# —— your refactored helpers ——
# • build_graph_json(transformers_dir: Path, threshold: float, multimodal: bool, sim_method: str) -> dict
# • generate_html(graph: dict) -> str (returns full <html>… string)
from modular_graph_and_candidates import build_graph_json, generate_html
# Default repository URL pre-filled in the UI's "Repo / fork URL" text field.
HF_MAIN_REPO = "https://github.com/huggingface/transformers"
def clone_or_cache(repo_url: str) -> Path:
    """Return a shallow checkout of *repo_url*, re-cloned at most once per 24 h.

    The checkout lives under ``<tempdir>/repo_<digest>`` where ``<digest>`` is
    a SHA-256 prefix of the URL. A content digest is used instead of the
    builtin ``hash()`` because ``str`` hashes are salted per interpreter, which
    would give every process its own directory and defeat the on-disk cache.

    A hidden ``.cloned_at`` file inside the checkout stores the UTC ISO
    timestamp of the last successful clone. If that stamp is less than 24 h
    old the existing checkout is reused; otherwise the directory is wiped and
    cloned afresh.

    The function is deliberately *not* memoised in-process: an in-memory
    cache would keep returning the first result forever and the 24 h refresh
    would never happen in a long-running Space. Reading the stamp file is
    cheap enough to do on every call.

    Args:
        repo_url: URL of the Git repository to clone.

    Returns:
        Path of the local checkout directory.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits with a non-zero
            status.
        OSError: If ``git`` cannot be executed or the stamp cannot be written.
    """
    # Function-scope imports keep this block self-contained.
    import hashlib
    from datetime import timezone

    digest = hashlib.sha256(repo_url.encode("utf-8")).hexdigest()[:16]
    cache_dir = Path(tempfile.gettempdir()) / f"repo_{digest}"
    stamp = cache_dir / ".cloned_at"

    try:
        last = datetime.fromisoformat(stamp.read_text(encoding="utf-8").strip())
    except (OSError, ValueError):
        # Missing, unreadable or malformed stamp -> treat the cache as stale.
        last = None
    else:
        if last.tzinfo is None:
            # Stamps written by older versions were naive UTC.
            last = last.replace(tzinfo=timezone.utc)

    if last is not None and datetime.now(timezone.utc) - last < timedelta(days=1):
        return cache_dir  # fresh enough

    # Stale, partial or absent cache -> start from a clean directory.
    shutil.rmtree(cache_dir, ignore_errors=True)
    # "--" ends option parsing so a user-supplied URL that starts with "-"
    # cannot be interpreted as a git option.
    subprocess.check_call(
        ["git", "clone", "--depth", "1", "--", repo_url, str(cache_dir)]
    )
    # Written only after a successful clone, so a failed clone never looks fresh.
    stamp.write_text(datetime.now(timezone.utc).isoformat(), encoding="utf-8")
    return cache_dir
def run(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
    """Build the graph for *repo_url* and return what the UI displays.

    Args:
        repo_url: Git URL of the repository to analyse; it is fetched through
            ``clone_or_cache``.
        threshold: Forwarded unchanged to ``build_graph_json``.
        multimodal: Forwarded unchanged to ``build_graph_json``.
        sim_method: Forwarded unchanged to ``build_graph_json``.

    Returns:
        A ``(html, json_path)`` tuple: the string produced by
        ``generate_html`` and the path (as ``str``) of a temporary ``.json``
        file holding the serialised graph, so the user can download it.
    """
    repo_path = clone_or_cache(repo_url)
    graph = build_graph_json(
        transformers_dir=repo_path,
        threshold=threshold,
        multimodal=multimodal,
        sim_method=sim_method,
    )
    html = generate_html(graph)

    # NamedTemporaryFile creates the file atomically. tempfile.mktemp() only
    # returns a name, leaving a window in which another process can claim it.
    # delete=False keeps the file on disk after the handle closes so it can
    # still be served for download once this function has returned.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".json", delete=False, encoding="utf-8"
    ) as fh:
        json.dump(graph, fh)
    return html, fh.name
# Extra CSS handed to gr.Blocks below. The "#graph_html" selector matches the
# elem_id given to the HTML output component.
CUSTOM_CSS = """
#graph_html iframe {height:85vh !important; width:100% !important; border:none;}
"""

# Declarative UI: a row of inputs, then the rendered graph and a download slot.
# NOTE(review): indentation was lost in the copy under review; the five input
# widgets are assumed to sit inside the Row and the outputs below it. Confirm
# against the original file.
with gr.Blocks(css=CUSTOM_CSS) as demo:
    gr.Markdown("## 🔍 Modular‑candidate explorer for 🤗 Transformers")
    with gr.Row():
        repo_in = gr.Text(value=HF_MAIN_REPO, label="Repo / fork URL")
        thresh = gr.Slider(0.50, 0.95, value=0.78, step=0.01, label="Similarity ≥")
        multi_cb = gr.Checkbox(label="Only multimodal models")
        sim_radio = gr.Radio(["jaccard", "embedding"], value="jaccard", label="Similarity metric")
        go_btn = gr.Button("Build graph")
    html_out = gr.HTML(elem_id="graph_html", sanitize=False, show_label=False)
    json_out = gr.File(label="Download graph.json")
    # Inputs are listed in the same order as run()'s parameters
    # (repo_url, threshold, multimodal, sim_method); the two outputs receive
    # run()'s (html, json_path) return value.
    go_btn.click(run, [repo_in, thresh, multi_cb, sim_radio], [html_out, json_out])

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|