Molbap HF Staff committed on
Commit
c410e03
·
1 Parent(s): 594f222
Files changed (1) hide show
  1. app.py +28 -25
app.py CHANGED
@@ -12,44 +12,44 @@ from pathlib import Path
12
 
13
  import gradio as gr
14
 
15
- # —— your refactored helpers ——
16
- # ‒ build_graph_json(transformers_dir: Path, threshold: float, multimodal: bool, sim_method: str) -> dict
17
- # ‒ generate_html(graph: dict) -> str (returns full <html>… string)
18
  from modular_graph_and_candidates import build_graph_json, generate_html
19
 
20
  HF_MAIN_REPO = "https://github.com/huggingface/transformers"
21
 
 
22
 
23
  @lru_cache(maxsize=4)
24
  def clone_or_cache(repo_url: str) -> Path:
25
- """Clone *repo_url* at most **once per 24 h**.
26
-
27
- The repo is cached under /tmp/<hash>. A hidden ``.cloned_at`` file stores the
28
- UTC ISO timestamp of the last clone; if that stamp is < 24 h old we reuse the
29
- existing checkout, otherwise we wipe the directory and clone afresh. This
30
- guarantees deterministic daily snapshots while avoiding repeated network
31
- cost within the same day (even across independent Space sessions if the
32
- container persists).
33
- """
34
  tmp_root = Path(tempfile.gettempdir())
35
  cache_dir = tmp_root / f"repo_{abs(hash(repo_url))}"
36
  stamp = cache_dir / ".cloned_at"
37
 
38
  if cache_dir.exists() and stamp.exists():
39
  try:
40
- last = datetime.fromisoformat(stamp.read_text().strip())
41
- if datetime.utcnow() - last < timedelta(days=1):
42
- return cache_dir # fresh enough
43
  except Exception:
44
- # malformed stamp → fall through to re-clone
45
- pass
46
- # stale cache → remove dir completely
47
  shutil.rmtree(cache_dir, ignore_errors=True)
48
 
49
  subprocess.check_call(["git", "clone", "--depth", "1", repo_url, str(cache_dir)])
50
  stamp.write_text(datetime.utcnow().isoformat())
51
  return cache_dir
52
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  def run(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
55
  repo_path = clone_or_cache(repo_url)
@@ -61,15 +61,18 @@ def run(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
61
  sim_method=sim_method,
62
  )
63
 
64
- html = generate_html(graph)
65
 
66
- # Save graph JSON to a temp file so the user can download it.
67
- json_path = Path(tempfile.mktemp(suffix=".json"))
68
- json_path.write_text(json.dumps(graph), encoding="utf-8")
69
-
70
- return html, str(json_path)
71
 
 
 
 
72
 
 
73
 
74
  CUSTOM_CSS = """
75
  #graph_html iframe {height:85vh !important; width:100% !important; border:none;}
@@ -85,7 +88,7 @@ with gr.Blocks(css=CUSTOM_CSS) as demo:
85
  sim_radio = gr.Radio(["jaccard", "embedding"], value="jaccard", label="Similarity metric")
86
  go_btn = gr.Button("Build graph")
87
 
88
- html_out = gr.HTML(elem_id="graph_html", show_label=False)
89
  json_out = gr.File(label="Download graph.json")
90
 
91
  go_btn.click(run, [repo_in, thresh, multi_cb, sim_radio], [html_out, json_out])
 
12
 
13
  import gradio as gr
14
 
15
+ # —— refactored helpers ——
 
 
16
  from modular_graph_and_candidates import build_graph_json, generate_html
17
 
18
  HF_MAIN_REPO = "https://github.com/huggingface/transformers"
19
 
20
+ # ───────────────────────────── cache repo once per 24 h ───────────────────────────
21
 
22
  @lru_cache(maxsize=4)
23
  def clone_or_cache(repo_url: str) -> Path:
24
+ """Shallow-clone *repo_url* and reuse it for 24 h."""
 
 
 
 
 
 
 
 
25
  tmp_root = Path(tempfile.gettempdir())
26
  cache_dir = tmp_root / f"repo_{abs(hash(repo_url))}"
27
  stamp = cache_dir / ".cloned_at"
28
 
29
  if cache_dir.exists() and stamp.exists():
30
  try:
31
+ if datetime.utcnow() - datetime.fromisoformat(stamp.read_text().strip()) < timedelta(days=1):
32
+ return cache_dir
 
33
  except Exception:
34
+ pass # fall through → reclone
 
 
35
  shutil.rmtree(cache_dir, ignore_errors=True)
36
 
37
  subprocess.check_call(["git", "clone", "--depth", "1", repo_url, str(cache_dir)])
38
  stamp.write_text(datetime.utcnow().isoformat())
39
  return cache_dir
40
 
41
+ # ───────────────────────────── main callback ─────────────────────────────────────
42
+
43
+ def _escape_srcdoc(text: str) -> str:
44
+ """Escape for inclusion inside an <iframe srcdoc="…"> attribute."""
45
+ return (
46
+ text.replace("&", "&amp;")
47
+ .replace("\"", "&quot;")
48
+ .replace("'", "&#x27;")
49
+ .replace("<", "&lt;")
50
+ .replace(">", "&gt;")
51
+ )
52
+
53
 
54
  def run(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
55
  repo_path = clone_or_cache(repo_url)
 
61
  sim_method=sim_method,
62
  )
63
 
64
+ raw_html = generate_html(graph)
65
 
66
+ iframe_html = (
67
+ f'<iframe style="width:100%;height:85vh;border:none;" '
68
+ f'srcdoc="{_escape_srcdoc(raw_html)}"></iframe>'
69
+ )
 
70
 
71
+ tmp_json = Path(tempfile.mktemp(suffix=".json"))
72
+ tmp_json.write_text(json.dumps(graph), encoding="utf-8")
73
+ return iframe_html, str(tmp_json)
74
 
75
+ # ───────────────────────────── UI ────────────────────────────────────────────────
76
 
77
  CUSTOM_CSS = """
78
  #graph_html iframe {height:85vh !important; width:100% !important; border:none;}
 
88
  sim_radio = gr.Radio(["jaccard", "embedding"], value="jaccard", label="Similarity metric")
89
  go_btn = gr.Button("Build graph")
90
 
91
+ html_out = gr.HTML(elem_id="graph_html", sanitize=False, show_label=False)
92
  json_out = gr.File(label="Download graph.json")
93
 
94
  go_btn.click(run, [repo_in, thresh, multi_cb, sim_radio], [html_out, json_out])