Molbap HF Staff committed on
Commit
214d223
·
1 Parent(s): a9aba5d

cache utils

Browse files
Files changed (2) hide show
  1. app.py +2 -10
  2. modular_graph_and_candidates.py +4 -0
app.py CHANGED
@@ -52,16 +52,8 @@ def _escape_srcdoc(text: str) -> str:
52
 
53
 
54
  def run(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
55
- # Check if we can use cached embeddings for embedding similarity
56
- embeddings_cache = Path("embeddings_cache.npz")
57
-
58
- if sim_method == "embedding" and embeddings_cache.exists():
59
- print("🚀 Using cached embeddings - skipping repo download")
60
- # Use a dummy path since we won't need the actual repo
61
- repo_path = Path("/tmp/dummy")
62
- else:
63
- print("📥 Downloading/updating repository")
64
- repo_path = clone_or_cache(repo_url)
65
 
66
  graph = build_graph_json(
67
  transformers_dir=repo_path,
 
52
 
53
 
54
  def run(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
55
+ # Always download repo for now - let build_graph_json decide if it needs it
56
+ repo_path = clone_or_cache(repo_url)
 
 
 
 
 
 
 
 
57
 
58
  graph = build_graph_json(
59
  transformers_dir=repo_path,
modular_graph_and_candidates.py CHANGED
@@ -317,10 +317,14 @@ def build_graph_json(
317
 
318
  # Check if we can use cached embeddings only
319
  embeddings_cache = Path("embeddings_cache.npz")
 
 
320
  if sim_method == "embedding" and embeddings_cache.exists():
321
  try:
322
  # Try to compute from cache without accessing repo
323
  cached_sims = compute_similarities_from_cache(threshold)
 
 
324
  if cached_sims:
325
  # Create minimal graph with cached data
326
  cached_data = np.load(embeddings_cache, allow_pickle=True)
 
317
 
318
  # Check if we can use cached embeddings only
319
  embeddings_cache = Path("embeddings_cache.npz")
320
+ print(f"🔍 Cache file exists: {embeddings_cache.exists()}, sim_method: {sim_method}")
321
+
322
  if sim_method == "embedding" and embeddings_cache.exists():
323
  try:
324
  # Try to compute from cache without accessing repo
325
  cached_sims = compute_similarities_from_cache(threshold)
326
+ print(f"🔍 Got {len(cached_sims)} cached similarities")
327
+
328
  if cached_sims:
329
  # Create minimal graph with cached data
330
  cached_data = np.load(embeddings_cache, allow_pickle=True)