File size: 2,084 Bytes
a181620 760961f 2a65dde 760961f a181620 760961f a181620 2a65dde a181620 67898d2 a181620 760961f a181620 760961f a181620 2a65dde a181620 67898d2 760961f a181620 2a65dde a181620 760961f a181620 67898d2 2a65dde 67898d2 a181620 2a65dde a181620 67898d2 760961f a181620 67898d2 a181620 67898d2 a181620 67898d2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
from huggingface_hub import hf_hub_download
import os
def _fetch_asset(repo_id, repo_type, filename, local_dir, token):
    """Download a single file from the Hub and report the outcome on stdout.

    Args:
        repo_id: Hub repository identifier, e.g. ``"user/repo"``.
        repo_type: Repository kind passed through to ``hf_hub_download``.
        filename: Path of the file inside the repository.
        local_dir: Local directory handed to ``hf_hub_download``.
        token: Access token string, or ``None`` when no token is configured.

    Any exception raised by the download is printed and swallowed on purpose,
    so one missing or unreachable file does not stop the remaining assets
    from being attempted.
    """
    try:
        path = hf_hub_download(
            repo_id=repo_id,
            repo_type=repo_type,
            filename=filename,
            local_dir=local_dir,
            token=token,
        )
    except Exception as e:  # deliberate best-effort: report and move on
        print(f"⚠️ Failed to download {filename}: {e}")
    else:
        # Only reached when the download call itself succeeded.
        print(f"✅ Downloaded {filename} → {path}")


def download_assets():
    """Download necessary assets from Hugging Face Hub.

    Creates ``data/`` and ``vectorstore/db_faiss/`` under the current working
    directory, then attempts to download each PDF listed below into ``data/``
    and the two FAISS index files (``index.faiss``, ``index.pkl``) from the
    repository's ``vectorstore/db_faiss/`` folder.

    The access token is read from ``HUGGINGFACE_HUB_TOKEN`` or, failing that,
    ``HUGGINGFACE_API_TOKEN``. Download failures are printed, not raised.

    Returns:
        None. Progress is reported via ``print``.
    """
    # ensure local dirs exist
    os.makedirs("data", exist_ok=True)
    os.makedirs("vectorstore/db_faiss", exist_ok=True)

    repo_id = "MoizK/mindmedic-assets"
    repo_type = "dataset"

    # Allow either env var name. A variable that is set but empty or padded
    # with whitespace (e.g. a pasted secret with a trailing newline) is
    # normalised: surrounding whitespace is stripped, and an empty result
    # becomes None instead of being forwarded as an empty token string.
    raw_token = (
        os.getenv("HUGGINGFACE_HUB_TOKEN")
        or os.getenv("HUGGINGFACE_API_TOKEN")
        or ""
    )
    token = raw_token.strip() or None

    # list of PDF filenames at the repo root
    pdf_files = [
        "71763-gale-encyclopedia-of-medicine.-vol.-1.-2nd-ed.pdf",
        "Depression-NIM-2024.pdf",
        "Depression-and-Other-Common-Mental-Disorders-Global-Health-Estimates.pdf",
        "Doing-What-Matters-in-Times-of-Stress.pdf",
        "Generalized-Anxiety-Disorder-When-Worry-Gets-Out-of-Control.pdf",
        "WHO-mhGAP-Intervention-Guide-v2.pdf",
        "social-anxiety-disorder-more-than-just-shyness.pdf",
    ]
    for fname in pdf_files:
        _fetch_asset(repo_id, repo_type, fname, "data", token)

    # Now fetch the FAISS index files from the nested path. They live under
    # vectorstore/db_faiss/ in the repo; local_dir="." is used so that
    # ./vectorstore/db_faiss/... is created locally.
    index_files = ["index.faiss", "index.pkl"]
    for fname in index_files:
        remote_path = f"vectorstore/db_faiss/{fname}"
        _fetch_asset(repo_id, repo_type, remote_path, ".", token)
# Script entry point: fetch the assets only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    download_assets()
|