MindMedic / download_assets.py
MoizK's picture
Update download_assets.py
67898d2 verified
raw
history blame
2.08 kB
from huggingface_hub import hf_hub_download
import os
def download_assets():
    """Download the PDF corpus and FAISS index files from the Hugging Face Hub.

    Creates the local ``data`` and ``vectorstore/db_faiss`` directories if
    they are missing, then fetches each listed file from the
    ``MoizK/mindmedic-assets`` dataset repo with ``hf_hub_download``.

    The access token is read from ``HUGGINGFACE_HUB_TOKEN`` or, failing that,
    ``HUGGINGFACE_API_TOKEN``; if neither is set, ``None`` is passed as the
    token.

    Downloads are best-effort: a failure on one file is reported on stdout
    and the remaining files are still attempted. Nothing is returned.
    """
    # Ensure the local target directories exist before downloading.
    os.makedirs("data", exist_ok=True)
    os.makedirs("vectorstore/db_faiss", exist_ok=True)

    repo_id = "MoizK/mindmedic-assets"
    repo_type = "dataset"

    # Allow either env var name.
    token = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HUGGINGFACE_API_TOKEN")

    # PDF filenames at the repo root; these are saved under ./data.
    pdf_files = [
        "71763-gale-encyclopedia-of-medicine.-vol.-1.-2nd-ed.pdf",
        "Depression-NIM-2024.pdf",
        "Depression-and-Other-Common-Mental-Disorders-Global-Health-Estimates.pdf",
        "Doing-What-Matters-in-Times-of-Stress.pdf",
        "Generalized-Anxiety-Disorder-When-Worry-Gets-Out-of-Control.pdf",
        "WHO-mhGAP-Intervention-Guide-v2.pdf",
        "social-anxiety-disorder-more-than-just-shyness.pdf",
    ]
    # FAISS index files live under a nested path in the repo.
    index_files = ["index.faiss", "index.pkl"]

    # (remote path in the repo, local_dir passed to hf_hub_download).
    # The index files use local_dir="." with the nested remote path so that
    # they are intended to land in ./vectorstore/db_faiss/.
    downloads = [(fname, "data") for fname in pdf_files]
    downloads += [(f"vectorstore/db_faiss/{fname}", ".") for fname in index_files]

    for remote_path, local_dir in downloads:
        try:
            path = hf_hub_download(
                repo_id=repo_id,
                repo_type=repo_type,
                filename=remote_path,
                local_dir=local_dir,
                token=token,
            )
        except Exception as e:
            # Deliberately broad: one failed file must not abort the rest.
            print(f"⚠️ Failed to download {remote_path}: {e}")
        else:
            # Separate the remote name from the local path in the message.
            print(f"✅ Downloaded {remote_path} -> {path}")
# Script entry point: fetch the assets only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    download_assets()