MindMedic / download_assets.py
MoizK's picture
Update download_assets.py
d67650c verified
from huggingface_hub import hf_hub_download
import os
def download_assets():
    """Download the PDFs and FAISS index files from the Hugging Face Hub.

    PDFs are fetched from the root of the ``MoizK/mindmedic-assets`` dataset
    repo into ``./data/``. The FAISS index files are fetched from the repo's
    ``vectorstore/db_faiss`` folder into ``./vectorstore/db_faiss/``.

    The access token is read from ``HUGGINGFACE_HUB_TOKEN`` or, failing that,
    ``HUGGINGFACE_API_TOKEN``; if neither is set, ``None`` is passed.

    Downloads are best-effort: a failed download is reported on stdout and
    the remaining files are still attempted. Returns ``None``.
    """
    # Prepare the local folders.
    os.makedirs("data", exist_ok=True)
    os.makedirs("vectorstore/db_faiss", exist_ok=True)

    repo_id = "MoizK/mindmedic-assets"
    repo_type = "dataset"
    token = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HUGGINGFACE_API_TOKEN")

    # -- PDFs (these live at the repo root) --
    pdf_files = [
        "71763-gale-encyclopedia-of-medicine.-vol.-1.-2nd-ed.pdf",
        "Depression-NIM-2024.pdf",
        "Depression-and-Other-Common-Mental-Disorders-Global-Health-Estimates.pdf",
        "Doing-What-Matters-in-Times-of-Stress.pdf",
        "Generalized-Anxiety-Disorder-When-Worry-Gets-Out-of-Control.pdf",
        "WHO-mhGAP-Intervention-Guide-v2.pdf",
        "social-anxiety-disorder-more-than-just-shyness.pdf",
    ]

    # -- FAISS index files (nested under vectorstore/db_faiss in the repo) --
    index_files = ["index.faiss", "index.pkl"]

    # Each entry: (label for messages, subfolder in the repo, local_dir, filenames).
    # hf_hub_download recreates the file's repo-relative path underneath
    # local_dir. The index files already carry the "vectorstore/db_faiss/"
    # prefix via `subfolder`, so they use local_dir="." to end up in
    # ./vectorstore/db_faiss/ rather than a doubly nested copy of that path.
    download_groups = [
        ("PDF", None, "data", pdf_files),
        ("index file", "vectorstore/db_faiss", ".", index_files),
    ]

    for label, subfolder, local_dir, filenames in download_groups:
        for fname in filenames:
            try:
                path = hf_hub_download(
                    repo_id=repo_id,
                    repo_type=repo_type,
                    subfolder=subfolder,
                    filename=fname,
                    local_dir=local_dir,
                    # `token` replaces the deprecated `use_auth_token` argument.
                    token=token,
                )
            except Exception as e:
                # Deliberately broad: one failed file must not stop the rest.
                print(f"⚠️ Failed to download {label} {fname}: {e}")
            else:
                print(f"✅ Downloaded {label} {fname} → {path}")
# Script entry point: fetch the assets only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    download_assets()