# syntax=docker/dockerfile:1
# Dockerfile for a Hugging Face Space (FastAPI app served by uvicorn on port 7860)
# GPU-ready base with Python 3.10, CUDA 12.1, cuDNN 8 (tag pinned for reproducibility)
FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime
# System tools (keep the image lean: --no-install-recommends, apt lists
# removed in the same layer so the cache never lands in the image).
# NOTE(review): htop/vim/nano are debugging conveniences — consider dropping
# them for a production image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        bash \
        build-essential \
        cmake \
        curl \
        git \
        git-lfs \
        gnupg \
        htop \
        nano \
        procps \
        vim \
        wget && \
    rm -rf /var/lib/apt/lists/*
# Non-root user — Hugging Face Spaces expects the app to run as UID 1000
RUN useradd -m -u 1000 user
USER user
WORKDIR /app

# Make user-level pip installs (~/.local/bin) resolvable
ENV PATH="/home/user/.local/bin:$PATH"
#ENV HF_HOME=/app/.cache  # optional: keep the Hugging Face cache in a writable dir
# Python dependencies — copy only the manifest first so this layer stays
# cached until requirements.txt itself changes
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt
# Optional: flash-attn — install the prebuilt wheel (no nvcc required).
# Wheel matches the base image: cu12 / torch 2.2 / cp310; Ampere (SM 86) is supported.
RUN pip install --no-cache-dir \
    https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
# To use a private HF token at build time, mount it as a BuildKit secret:
# Settings → Secrets → Name: HF_TOKEN (scope: "read")
#ARG HF_TOKEN
#RUN --mount=type=secret,id=HF_TOKEN \
#    echo "machine huggingface.co login __token__ password $(cat /run/secrets/HF_TOKEN)" > ~/.netrc
# NOTE(review): writing the token to ~/.netrc persists it in an image layer;
# prefer reading /run/secrets/HF_TOKEN directly inside the RUN step that needs it.
# Application code, owned by the non-root user
COPY --chown=user . /app

# Documentation only — HF Spaces routes traffic to port 7860
EXPOSE 7860

# Exec-form CMD so uvicorn runs as PID 1 and receives SIGTERM on stop
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]