FROM python:3.12

#RUN pip install --no-cache-dir torch==2.2.2
#RUN pip install --no-cache-dir flash-attn

RUN apt-get update && \
    apt-get install -y \
        bash \
        git git-lfs \
        wget curl procps gnupg \
        build-essential cmake \
        htop vim nano && \
    rm -rf /var/lib/apt/lists/*

# Install the NVIDIA CUDA keyring (official method since 2024)
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb && \
    dpkg -i cuda-keyring_1.1-1_all.deb && \
    rm cuda-keyring_1.1-1_all.deb && \
    apt-get update && \
    apt-get -y install cuda && \
    rm -rf /var/lib/apt/lists/*

# Set CUDA environment variables
ENV PATH=/usr/local/cuda/bin:${PATH}
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}
ENV CUDAToolkit_ROOT=/usr/local/cuda
ENV CMAKE_ARGS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=86"

RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"
# ^ when running as `user`, pip installs executables there

WORKDIR /app
COPY --chown=user . /app

# Important: build isolation is disabled for the llama-cpp-python build
RUN pip install --upgrade pip

# Provide the build tools manually (required because build isolation is disabled)
RUN pip install --no-cache-dir \
        setuptools \
        wheel \
        scikit-build-core[pyproject]

# Optional: build llama-cpp-python first (cacheable as its own layer)
RUN pip install --no-cache-dir --no-build-isolation llama-cpp-python

# Then outetts (does not pull in llama-cpp-python again)
RUN pip install --no-cache-dir --no-build-isolation outetts

RUN pip install --no-cache-dir -r requirements.txt

#CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--ws", "auto", "--allow-websocket-origin", "*"]
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
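
# Usage: a minimal sketch of building and running this image. The image tag
# "tts-app" is an assumption for illustration, not part of this repo; running
# with GPU access requires the NVIDIA Container Toolkit on the host.
#
#   docker build -t tts-app .
#   docker run --gpus all -p 7860:7860 tts-app
#
# The app is then reachable on http://localhost:7860.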