Roll back the Dockerfile and start.sh so that llama.cpp is compiled in start.sh
- Dockerfile +3 -14
- start.sh +19 -1
Dockerfile
CHANGED
@@ -9,6 +9,7 @@ RUN apt-get update && \
    wget \
    curl \
    cmake \
+   # python build dependencies \
    build-essential \
    libssl-dev \
    zlib1g-dev \
@@ -22,7 +23,7 @@ RUN apt-get update && \
    libxmlsec1-dev \
    libffi-dev \
    liblzma-dev \
-   ffmpeg \
+   ffmpeg \
    libcurl4-openssl-dev \
    nvidia-driver-570

@@ -43,22 +44,10 @@ RUN pyenv install ${PYTHON_VERSION} && \
    pip install --no-cache-dir -U pip setuptools wheel && \
    pip install "huggingface-hub" "hf-transfer" "gradio[oauth]>=4.28.0" "gradio_huggingfacehub_search==0.0.8" "APScheduler"

-
+COPY --chown=1000 . /app
RUN git clone https://github.com/ggerganov/llama.cpp /app/llama.cpp
RUN pip install -r /app/llama.cpp/requirements.txt

-# Build llama.cpp
-ARG RUN_LOCALLY=false
-ENV GGML_CUDA=ON
-RUN if [ "$RUN_LOCALLY" = "true" ]; then export GGML_CUDA=OFF; fi && \
-    echo "GGML_CUDA=$GGML_CUDA" && \
-    cd /app/llama.cpp && \
-    cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=${GGML_CUDA} && \
-    cmake --build build --config Release -j2 --target llama-quantize llama-gguf-split llama-imatrix && \
-    cp ./build/bin/llama-* . && \
-    rm -rf build
-
-COPY --chown=1000 . /app
COPY groups_merged.txt /app/llama.cpp/

ENV PYTHONPATH=/app \
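With the in-image build removed, the Dockerfile no longer defines the RUN_LOCALLY build argument or the GGML_CUDA environment variable; the image builds without extra flags and llama.cpp is compiled by start.sh when the container comes up. A minimal sketch of building and running the rolled-back image follows; the image tag and the 7860 port (the usual Gradio default) are illustrative assumptions, not values taken from this Space's configuration:

# image tag is an assumption for illustration only
docker build -t quantize-space .
# --gpus all is only relevant when start.sh builds with CUDA enabled;
# port 7860 is assumed to be the Gradio app's port
docker run --gpus all -p 7860:7860 quantize-space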
start.sh
CHANGED
@@ -1,4 +1,22 @@
#!/bin/bash

-
+if [ ! -d "llama.cpp" ]; then
+    # only run in dev env
+    git clone https://github.com/ggerganov/llama.cpp
+fi
+
+export GGML_CUDA=OFF
+if [[ -z "${RUN_LOCALLY}" ]]; then
+    # enable CUDA if NOT running locally
+    export GGML_CUDA=ON
+fi
+echo "GGML_CUDA=$GGML_CUDA"
+
+cd llama.cpp
+cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=${GGML_CUDA}
+cmake --build build --config Release -j --target llama-quantize llama-gguf-split llama-imatrix
+cp ./build/bin/llama-* .
+rm -rf build
+
+cd ..
python app.py
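Because the [[ -z "${RUN_LOCALLY}" ]] check only disables CUDA when RUN_LOCALLY is set to a non-empty value, a local CPU-only test run of the rolled-back script could look like this (any non-empty value works; the variable name comes from the script above):

# CPU-only build of llama-quantize, llama-gguf-split and llama-imatrix, then launch app.py
RUN_LOCALLY=1 ./start.sh

On the Space itself RUN_LOCALLY is left unset, so GGML_CUDA stays ON and the binaries are compiled with CUDA support at container startup.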