Brianpuz committed on
Commit b6beebe · 1 Parent(s): 55c154d

Roll back Docker and start.sh so that llama.cpp is compiled in start.sh

Files changed (2):
  1. Dockerfile +3 -14
  2. start.sh +19 -1
Dockerfile CHANGED
@@ -9,6 +9,7 @@ RUN apt-get update && \
     wget \
     curl \
     cmake \
+    # python build dependencies \
     build-essential \
     libssl-dev \
     zlib1g-dev \
@@ -22,7 +23,7 @@ RUN apt-get update && \
     libxmlsec1-dev \
     libffi-dev \
     liblzma-dev \
-    ffmpeg \
+    ffmpeg \
     libcurl4-openssl-dev \
     nvidia-driver-570
 
@@ -43,22 +44,10 @@ RUN pyenv install ${PYTHON_VERSION} && \
     pip install --no-cache-dir -U pip setuptools wheel && \
     pip install "huggingface-hub" "hf-transfer" "gradio[oauth]>=4.28.0" "gradio_huggingfacehub_search==0.0.8" "APScheduler"
 
-# Clone llama.cpp and install its requirements
+COPY --chown=1000 . /app
 RUN git clone https://github.com/ggerganov/llama.cpp /app/llama.cpp
 RUN pip install -r /app/llama.cpp/requirements.txt
 
-# Build llama.cpp
-ARG RUN_LOCALLY=false
-ENV GGML_CUDA=ON
-RUN if [ "$RUN_LOCALLY" = "true" ]; then export GGML_CUDA=OFF; fi && \
-    echo "GGML_CUDA=$GGML_CUDA" && \
-    cd /app/llama.cpp && \
-    cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=${GGML_CUDA} && \
-    cmake --build build --config Release -j2 --target llama-quantize llama-gguf-split llama-imatrix && \
-    cp ./build/bin/llama-* . && \
-    rm -rf build
-
-COPY --chown=1000 . /app
 COPY groups_merged.txt /app/llama.cpp/
 
 ENV PYTHONPATH=/app \
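With this rollback the Dockerfile only installs the build toolchain (cmake, build-essential, nvidia-driver-570), copies the app, and clones llama.cpp; the binaries are no longer compiled into the image. A minimal sketch of building and running the image, assuming a hypothetical tag gguf-tools and that the container entrypoint runs start.sh (the entrypoint is not part of this diff):

# Build the image; llama.cpp is cloned but no longer compiled at this stage
docker build -t gguf-tools .

# Local run: any non-empty RUN_LOCALLY makes start.sh keep GGML_CUDA=OFF (see the start.sh diff below)
docker run -e RUN_LOCALLY=1 gguf-tools

# Deployed run: leave RUN_LOCALLY unset so start.sh builds with GGML_CUDA=ON
docker run --gpus all gguf-tools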
start.sh CHANGED
@@ -1,4 +1,22 @@
 #!/bin/bash
 
-cd /app
+if [ ! -d "llama.cpp" ]; then
+    # only run in dev env
+    git clone https://github.com/ggerganov/llama.cpp
+fi
+
+export GGML_CUDA=OFF
+if [[ -z "${RUN_LOCALLY}" ]]; then
+    # enable CUDA if NOT running locally
+    export GGML_CUDA=ON
+fi
+echo "GGML_CUDA=$GGML_CUDA"
+
+cd llama.cpp
+cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=${GGML_CUDA}
+cmake --build build --config Release -j --target llama-quantize llama-gguf-split llama-imatrix
+cp ./build/bin/llama-* .
+rm -rf build
+
+cd ..
 python app.py
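Because the CUDA switch now lives in start.sh instead of a Docker build argument, one script covers both environments: it clones llama.cpp only when the directory is missing (the image already provides it) and enables CUDA only when RUN_LOCALLY is unset. A minimal sketch of exercising both paths from the app root, assuming cmake and, for the CUDA path, an NVIDIA toolchain are available:

# Dev machine without a GPU: any non-empty RUN_LOCALLY keeps GGML_CUDA=OFF
RUN_LOCALLY=1 ./start.sh

# Deployed Space: RUN_LOCALLY is unset, so the script builds with GGML_CUDA=ON
./start.sh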