Roll back the Dockerfile and start.sh so that llama.cpp is compiled in start.sh
- Dockerfile +3 -14
- start.sh +19 -1
Dockerfile
CHANGED
@@ -9,6 +9,7 @@ RUN apt-get update && \
    wget \
    curl \
    cmake \
+   # python build dependencies \
    build-essential \
    libssl-dev \
    zlib1g-dev \
@@ -22,7 +23,7 @@ RUN apt-get update && \
    libxmlsec1-dev \
    libffi-dev \
    liblzma-dev \
-   ffmpeg \
+   ffmpeg \
    libcurl4-openssl-dev \
    nvidia-driver-570

@@ -43,22 +44,10 @@ RUN pyenv install ${PYTHON_VERSION} && \
    pip install --no-cache-dir -U pip setuptools wheel && \
    pip install "huggingface-hub" "hf-transfer" "gradio[oauth]>=4.28.0" "gradio_huggingfacehub_search==0.0.8" "APScheduler"

-
+COPY --chown=1000 . /app
RUN git clone https://github.com/ggerganov/llama.cpp /app/llama.cpp
RUN pip install -r /app/llama.cpp/requirements.txt

-# Build llama.cpp
-ARG RUN_LOCALLY=false
-ENV GGML_CUDA=ON
-RUN if [ "$RUN_LOCALLY" = "true" ]; then export GGML_CUDA=OFF; fi && \
-    echo "GGML_CUDA=$GGML_CUDA" && \
-    cd /app/llama.cpp && \
-    cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=${GGML_CUDA} && \
-    cmake --build build --config Release -j2 --target llama-quantize llama-gguf-split llama-imatrix && \
-    cp ./build/bin/llama-* . && \
-    rm -rf build
-
-COPY --chown=1000 . /app
COPY groups_merged.txt /app/llama.cpp/

ENV PYTHONPATH=/app \
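With the in-image build removed, the Dockerfile no longer defines the RUN_LOCALLY build argument or the GGML_CUDA environment variable; the image builds without extra flags and llama.cpp is compiled by start.sh when the container comes up. A minimal sketch of building and running the rolled-back image follows; the image tag and the 7860 port (the usual Gradio default) are illustrative assumptions, not values taken from this Space's configuration:

# image tag is an assumption for illustration only
docker build -t quantize-space .
# --gpus all is only relevant when start.sh builds with CUDA enabled;
# port 7860 is assumed to be the Gradio app's port
docker run --gpus all -p 7860:7860 quantize-space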
start.sh
CHANGED
@@ -1,4 +1,22 @@
#!/bin/bash

-
+if [ ! -d "llama.cpp" ]; then
+    # only run in dev env
+    git clone https://github.com/ggerganov/llama.cpp
+fi
+
+export GGML_CUDA=OFF
+if [[ -z "${RUN_LOCALLY}" ]]; then
+    # enable CUDA if NOT running locally
+    export GGML_CUDA=ON
+fi
+echo "GGML_CUDA=$GGML_CUDA"
+
+cd llama.cpp
+cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=${GGML_CUDA}
+cmake --build build --config Release -j --target llama-quantize llama-gguf-split llama-imatrix
+cp ./build/bin/llama-* .
+rm -rf build
+
+cd ..
python app.py
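Because the [[ -z "${RUN_LOCALLY}" ]] check only disables CUDA when RUN_LOCALLY is set to a non-empty value, a local CPU-only test run of the rolled-back script could look like this (any non-empty value works; the variable name comes from the script above):

# CPU-only build of llama-quantize, llama-gguf-split and llama-imatrix, then launch app.py
RUN_LOCALLY=1 ./start.sh

On the Space itself RUN_LOCALLY is left unset, so GGML_CUDA stays ON and the binaries are compiled with CUDA support at container startup.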