FROM ghcr.io/ggml-org/llama.cpp:full
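# The :full image wraps the llama.cpp tools in an entrypoint script, so the CMD
# below only passes arguments (--server selects llama-server) rather than a binary path.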
# Install wget (apt-get is preferred over apt in scripts; clean the lists to keep the image small)
RUN apt-get update && apt-get install -y --no-install-recommends wget ca-certificates \
    && rm -rf /var/lib/apt/lists/*
# Download the GGUF model file
RUN wget "https://huggingface.co/ggml-org/SmolVLM-256M-Instruct-GGUF/resolve/main/SmolVLM-256M-Instruct-Q8_0.gguf" -O /smoll.gguf
# Download the matching multimodal projector (mmproj), required for image input
RUN wget "https://huggingface.co/ggml-org/SmolVLM-256M-Instruct-GGUF/resolve/main/mmproj-SmolVLM-256M-Instruct-Q8_0.gguf" -O /mmproj.gguf
# Document the port the server listens on (EXPOSE is metadata only; publish with -p at run time)
EXPOSE 7860
# Default command: serve the model on 0.0.0.0:7860, capping generation at 512 new tokens (-n) with 2 CPU threads (-t)
CMD ["--server", "-m", "/smoll.gguf", "--mmproj", "/mmproj.gguf", "--port", "7860", "--host", "0.0.0.0", "-n", "512", "-t", "2"]