# vscode-python312 / Dockerfile
FROM ubuntu:22.04
# Prevent interactive prompts
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV HF_HUB_ENABLE_HF_TRANSFER=1
ENV CUDA_HOME=/usr/local/cuda
ENV PATH=/usr/local/cuda/bin:${PATH}
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}
# Install base system dependencies
RUN apt-get update && apt-get install -y \
wget \
curl \
git \
vim \
tmux \
htop \
build-essential \
software-properties-common \
ca-certificates \
gnupg \
lsb-release \
sudo \
openssh-server \
nginx \
supervisor \
python3.11 \
python3.11-dev \
python3-pip \
&& rm -rf /var/lib/apt/lists/*
# Add NVIDIA package repositories
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm cuda-keyring_1.1-1_all.deb && \
apt-get update
# Install CUDA 12.8
RUN apt-get install -y cuda-toolkit-12-8 && \
rm -rf /var/lib/apt/lists/*
# Install cuDNN
RUN apt-get update && apt-get install -y \
libcudnn9-cuda-12 \
libcudnn9-dev-cuda-12 \
&& rm -rf /var/lib/apt/lists/*
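# Optional build-time toolchain check (a sketch only; assumes cuda-toolkit-12-8 placed nvcc under
# the /usr/local/cuda/bin path exported above, and `|| true` keeps the build going if it did not)
RUN nvcc --version || true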
# Set Python 3.11 as default
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 && \
update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
# Upgrade pip
RUN python -m pip install --upgrade pip setuptools wheel
# Install PyTorch with CUDA 12.8 support
RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
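# Quick sanity check of the installed wheel (a sketch only; torch.cuda.is_available() would be
# False here because no GPU is attached at image-build time, so only version metadata is printed)
RUN python -c "import torch; print(torch.__version__, torch.version.cuda)"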
# Create non-root user for HF Spaces compatibility
RUN useradd -m -u 1000 user && \
echo "user ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
# Install code-server (VSCode in browser)
RUN curl -fsSL https://code-server.dev/install.sh | sh
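# Editor extensions could be pre-baked at build time if desired; the extension id below is an
# assumption (code-server resolves ids from Open VSX) and is left commented out:
# RUN code-server --install-extension ms-python.python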
# Install Ollama
RUN curl -fsSL https://ollama.com/install.sh | sh
# Install hf_transfer first
RUN pip install hf_transfer
# Install core ML packages
RUN pip install \
accelerate \
transformers \
datasets \
peft \
bitsandbytes \
safetensors \
sentencepiece \
protobuf \
scipy \
einops \
wandb \
tensorboard \
gradio \
streamlit
# Install vLLM
RUN pip install vllm
# Install Flash Attention 2
RUN pip install ninja packaging && \
pip install flash-attn --no-build-isolation
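# If the flash-attn source build exhausts memory, the upstream-documented MAX_JOBS variable can
# cap parallel compile jobs; as a sketch, `ENV MAX_JOBS=4` placed before the step above
# (or `MAX_JOBS=4 pip install flash-attn --no-build-isolation`) limits the build to 4 jobs.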
# Install Triton for 5090 support
RUN pip install triton
# Clone and install Unsloth from source
RUN git clone https://github.com/unslothai/unsloth.git /tmp/unsloth && \
cd /tmp/unsloth && \
pip install -e . && \
cd / && \
rm -rf /tmp/unsloth/.git
# Clone and install Axolotl
RUN git clone https://github.com/axolotl-ai-cloud/axolotl /tmp/axolotl && \
cd /tmp/axolotl && \
pip install -e . && \
cd / && \
rm -rf /tmp/axolotl/.git
# Install Node.js for Open-WebUI
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
apt-get install -y nodejs && \
rm -rf /var/lib/apt/lists/*
# Clone and setup Open-WebUI
RUN git clone https://github.com/open-webui/open-webui.git /opt/open-webui && \
cd /opt/open-webui && \
npm install && \
npm run build
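# Note: the steps above build only the Open-WebUI frontend bundle. If the Python backend is also
# needed at runtime, its requirements could be installed too (path assumed from the upstream repo
# layout), e.g.:
# RUN pip install -r /opt/open-webui/backend/requirements.txt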
# Create directories with proper permissions
RUN mkdir -p /home/user/app /home/user/.cache /home/user/.config && \
chown -R user:user /home/user
# Configure code-server for user
RUN mkdir -p /home/user/.config/code-server && \
printf 'bind-addr: 0.0.0.0:8080\nauth: none\ncert: false\n' > /home/user/.config/code-server/config.yaml && \
chown -R user:user /home/user/.config
# Setup SSH
RUN mkdir /var/run/sshd && \
echo 'user:spaces' | chpasswd && \
sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin no/' /etc/ssh/sshd_config
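# sshd is configured here but not managed by the supervisor config below; at runtime it can be
# started manually, e.g. `sudo service ssh start` (using the passwordless sudo granted to `user`).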
# Create supervisor config
RUN mkdir -p /etc/supervisor/conf.d
RUN cat > /etc/supervisor/conf.d/services.conf << 'EOF'
[supervisord]
nodaemon=true
user=root
[program:code-server]
command=sudo -u user code-server --bind-addr 0.0.0.0:8080 --auth none
autostart=true
autorestart=true
stderr_logfile=/var/log/code-server.err.log
stdout_logfile=/var/log/code-server.out.log
[program:ollama]
command=ollama serve
autostart=true
autorestart=true
environment=OLLAMA_HOST="0.0.0.0",HOME="/home/user"
stderr_logfile=/var/log/ollama.err.log
stdout_logfile=/var/log/ollama.out.log
[program:open-webui]
directory=/opt/open-webui
command=npm start
autostart=true
autorestart=true
environment=PORT="3000",OLLAMA_BASE_URL="http://localhost:11434"
stderr_logfile=/var/log/open-webui.err.log
stdout_logfile=/var/log/open-webui.out.log
user=user
EOF
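# Runtime inspection example (assumes the stock supervisord.conf shipped by the Ubuntu package,
# which includes /etc/supervisor/conf.d/*.conf):
#   supervisorctl -c /etc/supervisor/supervisord.conf status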
# Create Gradio app for HF Spaces
RUN cat > /home/user/app/app.py << 'EOF'
import gradio as gr
import subprocess

def get_services_status():
    services = {
        "VSCode": "http://localhost:8080",
        "Ollama API": "http://localhost:11434",
        "Open-WebUI": "http://localhost:3000",
        "vLLM": "http://localhost:8000"
    }
    status = "# 🚀 ML Stack Services Status\n\n"
    for service, url in services.items():
        status += f"- **{service}**: {url}\n"
    # Get GPU info
    try:
        gpu_info = subprocess.check_output(
            ['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader'],
            text=True)
        status += f"\n## 🎮 GPU Status\n```\n{gpu_info}```"
    except (FileNotFoundError, subprocess.CalledProcessError):
        status += "\n## ⚠️ No GPUs detected"
    return status

def launch_vllm(model_name):
    try:
        cmd = f"python -m vllm.entrypoints.openai.api_server --model {model_name} --host 0.0.0.0 --port 8000"
        subprocess.Popen(cmd, shell=True)
        return f"✅ Launching vLLM with model: {model_name}"
    except Exception as e:
        return f"❌ Error: {str(e)}"

def pull_ollama_model(model_name):
    try:
        result = subprocess.run(['ollama', 'pull', model_name], capture_output=True, text=True)
        return f"✅ {result.stdout}\n{result.stderr}"
    except Exception as e:
        return f"❌ Error: {str(e)}"

# Create Gradio interface
with gr.Blocks(title="ML Stack Control Panel") as demo:
    gr.Markdown("# 🎮 RunPod ML Stack Control Panel")
    with gr.Tab("Status"):
        status_btn = gr.Button("🔄 Refresh Status")
        status_output = gr.Markdown()
        status_btn.click(get_services_status, outputs=status_output)
    with gr.Tab("vLLM"):
        model_input = gr.Textbox(label="Model Name", value="meta-llama/Llama-2-7b-hf")
        vllm_btn = gr.Button("🚀 Launch vLLM")
        vllm_output = gr.Textbox(label="Output")
        vllm_btn.click(launch_vllm, inputs=model_input, outputs=vllm_output)
    with gr.Tab("Ollama"):
        ollama_model = gr.Textbox(label="Model Name", value="llama3.2")
        ollama_btn = gr.Button("📥 Pull Model")
        ollama_output = gr.Textbox(label="Output")
        ollama_btn.click(pull_ollama_model, inputs=ollama_model, outputs=ollama_output)
    # Load initial status
    demo.load(get_services_status, outputs=status_output)

if __name__ == "__main__":
    # Start supervisor in the background; supervisord needs root for the /var/log/*.log paths and
    # the per-service user= settings, so it is launched through the passwordless sudo granted above
    subprocess.Popen(["sudo", "/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"])
    # Launch Gradio
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
EOF
# Create startup script
RUN cat > /home/user/app/start.sh << 'EOF'
#!/bin/bash
cd /home/user/app
python app.py
EOF
RUN chmod +x /home/user/app/start.sh
# Fix permissions
RUN chown -R user:user /home/user /opt/open-webui
# Expose ports
# SSH
EXPOSE 22
# Gradio (HF Spaces default)
EXPOSE 7860
# code-server
EXPOSE 8080
# Ollama
EXPOSE 11434
# vLLM
EXPOSE 8000
# Open-WebUI
EXPOSE 3000
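# Note: on Hugging Face Spaces only the application port is reachable externally (7860 by default
# for Docker Spaces, or whatever `app_port` is set to in the Space README metadata); the remaining
# ports are for local or self-hosted runs.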
# Switch to user
USER user
WORKDIR /home/user/app
# Set the entrypoint for HF Spaces
CMD ["python", "app.py"]