Update Dockerfile
Dockerfile CHANGED (+335 -100)
@@ -1,118 +1,353 @@
-FROM …
 
 # Set environment variables
 ENV DEBIAN_FRONTEND=noninteractive
-ENV …
-ENV …
-ENV …
-ENV …
-ENV …
-…
 && rm -rf /var/lib/apt/lists/*
 
-# Install …
-RUN \
-    apt-get update && \
-    apt-get install -y python3.11 python3.11-venv python3.11-dev && \
-    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 && \
-    update-alternatives --set python3 /usr/bin/python3.11 && \
-    curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
 
-# Install …
-RUN curl -…
-    echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc
 
-# …
-RUN …
-WORKDIR /workspace
 
-# …
-RUN python3 -m venv /opt/unsloth-env
-ENV PATH="/opt/unsloth-env/bin:$PATH"
 
-# …
-RUN …
 
-# …
-RUN …
 
-# …
-RUN … \
-    cd /tmp/xformers && \
-    /opt/unsloth-env/bin/pip uninstall -y xformers && \
-    /opt/unsloth-env/bin/python setup.py install && \
-    rm -rf /tmp/xformers
 
-# …
 
-# …
-RUN /…
 
-# …
-RUN …
 
-# …
-RUN …
-    npm …
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
 
-# …
-RUN …
 
 # Configure code-server
-RUN mkdir -p /root/.config/code-server
 
-# …
-RUN …
 
-# Create …
-RUN …
-… (removed lines 85-118 are truncated in this view)
+FROM runpod/pytorch:2.8.0-py3.11-cuda12.8.1-cudnn-devel-ubuntu22.04
 
 # Set environment variables
 ENV DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1
+ENV HF_HUB_ENABLE_HF_TRANSFER=1
+ENV CUDA_VISIBLE_DEVICES=all
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
+ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+ENV PATH=/usr/local/cuda/bin:$PATH
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    wget \
+    curl \
+    git \
+    vim \
+    tmux \
+    htop \
+    nvtop \
+    build-essential \
+    software-properties-common \
+    ca-certificates \
+    gnupg \
+    lsb-release \
+    sudo \
+    openssh-server \
+    nginx \
+    supervisor \
 && rm -rf /var/lib/apt/lists/*
 
+# Install code-server (VSCode in browser)
+RUN curl -fsSL https://code-server.dev/install.sh | sh
 
+# Install Ollama
+RUN curl -fsSL https://ollama.com/install.sh | sh
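Once the container is running, Ollama listens on port 11434; a quick smoke test against its generate endpoint might look like this (it assumes the llama3.2 model that setup.sh pulls further down):

    curl http://localhost:11434/api/generate -d '{
      "model": "llama3.2",
      "prompt": "Say hello",
      "stream": false
    }'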
 
+# Upgrade pip and install base Python packages
+RUN pip install --upgrade pip setuptools wheel
+
+# Install hf_transfer first for faster downloads
+RUN pip install hf_transfer
 
+# Install critical ML infrastructure
+RUN pip install \
+    accelerate \
+    transformers \
+    datasets \
+    peft \
+    bitsandbytes \
+    safetensors \
+    sentencepiece \
+    protobuf \
+    scipy \
+    einops \
+    wandb \
+    tensorboard
 
+# Install vLLM with CUDA 12.8 support
+RUN pip install vllm --extra-index-url https://download.pytorch.org/whl/cu128
 
+# Install Flash Attention 2 (critical for 5090)
+RUN pip install flash-attn --no-build-isolation
 
+# Install Unsloth with 5090 patches
+# Using the approach from the referenced repo
+RUN git clone https://github.com/unslothai/unsloth.git /tmp/unsloth && \
+    cd /tmp/unsloth && \
+    pip install -e . && \
+    cd / && \
+    rm -rf /tmp/unsloth/.git
 
+# Install Axolotl
+RUN git clone https://github.com/axolotl-ai-cloud/axolotl /tmp/axolotl && \
+    cd /tmp/axolotl && \
+    pip install -e . && \
+    cd / && \
+    rm -rf /tmp/axolotl/.git
 
+# Install Open-WebUI dependencies
+RUN apt-get update && apt-get install -y \
+    nodejs \
+    npm \
+    && rm -rf /var/lib/apt/lists/*
 
+# Clone and setup Open-WebUI
+RUN git clone https://github.com/open-webui/open-webui.git /opt/open-webui && \
+    cd /opt/open-webui && \
+    npm install && \
+    npm run build
 
+# Create workspace directory
+RUN mkdir -p /workspace
 
 # Configure code-server
+RUN mkdir -p /root/.config/code-server
+RUN echo "bind-addr: 0.0.0.0:8080\nauth: none\ncert: false" > /root/.config/code-server/config.yaml
+
+# Configure SSH (optional but useful)
+RUN mkdir /var/run/sshd
+RUN echo 'root:runpod' | chpasswd
+RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
+
+# Create supervisor config to run all services
+RUN mkdir -p /etc/supervisor/conf.d
+RUN cat > /etc/supervisor/conf.d/services.conf << 'EOF'
+[supervisord]
+nodaemon=true
+
+[program:code-server]
+command=code-server --bind-addr 0.0.0.0:8080 --auth none
+autostart=true
+autorestart=true
+stderr_logfile=/var/log/code-server.err.log
+stdout_logfile=/var/log/code-server.out.log
+
+[program:ollama]
+command=ollama serve
+autostart=true
+autorestart=true
+environment=OLLAMA_HOST="0.0.0.0"
+stderr_logfile=/var/log/ollama.err.log
+stdout_logfile=/var/log/ollama.out.log
+
+[program:sshd]
+command=/usr/sbin/sshd -D
+autostart=true
+autorestart=true
+
+[program:open-webui]
+; supervisor does not run commands through a shell, so set the working directory here instead of "cd &&"
+directory=/opt/open-webui
+command=npm start
+autostart=true
+autorestart=true
+environment=PORT="3000",OLLAMA_BASE_URL="http://localhost:11434"
+stderr_logfile=/var/log/open-webui.err.log
+stdout_logfile=/var/log/open-webui.out.log
+EOF
+
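If one of these services misbehaves, supervisord's control tool can be used inside the running container to inspect or restart them, for example:

    supervisorctl -c /etc/supervisor/supervisord.conf status
    supervisorctl -c /etc/supervisor/supervisord.conf restart open-webui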
+# Create a startup script for vLLM (runs on demand)
+RUN cat > /usr/local/bin/start-vllm << 'EOF'
+#!/bin/bash
+python -m vllm.entrypoints.openai.api_server \
+    --model "$1" \
+    --tensor-parallel-size ${CUDA_DEVICE_COUNT:-1} \
+    --gpu-memory-utilization 0.9 \
+    --max-model-len 32768 \
+    --host 0.0.0.0 \
+    --port 8000
+EOF
+RUN chmod +x /usr/local/bin/start-vllm
+
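The launched server speaks vLLM's OpenAI-compatible API, so it can be queried with a plain HTTP request once a model is loaded; a minimal sketch (the model name is only an example):

    start-vllm meta-llama/Llama-2-7b-hf &
    curl http://localhost:8000/v1/completions \
      -H "Content-Type: application/json" \
      -d '{"model": "meta-llama/Llama-2-7b-hf", "prompt": "Hello", "max_tokens": 32}'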
+# Create multi-GPU training helper script
+RUN cat > /usr/local/bin/train-multi-gpu << 'EOF'
+#!/bin/bash
+GPU_COUNT=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
+accelerate launch \
+    --num_processes $GPU_COUNT \
+    --num_machines 1 \
+    --mixed_precision bf16 \
+    --dynamo_backend no \
+    "$@"
+EOF
+RUN chmod +x /usr/local/bin/train-multi-gpu
+
+# Create accelerate config
+RUN cat > /workspace/accelerate_config.yaml << 'EOF'
+compute_environment: LOCAL_MACHINE
+debug: false
+distributed_type: MULTI_GPU
+downcast_bf16: 'no'
+gpu_ids: all
+machine_rank: 0
+main_training_function: main
+mixed_precision: bf16
+num_machines: 1
+num_processes: 8
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false
+EOF
+
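This config file can also be handed straight to accelerate instead of going through the train-multi-gpu wrapper; for example (the script name is a placeholder):

    accelerate launch --config_file /workspace/accelerate_config.yaml your_script.py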
+# Create setup script
+RUN cat > /workspace/setup.sh << 'EOF'
+#!/bin/bash
+echo "RunPod ML Stack Setup"
+echo "========================"
+
+# Check GPU availability
+echo -e "\nGPU Status:"
+nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader,nounits | nl -v 0
+
+# Count GPUs
+GPU_COUNT=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
+echo -e "\nFound $GPU_COUNT GPU(s)"
+
+# Update accelerate config with correct GPU count
+sed -i "s/num_processes: 8/num_processes: $GPU_COUNT/g" /workspace/accelerate_config.yaml
+
+# Pull a default model for Ollama if not exists
+if ! ollama list | grep -q "llama3.2"; then
+    echo -e "\nPulling default Ollama model (llama3.2)..."
+    ollama pull llama3.2
+fi
+
+echo -e "\nSetup complete! Services available at:"
+echo "  VSCode:     http://localhost:8080"
+echo "  Ollama:     http://localhost:11434"
+echo "  Open-WebUI: http://localhost:3000"
+echo "  vLLM:       http://localhost:8000 (start with: start-vllm <model>)"
+EOF
+RUN chmod +x /workspace/setup.sh
+
+# Create example multi-GPU training script
+RUN cat > /workspace/example_multi_gpu_train.py << 'EOF'
+import torch
+from accelerate import Accelerator
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from datasets import load_dataset
+from torch.optim import AdamW
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+
+def main():
+    # Setup
+    model_name = "meta-llama/Llama-2-7b-hf"
+    batch_size = 4
+    gradient_accumulation_steps = 4
+    learning_rate = 2e-5
+    num_epochs = 3
+
+    # Initialize accelerator (pass the accumulation steps so accelerator.accumulate() actually accumulates)
+    accelerator = Accelerator(gradient_accumulation_steps=gradient_accumulation_steps)
+
+    # Print GPU info
+    if accelerator.is_main_process:
+        print(f"Training on {accelerator.num_processes} GPU(s)")
+        print(f"Total batch size: {batch_size * accelerator.num_processes * gradient_accumulation_steps}")
+
+    # Load model and tokenizer
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.bfloat16,
+        use_cache=False,
+    )
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    tokenizer.pad_token = tokenizer.eos_token
+
+    # Load dataset
+    dataset = load_dataset("imdb", split="train[:1000]")  # Small subset for demo
+
+    def tokenize_function(examples):
+        return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)
+
+    tokenized_dataset = dataset.map(tokenize_function, batched=True)
+    tokenized_dataset.set_format("torch", columns=["input_ids", "attention_mask"])
+
+    # Create DataLoader
+    dataloader = DataLoader(tokenized_dataset, batch_size=batch_size, shuffle=True)
+
+    # Optimizer
+    optimizer = AdamW(model.parameters(), lr=learning_rate)
+
+    # Prepare for distributed training
+    model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)
+
+    # Training loop
+    model.train()
+    for epoch in range(num_epochs):
+        if accelerator.is_main_process:
+            print(f"\nEpoch {epoch + 1}/{num_epochs}")
+            progress_bar = tqdm(total=len(dataloader), desc="Training")
+
+        for step, batch in enumerate(dataloader):
+            with accelerator.accumulate(model):
+                outputs = model(
+                    input_ids=batch["input_ids"],
+                    attention_mask=batch["attention_mask"],
+                    labels=batch["input_ids"],
+                )
+                loss = outputs.loss
+                accelerator.backward(loss)
+                optimizer.step()
+                optimizer.zero_grad()
+
+            if accelerator.is_main_process:
+                progress_bar.update(1)
+                if step % 10 == 0:
+                    progress_bar.set_postfix({"loss": loss.item()})
+
+        if accelerator.is_main_process:
+            progress_bar.close()
+
+    # Save model (unwrap the DDP wrapper added by accelerator.prepare before saving)
+    accelerator.wait_for_everyone()
+    if accelerator.is_main_process:
+        accelerator.unwrap_model(model).save_pretrained("./trained_model")
+        print("Training complete! Model saved to ./trained_model")
+
+if __name__ == "__main__":
+    main()
+EOF
+
+# Create a helpful README
+RUN cat > /workspace/README.md << 'EOF'
+# RunPod Multi-GPU ML Stack
+
+## Quick Start
+Run `/workspace/setup.sh` first to detect GPUs and pull models!
+
+## Services:
+- **VSCode**: http://localhost:8080
+- **Ollama API**: http://localhost:11434
+- **vLLM API**: http://localhost:8000 (start with: `start-vllm <model-name>`)
+- **Open-WebUI**: http://localhost:3000
+
+## Multi-GPU Commands:
+- Training: `train-multi-gpu your_script.py`
+- vLLM: `start-vllm meta-llama/Llama-2-7b-hf`
+- Test multi-GPU: `python example_multi_gpu_train.py`
+
+## RTX 5090 Support:
+This image includes patched Unsloth and Flash Attention 2 for RTX 5090 compatibility.
+
+## SSH Access:
+Default password is `runpod`. Change it with `passwd`.
+
+## Tips:
+- Check GPU status: `nvidia-smi`
+- Monitor GPUs: `nvtop`
+- List Ollama models: `ollama list`
+- Pull new models: `ollama pull <model>`
+EOF
+
+# Expose all necessary ports
+# (Docker treats a trailing "#" on an instruction line as arguments, not a comment, so the roles are listed here:)
+# 22 SSH, 8080 code-server (VSCode), 11434 Ollama API, 8000 vLLM API, 3000 Open-WebUI,
+# 6006 TensorBoard, 8888 Jupyter, 5000 Flask/FastAPI apps, 7860 Gradio apps,
+# 29500 distributed training master port
+EXPOSE 22 8080 11434 8000 3000 6006 8888 5000 7860 29500
+
+# Set working directory
+WORKDIR /workspace
+
+# Run setup on first start and then supervisor
+CMD bash -c "/workspace/setup.sh && /usr/bin/supervisord -c /etc/supervisor/supervisord.conf"
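Outside of RunPod, a minimal local build-and-run sketch would look something like the following (the tag and host-side port numbers are arbitrary, and `--gpus all` assumes the NVIDIA Container Toolkit is installed):

    docker build -t runpod-ml-stack .
    docker run -d --gpus all \
      -p 2222:22 -p 8080:8080 -p 11434:11434 -p 8000:8000 -p 3000:3000 \
      runpod-ml-stack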