Update Dockerfile
Dockerfile (CHANGED): +155 -241
@@ -1,16 +1,14 @@
-FROM

-#
 ENV DEBIAN_FRONTEND=noninteractive
 ENV PYTHONUNBUFFERED=1
 ENV HF_HUB_ENABLE_HF_TRANSFER=1
-ENV
-ENV
-ENV
-ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
-ENV PATH=/usr/local/cuda/bin:$PATH

-# Install system dependencies
 RUN apt-get update && apt-get install -y \
     wget \
     curl \
@@ -18,7 +16,6 @@ RUN apt-get update && apt-get install -y \
     vim \
     tmux \
     htop \
-    nvtop \
     build-essential \
     software-properties-common \
     ca-certificates \
@@ -28,21 +25,50 @@ RUN apt-get update && apt-get install -y \
     openssh-server \
     nginx \
     supervisor \
     && rm -rf /var/lib/apt/lists/*

 # Install code-server (VSCode in browser)
 RUN curl -fsSL https://code-server.dev/install.sh | sh

 # Install Ollama
 RUN curl -fsSL https://ollama.com/install.sh | sh

-#
-RUN pip install --upgrade pip setuptools wheel
-
-# Install hf_transfer first for faster downloads
 RUN pip install hf_transfer

-# Install
 RUN pip install \
     accelerate \
     transformers \
@@ -55,34 +81,38 @@ RUN pip install \
     scipy \
     einops \
     wandb \
-    tensorboard

-# Install vLLM
-RUN pip install vllm

-# Install Flash Attention 2
-RUN pip install

-# Install
-
 RUN git clone https://github.com/unslothai/unsloth.git /tmp/unsloth && \
     cd /tmp/unsloth && \
     pip install -e . && \
     cd / && \
     rm -rf /tmp/unsloth/.git

-#
 RUN git clone https://github.com/axolotl-ai-cloud/axolotl /tmp/axolotl && \
     cd /tmp/axolotl && \
     pip install -e . && \
     cd / && \
     rm -rf /tmp/axolotl/.git

-# Install Open-WebUI
-RUN
-    nodejs \
-
-    && rm -rf /var/lib/apt/lists/*

 # Clone and setup Open-WebUI
 RUN git clone https://github.com/open-webui/open-webui.git /opt/open-webui && \
@@ -90,26 +120,29 @@ RUN git clone https://github.com/open-webui/open-webui.git /opt/open-webui && \
     npm install && \
     npm run build

-# Create
-RUN mkdir -p /

-# Configure code-server
-RUN mkdir -p /
-

-#
-RUN mkdir /var/run/sshd
-
-

-# Create supervisor config
 RUN mkdir -p /etc/supervisor/conf.d
 RUN cat > /etc/supervisor/conf.d/services.conf << 'EOF'
 [supervisord]
 nodaemon=true

 [program:code-server]
-command=code-server --bind-addr 0.0.0.0:8080 --auth none
 autostart=true
 autorestart=true
 stderr_logfile=/var/log/code-server.err.log
@@ -119,15 +152,10 @@ stdout_logfile=/var/log/code-server.out.log
 command=ollama serve
 autostart=true
 autorestart=true
-environment=OLLAMA_HOST="0.0.0.0"
 stderr_logfile=/var/log/ollama.err.log
 stdout_logfile=/var/log/ollama.out.log

-[program:sshd]
-command=/usr/sbin/sshd -D
-autostart=true
-autorestart=true
-
 [program:open-webui]
 command=cd /opt/open-webui && npm start
 autostart=true
@@ -135,219 +163,105 @@ autorestart=true
 environment=PORT="3000",OLLAMA_BASE_URL="http://localhost:11434"
 stderr_logfile=/var/log/open-webui.err.log
 stdout_logfile=/var/log/open-webui.out.log
 EOF

-# Create
-RUN cat > /
-
-
-
-
-
-
-
-
-
-
-# Create multi-GPU training helper script
-RUN cat > /usr/local/bin/train-multi-gpu << 'EOF'
-#!/bin/bash
-GPU_COUNT=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
-accelerate launch \
-    --num_processes $GPU_COUNT \
-    --num_machines 1 \
-    --mixed_precision bf16 \
-    --dynamo_backend no \
-    $@
-EOF
-RUN chmod +x /usr/local/bin/train-multi-gpu
-
-# Create accelerate config
-RUN cat > /workspace/accelerate_config.yaml << 'EOF'
-compute_environment: LOCAL_MACHINE
-debug: false
-distributed_type: MULTI_GPU
-downcast_bf16: 'no'
-gpu_ids: all
-machine_rank: 0
-main_training_function: main
-mixed_precision: bf16
-num_machines: 1
-num_processes: 8
-rdzv_backend: static
-same_network: true
-tpu_env: []
-tpu_use_cluster: false
-tpu_use_sudo: false
-use_cpu: false
-EOF
-
-# Create setup script
-RUN cat > /workspace/setup.sh << 'EOF'
-#!/bin/bash
-echo "RunPod ML Stack Setup"
-echo "========================"
-
-# Check GPU availability
-echo -e "\nGPU Status:"
-nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader,nounits | nl -v 0
-
-# Count GPUs
-GPU_COUNT=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
-echo -e "\nFound $GPU_COUNT GPU(s)"
-
-# Update accelerate config with correct GPU count
-sed -i "s/num_processes: 8/num_processes: $GPU_COUNT/g" /workspace/accelerate_config.yaml
-
-# Pull a default model for Ollama if not exists
-if ! ollama list | grep -q "llama3.2"; then
-    echo -e "\nPulling default Ollama model (llama3.2)..."
-    ollama pull llama3.2
-fi
-
-echo -e "\nSetup complete! Services available at:"
-echo "  VSCode: http://localhost:8080"
-echo "  Ollama: http://localhost:11434"
-echo "  Open-WebUI: http://localhost:3000"
-echo "  vLLM: http://localhost:8000 (start with: start-vllm <model>)"
-EOF
-RUN chmod +x /workspace/setup.sh
-
-# Create example multi-GPU training script
-RUN cat > /workspace/example_multi_gpu_train.py << 'EOF'
-import torch
-from accelerate import Accelerator
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from datasets import load_dataset
-from torch.optim import AdamW
-from torch.utils.data import DataLoader
-from tqdm import tqdm
-
-def main():
-    # Initialize accelerator
-    accelerator = Accelerator()
-
-    # Setup
-    model_name = "meta-llama/Llama-2-7b-hf"
-    batch_size = 4
-    gradient_accumulation_steps = 4
-    learning_rate = 2e-5
-    num_epochs = 3
-
-    # Print GPU info
-    if accelerator.is_main_process:
-        print(f"Training on {accelerator.num_processes} GPU(s)")
-        print(f"Total batch size: {batch_size * accelerator.num_processes * gradient_accumulation_steps}")
-
-    # Load model and tokenizer
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.bfloat16,
-        use_cache=False,
-    )
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    tokenizer.pad_token = tokenizer.eos_token
-
-    # Load dataset
-    dataset = load_dataset("imdb", split="train[:1000]")  # Small subset for demo

-
-

-
-

-
-

-
-

-
-

-
-
-
-
-
-    progress_bar = tqdm(total=len(dataloader), desc="Training")
-
-    for step, batch in enumerate(dataloader):
-        with accelerator.accumulate(model):
-            outputs = model(
-                input_ids=batch["input_ids"],
-                attention_mask=batch["attention_mask"],
-                labels=batch["input_ids"],
-            )
-            loss = outputs.loss
-            accelerator.backward(loss)
-            optimizer.step()
-            optimizer.zero_grad()
-
-        if accelerator.is_main_process:
-            progress_bar.update(1)
-            if step % 10 == 0:
-                progress_bar.set_postfix({"loss": loss.item()})
-
-    if accelerator.is_main_process:
-        progress_bar.close()

-    #
-
-    model.save_pretrained("./trained_model")
-    print("Training complete! Model saved to ./trained_model")

 if __name__ == "__main__":
-
 EOF

-# Create
-RUN cat > /
-
-
-
-Run `/workspace/setup.sh` first to detect GPUs and pull models!
-
-## Services:
-- **VSCode**: http://localhost:8080
-- **Ollama API**: http://localhost:11434
-- **vLLM API**: http://localhost:8000 (start with: `start-vllm <model-name>`)
-- **Open-WebUI**: http://localhost:3000
-
-## Multi-GPU Commands:
-- Training: `train-multi-gpu your_script.py`
-- vLLM: `start-vllm meta-llama/Llama-2-7b-hf`
-- Test multi-GPU: `python example_multi_gpu_train.py`
-
-## RTX 5090 Support:
-This image includes patched Unsloth and Flash Attention 2 for RTX 5090 compatibility.
-
-## SSH Access:
-Default password is `runpod`. Change it with `passwd`.
-
-## Tips:
-- Check GPU status: `nvidia-smi`
-- Monitor GPUs: `nvtop`
-- List Ollama models: `ollama list`
-- Pull new models: `ollama pull <model>`
 EOF

-# Expose
 EXPOSE 22 # SSH
-EXPOSE
-EXPOSE
-EXPOSE
 EXPOSE 3000 # Open-WebUI
-EXPOSE 6006 # TensorBoard
-EXPOSE 8888 # Jupyter (if needed)
-EXPOSE 5000 # Flask/FastAPI apps
-EXPOSE 7860 # Gradio apps
-EXPOSE 29500 # Distributed training master port

-#
-

-#
-CMD
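The removed /workspace/example_multi_gpu_train.py above is only partially visible in this view: the lines between the dataset load and the training loop are truncated. A minimal sketch of what that elided middle typically looks like with the names used in the visible portion (tokenization, DataLoader, optimizer, and accelerator.prepare) follows; the tokenize_fn helper, max_length, and shuffle settings are assumptions, not the original code.

```python
# Hypothetical reconstruction of the truncated middle of example_multi_gpu_train.py.
# Relies on names defined in the visible portion of the script: tokenizer, dataset,
# model, batch_size, learning_rate, AdamW, DataLoader, and accelerator.

def tokenize_fn(examples):
    # Tokenize raw IMDB text into fixed-length input_ids / attention_mask
    return tokenizer(
        examples["text"],
        truncation=True,
        max_length=512,
        padding="max_length",
    )

tokenized = dataset.map(tokenize_fn, batched=True, remove_columns=dataset.column_names)
tokenized.set_format(type="torch", columns=["input_ids", "attention_mask"])

dataloader = DataLoader(tokenized, batch_size=batch_size, shuffle=True)
optimizer = AdamW(model.parameters(), lr=learning_rate)

# Let Accelerate handle device placement and DDP across the detected GPUs
model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)
model.train()
```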
+FROM ubuntu:22.04

+# Prevent interactive prompts
 ENV DEBIAN_FRONTEND=noninteractive
 ENV PYTHONUNBUFFERED=1
 ENV HF_HUB_ENABLE_HF_TRANSFER=1
+ENV CUDA_HOME=/usr/local/cuda
+ENV PATH=/usr/local/cuda/bin:${PATH}
+ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}

+# Install base system dependencies
 RUN apt-get update && apt-get install -y \
     wget \
     curl \
     vim \
     tmux \
     htop \
     build-essential \
     software-properties-common \
     ca-certificates \
     openssh-server \
     nginx \
     supervisor \
+    python3.11 \
+    python3.11-dev \
+    python3-pip \
     && rm -rf /var/lib/apt/lists/*

+# Add NVIDIA package repositories
+RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
+    dpkg -i cuda-keyring_1.1-1_all.deb && \
+    apt-get update
+
+# Install CUDA 12.8
+RUN apt-get install -y cuda-toolkit-12-8 && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install cuDNN
+RUN apt-get update && apt-get install -y \
+    libcudnn9-cuda-12 \
+    libcudnn9-dev-cuda-12 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set Python 3.11 as default
+RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 && \
+    update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
+
+# Upgrade pip
+RUN python -m pip install --upgrade pip setuptools wheel
+
+# Install PyTorch with CUDA 12.8 support
+RUN pip install torch==2.5.1+cu128 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
+
+# Create non-root user for HF Spaces compatibility
+RUN useradd -m -u 1000 user && \
+    echo "user ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
+
 # Install code-server (VSCode in browser)
 RUN curl -fsSL https://code-server.dev/install.sh | sh

 # Install Ollama
 RUN curl -fsSL https://ollama.com/install.sh | sh

+# Install hf_transfer first
 RUN pip install hf_transfer

+# Install core ML packages
 RUN pip install \
     accelerate \
     transformers \
     scipy \
     einops \
     wandb \
+    tensorboard \
+    gradio \
+    streamlit

+# Install vLLM
+RUN pip install vllm

+# Install Flash Attention 2
+RUN pip install ninja packaging && \
+    pip install flash-attn --no-build-isolation

+# Install Triton for 5090 support
+RUN pip install triton
+
+# Clone and install Unsloth with patches
 RUN git clone https://github.com/unslothai/unsloth.git /tmp/unsloth && \
     cd /tmp/unsloth && \
     pip install -e . && \
     cd / && \
     rm -rf /tmp/unsloth/.git

+# Clone and install Axolotl
 RUN git clone https://github.com/axolotl-ai-cloud/axolotl /tmp/axolotl && \
     cd /tmp/axolotl && \
     pip install -e . && \
     cd / && \
     rm -rf /tmp/axolotl/.git

+# Install Node.js for Open-WebUI
+RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
+    apt-get install -y nodejs && \
+    rm -rf /var/lib/apt/lists/*

 # Clone and setup Open-WebUI
 RUN git clone https://github.com/open-webui/open-webui.git /opt/open-webui && \
     npm install && \
     npm run build

+# Create directories with proper permissions
+RUN mkdir -p /home/user/app /home/user/.cache /home/user/.config && \
+    chown -R user:user /home/user

+# Configure code-server for user
+RUN mkdir -p /home/user/.config/code-server && \
+    echo "bind-addr: 0.0.0.0:8080\nauth: none\ncert: false" > /home/user/.config/code-server/config.yaml && \
+    chown -R user:user /home/user/.config

+# Setup SSH
+RUN mkdir /var/run/sshd && \
+    echo 'user:spaces' | chpasswd && \
+    sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin no/' /etc/ssh/sshd_config

+# Create supervisor config
 RUN mkdir -p /etc/supervisor/conf.d
 RUN cat > /etc/supervisor/conf.d/services.conf << 'EOF'
 [supervisord]
 nodaemon=true
+user=root

 [program:code-server]
+command=sudo -u user code-server --bind-addr 0.0.0.0:8080 --auth none
 autostart=true
 autorestart=true
 stderr_logfile=/var/log/code-server.err.log
 command=ollama serve
 autostart=true
 autorestart=true
+environment=OLLAMA_HOST="0.0.0.0",HOME="/home/user"
 stderr_logfile=/var/log/ollama.err.log
 stdout_logfile=/var/log/ollama.out.log

 [program:open-webui]
 command=cd /opt/open-webui && npm start
 autostart=true
 environment=PORT="3000",OLLAMA_BASE_URL="http://localhost:11434"
 stderr_logfile=/var/log/open-webui.err.log
 stdout_logfile=/var/log/open-webui.out.log
+user=user
 EOF

+# Create Gradio app for HF Spaces
+RUN cat > /home/user/app/app.py << 'EOF'
+import gradio as gr
+import subprocess
+import os
+
+def get_services_status():
+    services = {
+        "VSCode": "http://localhost:8080",
+        "Ollama API": "http://localhost:11434",
+        "Open-WebUI": "http://localhost:3000",
+        "vLLM": "http://localhost:8000"
+    }

+    status = "# ML Stack Services Status\n\n"
+    for service, url in services.items():
+        status += f"- **{service}**: {url}\n"

+    # Get GPU info
+    try:
+        gpu_info = subprocess.check_output(['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader'], text=True)
+        status += f"\n## GPU Status\n```\n{gpu_info}```"
+    except:
+        status += "\n## No GPUs detected"

+    return status
+
+def launch_vllm(model_name):
+    try:
+        cmd = f"python -m vllm.entrypoints.openai.api_server --model {model_name} --host 0.0.0.0 --port 8000"
+        subprocess.Popen(cmd, shell=True)
+        return f"Launching vLLM with model: {model_name}"
+    except Exception as e:
+        return f"Error: {str(e)}"
+
+def pull_ollama_model(model_name):
+    try:
+        result = subprocess.run(['ollama', 'pull', model_name], capture_output=True, text=True)
+        return f"{result.stdout}\n{result.stderr}"
+    except Exception as e:
+        return f"Error: {str(e)}"
+
+# Create Gradio interface
+with gr.Blocks(title="ML Stack Control Panel") as demo:
+    gr.Markdown("# RunPod ML Stack Control Panel")

+    with gr.Tab("Status"):
+        status_btn = gr.Button("Refresh Status")
+        status_output = gr.Markdown()
+        status_btn.click(get_services_status, outputs=status_output)

+    with gr.Tab("vLLM"):
+        model_input = gr.Textbox(label="Model Name", value="meta-llama/Llama-2-7b-hf")
+        vllm_btn = gr.Button("Launch vLLM")
+        vllm_output = gr.Textbox(label="Output")
+        vllm_btn.click(launch_vllm, inputs=model_input, outputs=vllm_output)

+    with gr.Tab("Ollama"):
+        ollama_model = gr.Textbox(label="Model Name", value="llama3.2")
+        ollama_btn = gr.Button("Pull Model")
+        ollama_output = gr.Textbox(label="Output")
+        ollama_btn.click(pull_ollama_model, inputs=ollama_model, outputs=ollama_output)

+    # Load initial status
+    demo.load(get_services_status, outputs=status_output)

 if __name__ == "__main__":
+    # Start supervisor in background
+    subprocess.Popen(["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"])
+
+    # Launch Gradio
+    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
 EOF

+# Create startup script
+RUN cat > /home/user/app/start.sh << 'EOF'
+#!/bin/bash
+cd /home/user/app
+python app.py
 EOF
+RUN chmod +x /home/user/app/start.sh
+
+# Fix permissions
+RUN chown -R user:user /home/user /opt/open-webui

+# Expose ports
 EXPOSE 22 # SSH
+EXPOSE 7860 # Gradio (HF Spaces default)
+EXPOSE 8080 # Code-server
+EXPOSE 11434 # Ollama
+EXPOSE 8000 # vLLM
 EXPOSE 3000 # Open-WebUI

+# Switch to user
+USER user
+WORKDIR /home/user/app

+# Set the entrypoint for HF Spaces
+CMD ["python", "app.py"]
|