miike-ai committed
Commit f3df9f4 · verified · 1 Parent(s): ca4136c

Update Dockerfile

Files changed (1)
  1. Dockerfile +335 -100
Dockerfile CHANGED
@@ -1,118 +1,353 @@
- FROM nvidia/cuda:12.8.0-devel-ubuntu22.04

  # Set environment variables
  ENV DEBIAN_FRONTEND=noninteractive
- ENV HOME=/root
- ENV CUDA_HOME=/usr/local/cuda
- ENV PATH=${CUDA_HOME}/bin:${PATH}
- ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
- ENV TORCH_CUDA_ARCH_LIST="12.0"
-
- # Install system dependencies and build tools
- RUN apt-get update && \
- apt-get install -y \
- curl wget gpg apt-transport-https git software-properties-common \
- build-essential cmake ninja-build \
- libopenblas-dev libomp-dev \
  && rm -rf /var/lib/apt/lists/*

- # Install Python 3.12 (as recommended)
- RUN add-apt-repository ppa:deadsnakes/ppa && \
- apt-get update && \
- apt-get install -y python3.11 python3.11-venv python3.11-dev && \
- update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 && \
- update-alternatives --set python3 /usr/bin/python3.11 && \
- curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11

- # Install uv for faster dependency management
- RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
- echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc

- # Create workspace
- RUN mkdir -p /workspace
- WORKDIR /workspace

- # CRITICAL: Install in the EXACT order specified in the README
- # Step 1: Create venv
- RUN python3 -m venv /opt/unsloth-env
- ENV PATH="/opt/unsloth-env/bin:$PATH"

- # Step 2: Install vllm with cu128 (MUST be first)
- RUN /opt/unsloth-env/bin/pip install --upgrade pip setuptools wheel && \
- /opt/unsloth-env/bin/pip install -U vllm --extra-index-url https://wheels.vllm.ai/nightly

- # Step 3: Install unsloth dependencies
- RUN /opt/unsloth-env/bin/pip install unsloth unsloth_zoo bitsandbytes

- # Step 4: Build xformers from source (no Blackwell wheels exist yet)
- RUN git clone --depth=1 https://github.com/facebookresearch/xformers --recursive /tmp/xformers && \
- cd /tmp/xformers && \
- /opt/unsloth-env/bin/pip uninstall -y xformers && \
- /opt/unsloth-env/bin/python setup.py install && \
- rm -rf /tmp/xformers

- # Step 5: Update triton to >=3.3.1 for Blackwell
- RUN /opt/unsloth-env/bin/pip install -U "triton>=3.3.1"

- # Step 6: Pin transformers to avoid gradient checkpointing bug
- RUN /opt/unsloth-env/bin/pip install -U "transformers==4.52.4"

- # Step 7: Might need to downgrade numpy
- RUN /opt/unsloth-env/bin/pip install "numpy<=2.2"

- # Now install your dev tools (code-server, Node.js, etc)
- RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \
- apt-get install -y nodejs && \
- curl -fsSL https://code-server.dev/install.sh | sh && \
- npm install -g @anthropic-ai/claude-code @anthropic-ai/dxt && \
- apt-get clean && \
- rm -rf /var/lib/apt/lists/*

- # Install Ollama
- RUN curl -fsSL https://ollama.com/install.sh | sh || true

  # Configure code-server
- RUN mkdir -p /root/.config/code-server /root/.ollama && \
- echo "bind-addr: 0.0.0.0:8443\nauth: none\ncert: false" > /root/.config/code-server/config.yaml
-
- # Install VS Code extensions
- RUN code-server --install-extension ms-python.python && \
- code-server --install-extension ritwickdey.LiveServer && \
- code-server --install-extension ms-toolsai.jupyter
-
- # Create startup script with proper env activation
- RUN echo '#!/bin/bash\n\
- # Activate the unsloth environment\n\
- source /opt/unsloth-env/bin/activate\n\
- \n\
- # Start Ollama in the background\n\
- /usr/local/bin/ollama serve &\n\
- \n\
- # Give Ollama a moment to start\n\
- sleep 2\n\
- \n\
- # Start code-server with the activated environment\n\
- exec code-server --disable-telemetry --bind-addr 0.0.0.0:8443 /workspace\n\
- ' > /start.sh && \
- chmod +x /start.sh
-
- # Expose ports
- EXPOSE 8443 11434
-
- # Copy requirements.txt to workspace (will fail build if not present)
- COPY requirements.txt /workspace/requirements.txt
-
- # Create a script to install from requirements.txt with proper environment
- RUN echo '#!/bin/bash\n\
- echo "Installing from requirements.txt..."\n\
- source /opt/unsloth-env/bin/activate\n\
- pip install -r /workspace/requirements.txt\n\
- echo "Installation complete!"\n\
- ' > /workspace/install-requirements.sh && \
- chmod +x /workspace/install-requirements.sh
-
- # Optional: Install requirements.txt if you want it done at build time
- # Uncomment the next line if you want automatic installation
- RUN /opt/unsloth-env/bin/pip install -r /workspace/requirements.txt
-
- CMD ["/start.sh"]
+ # syntax=docker/dockerfile:1
+ FROM runpod/pytorch:2.8.0-py3.11-cuda12.8.1-cudnn-devel-ubuntu22.04

  # Set environment variables
  ENV DEBIAN_FRONTEND=noninteractive
+ ENV PYTHONUNBUFFERED=1
+ ENV HF_HUB_ENABLE_HF_TRANSFER=1
+ ENV NVIDIA_VISIBLE_DEVICES=all
+ ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
+ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+ ENV PATH=/usr/local/cuda/bin:$PATH
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     wget \
+     curl \
+     git \
+     vim \
+     tmux \
+     htop \
+     nvtop \
+     build-essential \
+     software-properties-common \
+     ca-certificates \
+     gnupg \
+     lsb-release \
+     sudo \
+     openssh-server \
+     nginx \
+     supervisor \
  && rm -rf /var/lib/apt/lists/*
+
+ # Install code-server (VSCode in browser)
+ RUN curl -fsSL https://code-server.dev/install.sh | sh
+
+ # Install Ollama
+ RUN curl -fsSL https://ollama.com/install.sh | sh
+
+ # Upgrade pip and install base Python packages
+ RUN pip install --upgrade pip setuptools wheel
+
+ # Install hf_transfer first for faster downloads
+ RUN pip install hf_transfer
+
+ # Install critical ML infrastructure
+ RUN pip install \
+     accelerate \
+     transformers \
+     datasets \
+     peft \
+     bitsandbytes \
+     safetensors \
+     sentencepiece \
+     protobuf \
+     scipy \
+     einops \
+     wandb \
+     tensorboard
+
+ # Install vLLM with CUDA 12.8 support
+ RUN pip install vllm --extra-index-url https://download.pytorch.org/whl/cu128
+
+ # Install Flash Attention 2 (critical for 5090)
+ RUN pip install flash-attn --no-build-isolation
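+ # Optional sanity check once a container is running (a sketch, not executed at build time):
+ #   python -c "import flash_attn; print(flash_attn.__version__)"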
+
+ # Install Unsloth with 5090 patches
+ # Using the approach from the referenced repo
+ RUN git clone https://github.com/unslothai/unsloth.git /tmp/unsloth && \
+     cd /tmp/unsloth && \
+     pip install -e . && \
+     cd / && \
+     rm -rf /tmp/unsloth/.git
+
+ # Install Axolotl
+ RUN git clone https://github.com/axolotl-ai-cloud/axolotl /tmp/axolotl && \
+     cd /tmp/axolotl && \
+     pip install -e . && \
+     cd / && \
+     rm -rf /tmp/axolotl/.git
+
+ # Install Open-WebUI dependencies
+ RUN apt-get update && apt-get install -y \
+     nodejs \
+     npm \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Clone and setup Open-WebUI
+ RUN git clone https://github.com/open-webui/open-webui.git /opt/open-webui && \
+     cd /opt/open-webui && \
+     npm install && \
+     npm run build
+
+ # Create workspace directory
+ RUN mkdir -p /workspace
+
  # Configure code-server
+ RUN mkdir -p /root/.config/code-server
+ RUN echo "bind-addr: 0.0.0.0:8080\nauth: none\ncert: false" > /root/.config/code-server/config.yaml
+
+ # Configure SSH (optional but useful)
+ RUN mkdir /var/run/sshd
+ RUN echo 'root:runpod' | chpasswd
+ RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
+
+ # Create supervisor config to run all services
+ RUN mkdir -p /etc/supervisor/conf.d
+ RUN cat > /etc/supervisor/conf.d/services.conf << 'EOF'
+ [supervisord]
+ nodaemon=true
+
+ [program:code-server]
+ command=code-server --bind-addr 0.0.0.0:8080 --auth none
+ autostart=true
+ autorestart=true
+ stderr_logfile=/var/log/code-server.err.log
+ stdout_logfile=/var/log/code-server.out.log
+
+ [program:ollama]
+ command=ollama serve
+ autostart=true
+ autorestart=true
+ environment=OLLAMA_HOST="0.0.0.0"
+ stderr_logfile=/var/log/ollama.err.log
+ stdout_logfile=/var/log/ollama.out.log
+
+ [program:sshd]
+ command=/usr/sbin/sshd -D
+ autostart=true
+ autorestart=true
+
+ [program:open-webui]
+ directory=/opt/open-webui
+ command=npm start
+ autostart=true
+ autorestart=true
+ environment=PORT="3000",OLLAMA_BASE_URL="http://localhost:11434"
+ stderr_logfile=/var/log/open-webui.err.log
+ stdout_logfile=/var/log/open-webui.out.log
+ EOF
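+ # At runtime, service state can be inspected with supervisorctl (a sketch; assumes the
+ # stock Ubuntu supervisord.conf, which includes /etc/supervisor/conf.d/*.conf):
+ #   supervisorctl -c /etc/supervisor/supervisord.conf status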
+
+ # Create a startup script for vLLM (runs on demand)
+ RUN cat > /usr/local/bin/start-vllm << 'EOF'
+ #!/bin/bash
+ # Serve the requested model, tensor-parallel across all detected GPUs
+ GPU_COUNT=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
+ python -m vllm.entrypoints.openai.api_server \
+     --model "$1" \
+     --tensor-parallel-size "${GPU_COUNT:-1}" \
+     --gpu-memory-utilization 0.9 \
+     --max-model-len 32768 \
+     --host 0.0.0.0 \
+     --port 8000
+ EOF
+ RUN chmod +x /usr/local/bin/start-vllm
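+ # Example usage from a shell inside the container (model id matches the README below;
+ # any Hugging Face model id works): start-vllm meta-llama/Llama-2-7b-hf
+ # The OpenAI-compatible server then answers on port 8000, e.g.:
+ #   curl http://localhost:8000/v1/models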
+
+ # Create multi-GPU training helper script
+ RUN cat > /usr/local/bin/train-multi-gpu << 'EOF'
+ #!/bin/bash
+ # Launch the given training script on every detected GPU via accelerate
+ GPU_COUNT=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
+ accelerate launch \
+     --num_processes $GPU_COUNT \
+     --num_machines 1 \
+     --mixed_precision bf16 \
+     --dynamo_backend no \
+     "$@"
+ EOF
+ RUN chmod +x /usr/local/bin/train-multi-gpu
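+ # Example usage (the example script is created further down in this Dockerfile):
+ #   train-multi-gpu /workspace/example_multi_gpu_train.py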
+
+ # Create accelerate config
+ RUN cat > /workspace/accelerate_config.yaml << 'EOF'
+ compute_environment: LOCAL_MACHINE
+ debug: false
+ distributed_type: MULTI_GPU
+ downcast_bf16: 'no'
+ gpu_ids: all
+ machine_rank: 0
+ main_training_function: main
+ mixed_precision: bf16
+ num_machines: 1
+ num_processes: 8
+ rdzv_backend: static
+ same_network: true
+ tpu_env: []
+ tpu_use_cluster: false
+ tpu_use_sudo: false
+ use_cpu: false
+ EOF
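+ # setup.sh below rewrites num_processes to the detected GPU count; to use this config
+ # explicitly (a sketch): accelerate launch --config_file /workspace/accelerate_config.yaml <script.py>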
+
+ # Create setup script
+ RUN cat > /workspace/setup.sh << 'EOF'
+ #!/bin/bash
+ echo "🚀 RunPod ML Stack Setup"
+ echo "========================"
+
+ # Check GPU availability
+ echo -e "\n📊 GPU Status:"
+ nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader,nounits | nl -v 0
+
+ # Count GPUs
+ GPU_COUNT=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
+ echo -e "\n✅ Found $GPU_COUNT GPU(s)"
+
+ # Update accelerate config with correct GPU count
+ sed -i "s/num_processes: 8/num_processes: $GPU_COUNT/g" /workspace/accelerate_config.yaml
+
+ # Pull a default model for Ollama if the server is up and the model is missing
+ if ollama list >/dev/null 2>&1 && ! ollama list | grep -q "llama3.2"; then
+     echo -e "\n📥 Pulling default Ollama model (llama3.2)..."
+     ollama pull llama3.2
+ fi
+
+ echo -e "\n🎉 Setup complete! Services available at:"
+ echo "  VSCode:     http://localhost:8080"
+ echo "  Ollama:     http://localhost:11434"
+ echo "  Open-WebUI: http://localhost:3000"
+ echo "  vLLM:       http://localhost:8000 (start with: start-vllm <model>)"
+ EOF
+ RUN chmod +x /workspace/setup.sh
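+ # setup.sh is run automatically by the CMD at the end of this file; it can also be
+ # re-run manually from a terminal: bash /workspace/setup.sh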
+
+ # Create example multi-GPU training script
+ RUN cat > /workspace/example_multi_gpu_train.py << 'EOF'
+ import torch
+ from accelerate import Accelerator
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from datasets import load_dataset
+ from torch.optim import AdamW
+ from torch.utils.data import DataLoader
+ from tqdm import tqdm
+
+ def main():
+     # Initialize accelerator
+     accelerator = Accelerator()
+
+     # Setup
+     model_name = "meta-llama/Llama-2-7b-hf"
+     batch_size = 4
+     gradient_accumulation_steps = 4
+     learning_rate = 2e-5
+     num_epochs = 3
+
+     # Print GPU info
+     if accelerator.is_main_process:
+         print(f"🚀 Training on {accelerator.num_processes} GPU(s)")
+         print(f"💾 Total batch size: {batch_size * accelerator.num_processes * gradient_accumulation_steps}")
+
+     # Load model and tokenizer
+     model = AutoModelForCausalLM.from_pretrained(
+         model_name,
+         torch_dtype=torch.bfloat16,
+         use_cache=False,
+     )
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     tokenizer.pad_token = tokenizer.eos_token
+
+     # Load dataset
+     dataset = load_dataset("imdb", split="train[:1000]")  # Small subset for demo
+
+     def tokenize_function(examples):
+         return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)
+
+     tokenized_dataset = dataset.map(tokenize_function, batched=True)
+     tokenized_dataset.set_format("torch", columns=["input_ids", "attention_mask"])
+
+     # Create DataLoader
+     dataloader = DataLoader(tokenized_dataset, batch_size=batch_size, shuffle=True)
+
+     # Optimizer
+     optimizer = AdamW(model.parameters(), lr=learning_rate)
+
+     # Prepare for distributed training
+     model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)
+
+     # Training loop
+     model.train()
+     for epoch in range(num_epochs):
+         if accelerator.is_main_process:
+             print(f"\nEpoch {epoch + 1}/{num_epochs}")
+             progress_bar = tqdm(total=len(dataloader), desc="Training")
+
+         for step, batch in enumerate(dataloader):
+             with accelerator.accumulate(model):
+                 outputs = model(
+                     input_ids=batch["input_ids"],
+                     attention_mask=batch["attention_mask"],
+                     labels=batch["input_ids"],
+                 )
+                 loss = outputs.loss
+                 accelerator.backward(loss)
+                 optimizer.step()
+                 optimizer.zero_grad()
+
+             if accelerator.is_main_process:
+                 progress_bar.update(1)
+                 if step % 10 == 0:
+                     progress_bar.set_postfix({"loss": loss.item()})
+
+         if accelerator.is_main_process:
+             progress_bar.close()
+
+     # Save model (unwrap the DDP wrapper added by accelerator.prepare)
+     if accelerator.is_main_process:
+         accelerator.unwrap_model(model).save_pretrained("./trained_model")
+         print("✅ Training complete! Model saved to ./trained_model")
+
+ if __name__ == "__main__":
+     main()
+ EOF
+
+ # Create a helpful README
+ RUN cat > /workspace/README.md << 'EOF'
+ # RunPod Multi-GPU ML Stack 🚀
+
+ ## Quick Start
+ Run `/workspace/setup.sh` first to detect GPUs and pull models!
+
+ ## Services:
+ - **VSCode**: http://localhost:8080
+ - **Ollama API**: http://localhost:11434
+ - **vLLM API**: http://localhost:8000 (start with: `start-vllm <model-name>`)
+ - **Open-WebUI**: http://localhost:3000
+
+ ## Multi-GPU Commands:
+ - Training: `train-multi-gpu your_script.py`
+ - vLLM: `start-vllm meta-llama/Llama-2-7b-hf`
+ - Test multi-GPU: `python example_multi_gpu_train.py`
+
+ ## RTX 5090 Support:
+ This image includes patched Unsloth and Flash Attention 2 for RTX 5090 compatibility.
+
+ ## SSH Access:
+ Default password is `runpod`. Change it with `passwd`.
+
+ ## Tips:
+ - Check GPU status: `nvidia-smi`
+ - Monitor GPUs: `nvtop`
+ - List Ollama models: `ollama list`
+ - Pull new models: `ollama pull <model>`
+ EOF
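+ # Building and running the image (a sketch; tag and port mappings are illustrative):
+ #   docker build -t runpod-ml-stack .
+ #   docker run --gpus all -p 8080:8080 -p 11434:11434 -p 3000:3000 -p 8000:8000 runpod-ml-stack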
+
+ # Expose all necessary ports
+ # (22 SSH, 8080 code-server/VSCode, 11434 Ollama API, 8000 vLLM API, 3000 Open-WebUI,
+ #  6006 TensorBoard, 8888 Jupyter, 5000 Flask/FastAPI apps, 7860 Gradio apps,
+ #  29500 distributed training master port)
+ EXPOSE 22 8080 11434 8000 3000 6006 8888 5000 7860 29500
+
+ # Set working directory
+ WORKDIR /workspace
+
+ # Run setup on first start and then supervisor
+ CMD bash -c "/workspace/setup.sh && /usr/bin/supervisord -c /etc/supervisor/supervisord.conf"