miike-ai committed on
Commit a9ac1e7 · verified · 1 Parent(s): f3df9f4

Update Dockerfile

Files changed (1)
  1. Dockerfile +155 -241

Dockerfile CHANGED
@@ -1,16 +1,14 @@
- FROM runpod/pytorch:2.8.0-py3.11-cuda12.8.1-cudnn-devel-ubuntu22.04
 
- # Set environment variables
  ENV DEBIAN_FRONTEND=noninteractive
  ENV PYTHONUNBUFFERED=1
  ENV HF_HUB_ENABLE_HF_TRANSFER=1
- ENV CUDA_VISIBLE_DEVICES=all
- ENV NVIDIA_VISIBLE_DEVICES=all
- ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
- ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
- ENV PATH=/usr/local/cuda/bin:$PATH
 
- # Install system dependencies
  RUN apt-get update && apt-get install -y \
  wget \
  curl \
@@ -18,7 +16,6 @@ RUN apt-get update && apt-get install -y \
  vim \
  tmux \
  htop \
- nvtop \
  build-essential \
  software-properties-common \
  ca-certificates \
@@ -28,21 +25,50 @@ RUN apt-get update && apt-get install -y \
  openssh-server \
  nginx \
  supervisor \
  && rm -rf /var/lib/apt/lists/*
 
  # Install code-server (VSCode in browser)
  RUN curl -fsSL https://code-server.dev/install.sh | sh
 
  # Install Ollama
  RUN curl -fsSL https://ollama.com/install.sh | sh
 
- # Upgrade pip and install base Python packages
- RUN pip install --upgrade pip setuptools wheel
-
- # Install hf_transfer first for faster downloads
  RUN pip install hf_transfer
 
- # Install critical ML infrastructure
  RUN pip install \
  accelerate \
  transformers \
@@ -55,34 +81,38 @@ RUN pip install \
  scipy \
  einops \
  wandb \
- tensorboard
 
- # Install vLLM with CUDA 12.8 support
- RUN pip install vllm --extra-index-url https://download.pytorch.org/whl/cu128
 
- # Install Flash Attention 2 (critical for 5090)
- RUN pip install flash-attn --no-build-isolation
 
- # Install Unsloth with 5090 patches
- # Using the approach from the referenced repo
  RUN git clone https://github.com/unslothai/unsloth.git /tmp/unsloth && \
  cd /tmp/unsloth && \
  pip install -e . && \
  cd / && \
  rm -rf /tmp/unsloth/.git
 
- # Install Axolotl
  RUN git clone https://github.com/axolotl-ai-cloud/axolotl /tmp/axolotl && \
  cd /tmp/axolotl && \
  pip install -e . && \
  cd / && \
  rm -rf /tmp/axolotl/.git
 
- # Install Open-WebUI dependencies
- RUN apt-get update && apt-get install -y \
- nodejs \
- npm \
- && rm -rf /var/lib/apt/lists/*
 
  # Clone and setup Open-WebUI
  RUN git clone https://github.com/open-webui/open-webui.git /opt/open-webui && \
@@ -90,26 +120,29 @@ RUN git clone https://github.com/open-webui/open-webui.git /opt/open-webui && \
  npm install && \
  npm run build
 
- # Create workspace directory
- RUN mkdir -p /workspace
 
- # Configure code-server
- RUN mkdir -p /root/.config/code-server
- RUN echo "bind-addr: 0.0.0.0:8080\nauth: none\ncert: false" > /root/.config/code-server/config.yaml
 
- # Configure SSH (optional but useful)
- RUN mkdir /var/run/sshd
- RUN echo 'root:runpod' | chpasswd
- RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
 
- # Create supervisor config to run all services
  RUN mkdir -p /etc/supervisor/conf.d
  RUN cat > /etc/supervisor/conf.d/services.conf << 'EOF'
  [supervisord]
  nodaemon=true
 
  [program:code-server]
- command=code-server --bind-addr 0.0.0.0:8080 --auth none
  autostart=true
  autorestart=true
  stderr_logfile=/var/log/code-server.err.log
@@ -119,15 +152,10 @@ stdout_logfile=/var/log/code-server.out.log
  command=ollama serve
  autostart=true
  autorestart=true
- environment=OLLAMA_HOST="0.0.0.0"
  stderr_logfile=/var/log/ollama.err.log
  stdout_logfile=/var/log/ollama.out.log
 
- [program:sshd]
- command=/usr/sbin/sshd -D
- autostart=true
- autorestart=true
-
  [program:open-webui]
  command=bash -c "cd /opt/open-webui && npm start"
  autostart=true
@@ -135,219 +163,105 @@ autorestart=true
  environment=PORT="3000",OLLAMA_BASE_URL="http://localhost:11434"
  stderr_logfile=/var/log/open-webui.err.log
  stdout_logfile=/var/log/open-webui.out.log
  EOF
 
- # Create a startup script for vLLM (runs on demand)
- RUN cat > /usr/local/bin/start-vllm << 'EOF'
- #!/bin/bash
- python -m vllm.entrypoints.openai.api_server \
-     --model $1 \
-     --tensor-parallel-size ${CUDA_DEVICE_COUNT:-1} \
-     --gpu-memory-utilization 0.9 \
-     --max-model-len 32768 \
-     --host 0.0.0.0 \
-     --port 8000
- EOF
- RUN chmod +x /usr/local/bin/start-vllm
-
- # Create multi-GPU training helper script
- RUN cat > /usr/local/bin/train-multi-gpu << 'EOF'
- #!/bin/bash
- GPU_COUNT=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
- accelerate launch \
-     --num_processes $GPU_COUNT \
-     --num_machines 1 \
-     --mixed_precision bf16 \
-     --dynamo_backend no \
-     $@
- EOF
- RUN chmod +x /usr/local/bin/train-multi-gpu
-
- # Create accelerate config
- RUN cat > /workspace/accelerate_config.yaml << 'EOF'
- compute_environment: LOCAL_MACHINE
- debug: false
- distributed_type: MULTI_GPU
- downcast_bf16: 'no'
- gpu_ids: all
- machine_rank: 0
- main_training_function: main
- mixed_precision: bf16
- num_machines: 1
- num_processes: 8
- rdzv_backend: static
- same_network: true
- tpu_env: []
- tpu_use_cluster: false
- tpu_use_sudo: false
- use_cpu: false
- EOF
-
- # Create setup script
- RUN cat > /workspace/setup.sh << 'EOF'
- #!/bin/bash
- echo "🚀 RunPod ML Stack Setup"
- echo "========================"
-
- # Check GPU availability
- echo -e "\n📊 GPU Status:"
- nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader,nounits | nl -v 0
-
- # Count GPUs
- GPU_COUNT=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
- echo -e "\n✅ Found $GPU_COUNT GPU(s)"
-
- # Update accelerate config with correct GPU count
- sed -i "s/num_processes: 8/num_processes: $GPU_COUNT/g" /workspace/accelerate_config.yaml
-
- # Pull a default model for Ollama if not exists
- if ! ollama list | grep -q "llama3.2"; then
-     echo -e "\n📥 Pulling default Ollama model (llama3.2)..."
-     ollama pull llama3.2
- fi
-
- echo -e "\n🎉 Setup complete! Services available at:"
- echo " VSCode: http://localhost:8080"
- echo " Ollama: http://localhost:11434"
- echo " Open-WebUI: http://localhost:3000"
- echo " vLLM: http://localhost:8000 (start with: start-vllm <model>)"
- EOF
- RUN chmod +x /workspace/setup.sh
 
- # Create example multi-GPU training script
- RUN cat > /workspace/example_multi_gpu_train.py << 'EOF'
- import torch
- from accelerate import Accelerator
- from transformers import AutoModelForCausalLM, AutoTokenizer
- from datasets import load_dataset
- from torch.optim import AdamW
- from torch.utils.data import DataLoader
- from tqdm import tqdm
-
- def main():
-     # Initialize accelerator
-     accelerator = Accelerator()
-
-     # Setup
-     model_name = "meta-llama/Llama-2-7b-hf"
-     batch_size = 4
-     gradient_accumulation_steps = 4
-     learning_rate = 2e-5
-     num_epochs = 3
-
-     # Print GPU info
-     if accelerator.is_main_process:
-         print(f"🚀 Training on {accelerator.num_processes} GPU(s)")
-         print(f"💾 Total batch size: {batch_size * accelerator.num_processes * gradient_accumulation_steps}")
-
-     # Load model and tokenizer
-     model = AutoModelForCausalLM.from_pretrained(
-         model_name,
-         torch_dtype=torch.bfloat16,
-         use_cache=False,
-     )
-     tokenizer = AutoTokenizer.from_pretrained(model_name)
-     tokenizer.pad_token = tokenizer.eos_token
-
-     # Load dataset
-     dataset = load_dataset("imdb", split="train[:1000]")  # Small subset for demo
-
-     def tokenize_function(examples):
-         return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)
-
-     tokenized_dataset = dataset.map(tokenize_function, batched=True)
-     tokenized_dataset.set_format("torch", columns=["input_ids", "attention_mask"])
-
-     # Create DataLoader
-     dataloader = DataLoader(tokenized_dataset, batch_size=batch_size, shuffle=True)
-
-     # Optimizer
-     optimizer = AdamW(model.parameters(), lr=learning_rate)
-
-     # Prepare for distributed training
-     model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)
-
-     # Training loop
-     model.train()
-     for epoch in range(num_epochs):
-         if accelerator.is_main_process:
-             print(f"\nEpoch {epoch + 1}/{num_epochs}")
-             progress_bar = tqdm(total=len(dataloader), desc="Training")
-
-         for step, batch in enumerate(dataloader):
-             with accelerator.accumulate(model):
-                 outputs = model(
-                     input_ids=batch["input_ids"],
-                     attention_mask=batch["attention_mask"],
-                     labels=batch["input_ids"],
-                 )
-                 loss = outputs.loss
-                 accelerator.backward(loss)
-                 optimizer.step()
-                 optimizer.zero_grad()
-
-             if accelerator.is_main_process:
-                 progress_bar.update(1)
-                 if step % 10 == 0:
-                     progress_bar.set_postfix({"loss": loss.item()})
-
-         if accelerator.is_main_process:
-             progress_bar.close()
-
-     # Save model
-     if accelerator.is_main_process:
-         model.save_pretrained("./trained_model")
-         print("✅ Training complete! Model saved to ./trained_model")
 
  if __name__ == "__main__":
-     main()
  EOF
 
- # Create a helpful README
- RUN cat > /workspace/README.md << 'EOF'
- # RunPod Multi-GPU ML Stack 🚀
-
- ## Quick Start
- Run `/workspace/setup.sh` first to detect GPUs and pull models!
-
- ## Services:
- - **VSCode**: http://localhost:8080
- - **Ollama API**: http://localhost:11434
- - **vLLM API**: http://localhost:8000 (start with: `start-vllm <model-name>`)
- - **Open-WebUI**: http://localhost:3000
-
- ## Multi-GPU Commands:
- - Training: `train-multi-gpu your_script.py`
- - vLLM: `start-vllm meta-llama/Llama-2-7b-hf`
- - Test multi-GPU: `python example_multi_gpu_train.py`
-
- ## RTX 5090 Support:
- This image includes patched Unsloth and Flash Attention 2 for RTX 5090 compatibility.
-
- ## SSH Access:
- Default password is `runpod`. Change it with `passwd`.
-
- ## Tips:
- - Check GPU status: `nvidia-smi`
- - Monitor GPUs: `nvtop`
- - List Ollama models: `ollama list`
- - Pull new models: `ollama pull <model>`
  EOF
 
- # Expose all necessary ports
  # SSH
  EXPOSE 22
- EXPOSE 8080 # Code-server (VSCode)
- EXPOSE 11434 # Ollama API
- EXPOSE 8000 # vLLM API
  # Open-WebUI
  EXPOSE 3000
- EXPOSE 6006 # TensorBoard
- EXPOSE 8888 # Jupyter (if needed)
- EXPOSE 5000 # Flask/FastAPI apps
- EXPOSE 7860 # Gradio apps
- EXPOSE 29500 # Distributed training master port
 
- # Set working directory
- WORKDIR /workspace
 
- # Run setup on first start and then supervisor
- CMD bash -c "/workspace/setup.sh && /usr/bin/supervisord -c /etc/supervisor/supervisord.conf"
+ FROM ubuntu:22.04
 
+ # Prevent interactive prompts
  ENV DEBIAN_FRONTEND=noninteractive
  ENV PYTHONUNBUFFERED=1
  ENV HF_HUB_ENABLE_HF_TRANSFER=1
+ ENV CUDA_HOME=/usr/local/cuda
+ ENV PATH=/usr/local/cuda/bin:${PATH}
+ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}
 
+ # Install base system dependencies
  RUN apt-get update && apt-get install -y \
  wget \
  curl \
  ...
  vim \
  tmux \
  htop \
  build-essential \
  software-properties-common \
  ca-certificates \
  ...
  openssh-server \
  nginx \
  supervisor \
+ python3.11 \
+ python3.11-dev \
+ python3-pip \
+ sudo \
  && rm -rf /var/lib/apt/lists/*
 
+ # Add NVIDIA package repositories
+ RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
+ dpkg -i cuda-keyring_1.1-1_all.deb && \
+ apt-get update
+
+ # Install CUDA 12.8
+ RUN apt-get install -y cuda-toolkit-12-8 && \
+ rm -rf /var/lib/apt/lists/*
+
+ # Install cuDNN
+ RUN apt-get update && apt-get install -y \
+ libcudnn9-cuda-12 \
+ libcudnn9-dev-cuda-12 \
+ && rm -rf /var/lib/apt/lists/*
+
+ # Set Python 3.11 as default
+ RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 && \
+ update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
+
+ # Upgrade pip
+ RUN python -m pip install --upgrade pip setuptools wheel
+
+ # Install PyTorch with CUDA 12.8 support
+ RUN pip install torch==2.8.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
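Once the image is running, it's worth confirming the wheel actually sees the card. A minimal in-container sketch (treating sm_120 as the RTX 5090's compute capability is an assumption):

```python
import torch

# Sanity-check the CUDA 12.8 build of PyTorch installed above.
print("torch:", torch.__version__, "| CUDA available:", torch.cuda.is_available())

if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        major, minor = torch.cuda.get_device_capability(i)
        print(f"GPU {i}: {torch.cuda.get_device_name(i)} (sm_{major}{minor})")
    # A bf16 matmul exercises the kernels end to end.
    x = torch.randn(1024, 1024, device="cuda", dtype=torch.bfloat16)
    print("matmul ok:", (x @ x).shape)
```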
 
+ # Create non-root user for HF Spaces compatibility
+ RUN useradd -m -u 1000 user && \
+ echo "user ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
+
  # Install code-server (VSCode in browser)
  RUN curl -fsSL https://code-server.dev/install.sh | sh
 
  # Install Ollama
  RUN curl -fsSL https://ollama.com/install.sh | sh
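Once supervisord starts `ollama serve` (configured further down), models are reachable over Ollama's HTTP API on port 11434. A minimal sketch, assuming `requests` is available and `llama3.2` has already been pulled:

```python
import requests

# Query Ollama's generate endpoint; assumes `ollama pull llama3.2`
# has already run and the server is listening on its default port.
resp = requests.post(
    "http://localhost:11434/api/generate",
    json={"model": "llama3.2", "prompt": "Say hello in one sentence.", "stream": False},
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["response"])
```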
 
+ # Install hf_transfer first
  RUN pip install hf_transfer
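With `HF_HUB_ENABLE_HF_TRANSFER=1` already exported near the top of the file, `huggingface_hub` (pulled in by `transformers` below) routes downloads through the Rust-backed `hf_transfer`. A minimal sketch, with the repo id as a placeholder:

```python
import os
from huggingface_hub import snapshot_download

# The env var baked into the image switches hub downloads to hf_transfer.
assert os.environ.get("HF_HUB_ENABLE_HF_TRANSFER") == "1"

# Any repo works; a small instruct model keeps the demo quick.
path = snapshot_download("Qwen/Qwen2.5-0.5B-Instruct")
print("snapshot at:", path)
```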
 
+ # Install core ML packages
  RUN pip install \
  accelerate \
  transformers \
  ...
  scipy \
  einops \
  wandb \
+ tensorboard \
+ gradio \
+ streamlit
 
+ # Install vLLM
+ RUN pip install vllm
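Besides the OpenAI-compatible server the control panel launches later in this file, vLLM can also run in-process. A minimal sketch (the model id is a placeholder):

```python
from vllm import LLM, SamplingParams

# Offline (in-process) inference; the api_server the Gradio panel
# starts below wraps this same engine behind an OpenAI-style API.
llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")
params = SamplingParams(temperature=0.7, max_tokens=64)

outputs = llm.generate(["The RTX 5090 is"], params)
print(outputs[0].outputs[0].text)
```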
 
+ # Install Flash Attention 2
+ RUN pip install ninja packaging && \
+ pip install flash-attn --no-build-isolation
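After flash-attn finishes compiling, Transformers can opt into the FlashAttention-2 kernels per model via `attn_implementation`. A minimal sketch (the model id is a placeholder, and FA2 requires fp16/bf16 weights):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "meta-llama/Llama-2-7b-hf"  # placeholder; any FA2-capable model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,               # FA2 needs half precision
    attn_implementation="flash_attention_2",  # use the kernels built above
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=20)[0]))
```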
 
+ # Install Triton for 5090 support
+ RUN pip install triton
+
+ # Clone and install Unsloth with patches
  RUN git clone https://github.com/unslothai/unsloth.git /tmp/unsloth && \
  cd /tmp/unsloth && \
  pip install -e . && \
  cd / && \
  rm -rf /tmp/unsloth/.git
 
+ # Clone and install Axolotl
  RUN git clone https://github.com/axolotl-ai-cloud/axolotl /tmp/axolotl && \
  cd /tmp/axolotl && \
  pip install -e . && \
  cd / && \
  rm -rf /tmp/axolotl/.git
 
+ # Install Node.js for Open-WebUI
+ RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
+ apt-get install -y nodejs && \
+ rm -rf /var/lib/apt/lists/*
 
  # Clone and setup Open-WebUI
  RUN git clone https://github.com/open-webui/open-webui.git /opt/open-webui && \
  ...
  npm install && \
  npm run build
 
+ # Create directories with proper permissions
+ RUN mkdir -p /home/user/app /home/user/.cache /home/user/.config && \
+ chown -R user:user /home/user
 
+ # Configure code-server for user
+ RUN mkdir -p /home/user/.config/code-server && \
+ echo "bind-addr: 0.0.0.0:8080\nauth: none\ncert: false" > /home/user/.config/code-server/config.yaml && \
+ chown -R user:user /home/user/.config
 
+ # Setup SSH
+ RUN mkdir /var/run/sshd && \
+ echo 'user:spaces' | chpasswd && \
+ sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin no/' /etc/ssh/sshd_config
 
+ # Create supervisor config
  RUN mkdir -p /etc/supervisor/conf.d
  RUN cat > /etc/supervisor/conf.d/services.conf << 'EOF'
  [supervisord]
  nodaemon=true
+ user=root
 
  [program:code-server]
+ command=sudo -u user code-server --bind-addr 0.0.0.0:8080 --auth none
  autostart=true
  autorestart=true
  stderr_logfile=/var/log/code-server.err.log
  ...
  command=ollama serve
  autostart=true
  autorestart=true
+ environment=OLLAMA_HOST="0.0.0.0",HOME="/home/user"
  stderr_logfile=/var/log/ollama.err.log
  stdout_logfile=/var/log/ollama.out.log
 
  [program:open-webui]
  command=bash -c "cd /opt/open-webui && npm start"
  autostart=true
  ...
  environment=PORT="3000",OLLAMA_BASE_URL="http://localhost:11434"
  stderr_logfile=/var/log/open-webui.err.log
  stdout_logfile=/var/log/open-webui.out.log
+ user=user
  EOF
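Inside the running container, supervisord's own CLI is the quickest way to see whether the services configured above came up. A small sketch driving `supervisorctl` (shipped with the `supervisor` package installed earlier):

```python
import subprocess

# Ask supervisord for the state of code-server, ollama and open-webui.
status = subprocess.run(
    ["supervisorctl", "status"],
    capture_output=True, text=True, check=False,
)
print(status.stdout)

# Anything not RUNNING will have details in the logfiles configured
# above, e.g. /var/log/code-server.err.log or /var/log/ollama.err.log.
for line in status.stdout.splitlines():
    if "RUNNING" not in line:
        print("needs attention:", line.split()[0])
```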
 
+ # Create Gradio app for HF Spaces
+ RUN cat > /home/user/app/app.py << 'EOF'
+ import gradio as gr
+ import subprocess
+ import os
+
+ def get_services_status():
+     services = {
+         "VSCode": "http://localhost:8080",
+         "Ollama API": "http://localhost:11434",
+         "Open-WebUI": "http://localhost:3000",
+         "vLLM": "http://localhost:8000"
+     }
+
+     status = "# 🚀 ML Stack Services Status\n\n"
+     for service, url in services.items():
+         status += f"- **{service}**: {url}\n"
+
+     # Get GPU info
+     try:
+         gpu_info = subprocess.check_output(['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader'], text=True)
+         status += f"\n## 🎮 GPU Status\n```\n{gpu_info}```"
+     except Exception:
+         status += "\n## ⚠️ No GPUs detected"
+
+     return status
+
+ def launch_vllm(model_name):
+     try:
+         cmd = f"python -m vllm.entrypoints.openai.api_server --model {model_name} --host 0.0.0.0 --port 8000"
+         subprocess.Popen(cmd, shell=True)
+         return f"✅ Launching vLLM with model: {model_name}"
+     except Exception as e:
+         return f"❌ Error: {str(e)}"
+
+ def pull_ollama_model(model_name):
+     try:
+         result = subprocess.run(['ollama', 'pull', model_name], capture_output=True, text=True)
+         return f"✅ {result.stdout}\n{result.stderr}"
+     except Exception as e:
+         return f"❌ Error: {str(e)}"
+
+ # Create Gradio interface
+ with gr.Blocks(title="ML Stack Control Panel") as demo:
+     gr.Markdown("# 🎮 RunPod ML Stack Control Panel")
+
+     with gr.Tab("Status"):
+         status_btn = gr.Button("🔄 Refresh Status")
+         status_output = gr.Markdown()
+         status_btn.click(get_services_status, outputs=status_output)
+
+     with gr.Tab("vLLM"):
+         model_input = gr.Textbox(label="Model Name", value="meta-llama/Llama-2-7b-hf")
+         vllm_btn = gr.Button("🚀 Launch vLLM")
+         vllm_output = gr.Textbox(label="Output")
+         vllm_btn.click(launch_vllm, inputs=model_input, outputs=vllm_output)
+
+     with gr.Tab("Ollama"):
+         ollama_model = gr.Textbox(label="Model Name", value="llama3.2")
+         ollama_btn = gr.Button("📥 Pull Model")
+         ollama_output = gr.Textbox(label="Output")
+         ollama_btn.click(pull_ollama_model, inputs=ollama_model, outputs=ollama_output)
+
+     # Load initial status
+     demo.load(get_services_status, outputs=status_output)
 
  if __name__ == "__main__":
+     # Start supervisor in background
+     subprocess.Popen(["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"])
+
+     # Launch Gradio
+     demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
  EOF
 
+ # Create startup script
+ RUN cat > /home/user/app/start.sh << 'EOF'
+ #!/bin/bash
+ cd /home/user/app
+ python app.py
  EOF
+ RUN chmod +x /home/user/app/start.sh
+
+ # Fix permissions
+ RUN chown -R user:user /home/user /opt/open-webui
 
+ # Expose ports
  # SSH
  EXPOSE 22
+ # Gradio (HF Spaces default)
+ EXPOSE 7860
+ # Code-server
+ EXPOSE 8080
+ # Ollama
+ EXPOSE 11434
+ # vLLM
+ EXPOSE 8000
  # Open-WebUI
  EXPOSE 3000
 
+ # Switch to user
+ USER user
+ WORKDIR /home/user/app
 
+ # Set the entrypoint for HF Spaces
+ CMD ["python", "app.py"]