FROM ubuntu:22.04 # Prevent interactive prompts ENV DEBIAN_FRONTEND=noninteractive ENV PYTHONUNBUFFERED=1 ENV HF_HUB_ENABLE_HF_TRANSFER=1 ENV CUDA_HOME=/usr/local/cuda ENV PATH=/usr/local/cuda/bin:${PATH} ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH} # Install base system dependencies RUN apt-get update && apt-get install -y \ wget \ curl \ git \ vim \ tmux \ htop \ build-essential \ software-properties-common \ ca-certificates \ gnupg \ lsb-release \ sudo \ openssh-server \ nginx \ supervisor \ python3.11 \ python3.11-dev \ python3-pip \ && rm -rf /var/lib/apt/lists/* # Add NVIDIA package repositories RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ dpkg -i cuda-keyring_1.1-1_all.deb && \ apt-get update # Install CUDA 12.8 RUN apt-get install -y cuda-toolkit-12-8 && \ rm -rf /var/lib/apt/lists/* # Install cuDNN RUN apt-get update && apt-get install -y \ libcudnn9-cuda-12 \ libcudnn9-dev-cuda-12 \ && rm -rf /var/lib/apt/lists/* # Set Python 3.11 as default RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 && \ update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 # Upgrade pip RUN python -m pip install --upgrade pip setuptools wheel # Install PyTorch with CUDA 12.8 support RUN pip install torch==2.5.1+cu128 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128 # Create non-root user for HF Spaces compatibility RUN useradd -m -u 1000 user && \ echo "user ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers # Install code-server (VSCode in browser) RUN curl -fsSL https://code-server.dev/install.sh | sh # Install Ollama RUN curl -fsSL https://ollama.com/install.sh | sh # Install hf_transfer first RUN pip install hf_transfer # Install core ML packages RUN pip install \ accelerate \ transformers \ datasets \ peft \ bitsandbytes \ safetensors \ sentencepiece \ protobuf \ scipy \ einops \ wandb \ tensorboard \ gradio \ streamlit # Install vLLM RUN pip install vllm # Install Flash Attention 2 RUN pip install ninja packaging && \ pip install flash-attn --no-build-isolation # Install Triton for 5090 support RUN pip install triton # Clone and install Unsloth with patches RUN git clone https://github.com/unslothai/unsloth.git /tmp/unsloth && \ cd /tmp/unsloth && \ pip install -e . && \ cd / && \ rm -rf /tmp/unsloth/.git # Clone and install Axolotl RUN git clone https://github.com/axolotl-ai-cloud/axolotl /tmp/axolotl && \ cd /tmp/axolotl && \ pip install -e . && \ cd / && \ rm -rf /tmp/axolotl/.git # Install Node.js for Open-WebUI RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \ apt-get install -y nodejs && \ rm -rf /var/lib/apt/lists/* # Clone and setup Open-WebUI RUN git clone https://github.com/open-webui/open-webui.git /opt/open-webui && \ cd /opt/open-webui && \ npm install && \ npm run build # Create directories with proper permissions RUN mkdir -p /home/user/app /home/user/.cache /home/user/.config && \ chown -R user:user /home/user # Configure code-server for user RUN mkdir -p /home/user/.config/code-server && \ echo "bind-addr: 0.0.0.0:8080\nauth: none\ncert: false" > /home/user/.config/code-server/config.yaml && \ chown -R user:user /home/user/.config # Setup SSH RUN mkdir /var/run/sshd && \ echo 'user:spaces' | chpasswd && \ sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin no/' /etc/ssh/sshd_config # Create supervisor config RUN mkdir -p /etc/supervisor/conf.d RUN cat > /etc/supervisor/conf.d/services.conf << 'EOF' [supervisord] nodaemon=true user=root [program:code-server] command=sudo -u user code-server --bind-addr 0.0.0.0:8080 --auth none autostart=true autorestart=true stderr_logfile=/var/log/code-server.err.log stdout_logfile=/var/log/code-server.out.log [program:ollama] command=ollama serve autostart=true autorestart=true environment=OLLAMA_HOST="0.0.0.0",HOME="/home/user" stderr_logfile=/var/log/ollama.err.log stdout_logfile=/var/log/ollama.out.log [program:open-webui] command=cd /opt/open-webui && npm start autostart=true autorestart=true environment=PORT="3000",OLLAMA_BASE_URL="http://localhost:11434" stderr_logfile=/var/log/open-webui.err.log stdout_logfile=/var/log/open-webui.out.log user=user EOF # Create Gradio app for HF Spaces RUN cat > /home/user/app/app.py << 'EOF' import gradio as gr import subprocess import os def get_services_status(): services = { "VSCode": "http://localhost:8080", "Ollama API": "http://localhost:11434", "Open-WebUI": "http://localhost:3000", "vLLM": "http://localhost:8000" } status = "# 🚀 ML Stack Services Status\n\n" for service, url in services.items(): status += f"- **{service}**: {url}\n" # Get GPU info try: gpu_info = subprocess.check_output(['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader'], text=True) status += f"\n## 🎮 GPU Status\n```\n{gpu_info}```" except: status += "\n## ⚠️ No GPUs detected" return status def launch_vllm(model_name): try: cmd = f"python -m vllm.entrypoints.openai.api_server --model {model_name} --host 0.0.0.0 --port 8000" subprocess.Popen(cmd, shell=True) return f"✅ Launching vLLM with model: {model_name}" except Exception as e: return f"❌ Error: {str(e)}" def pull_ollama_model(model_name): try: result = subprocess.run(['ollama', 'pull', model_name], capture_output=True, text=True) return f"✅ {result.stdout}\n{result.stderr}" except Exception as e: return f"❌ Error: {str(e)}" # Create Gradio interface with gr.Blocks(title="ML Stack Control Panel") as demo: gr.Markdown("# 🎮 RunPod ML Stack Control Panel") with gr.Tab("Status"): status_btn = gr.Button("🔄 Refresh Status") status_output = gr.Markdown() status_btn.click(get_services_status, outputs=status_output) with gr.Tab("vLLM"): model_input = gr.Textbox(label="Model Name", value="meta-llama/Llama-2-7b-hf") vllm_btn = gr.Button("🚀 Launch vLLM") vllm_output = gr.Textbox(label="Output") vllm_btn.click(launch_vllm, inputs=model_input, outputs=vllm_output) with gr.Tab("Ollama"): ollama_model = gr.Textbox(label="Model Name", value="llama3.2") ollama_btn = gr.Button("📥 Pull Model") ollama_output = gr.Textbox(label="Output") ollama_btn.click(pull_ollama_model, inputs=ollama_model, outputs=ollama_output) # Load initial status demo.load(get_services_status, outputs=status_output) if __name__ == "__main__": # Start supervisor in background subprocess.Popen(["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"]) # Launch Gradio demo.launch(server_name="0.0.0.0", server_port=7860, share=False) EOF # Create startup script RUN cat > /home/user/app/start.sh << 'EOF' #!/bin/bash cd /home/user/app python app.py EOF RUN chmod +x /home/user/app/start.sh # Fix permissions RUN chown -R user:user /home/user /opt/open-webui # Expose ports EXPOSE 22 # SSH EXPOSE 7860 # Gradio (HF Spaces default) EXPOSE 8080 # Code-server EXPOSE 11434 # Ollama EXPOSE 8000 # vLLM EXPOSE 3000 # Open-WebUI # Switch to user USER user WORKDIR /home/user/app # Set the entrypoint for HF Spaces CMD ["python", "app.py"]