# syntax=docker/dockerfile:1
# (the heredoc-style RUN blocks below require a BuildKit-based builder)
FROM ubuntu:22.04
# Prevent interactive prompts
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV HF_HUB_ENABLE_HF_TRANSFER=1
ENV CUDA_HOME=/usr/local/cuda
ENV PATH=/usr/local/cuda/bin:${PATH}
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}
# Install base system dependencies
RUN apt-get update && apt-get install -y \
    wget \
    curl \
    git \
    vim \
    tmux \
    htop \
    build-essential \
    software-properties-common \
    ca-certificates \
    gnupg \
    lsb-release \
    sudo \
    openssh-server \
    nginx \
    supervisor \
    python3.11 \
    python3.11-dev \
    python3-pip \
    && rm -rf /var/lib/apt/lists/*
# Add NVIDIA package repositories
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
    dpkg -i cuda-keyring_1.1-1_all.deb && \
    rm cuda-keyring_1.1-1_all.deb && \
    apt-get update
# Install CUDA 12.8
RUN apt-get install -y cuda-toolkit-12-8 && \
    rm -rf /var/lib/apt/lists/*
# Install cuDNN
RUN apt-get update && apt-get install -y \
    libcudnn9-cuda-12 \
    libcudnn9-dev-cuda-12 \
    && rm -rf /var/lib/apt/lists/*
# Set Python 3.11 as default
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 && \
    update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
# Upgrade pip
RUN python -m pip install --upgrade pip setuptools wheel
# Install PyTorch with CUDA 12.8 support
# (the 2.5.x series has no cu128 wheels, so let pip resolve a matching build from the cu128 index)
RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
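# Quick sanity check once a container is running (no GPU is visible at build time,
# so this is a runtime check rather than a build step):
#   python -c "import torch; print(torch.__version__, torch.cuda.is_available())"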
# Create non-root user for HF Spaces compatibility
RUN useradd -m -u 1000 user && \
    echo "user ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
# Install code-server (VSCode in browser)
RUN curl -fsSL https://code-server.dev/install.sh | sh
# Install Ollama
RUN curl -fsSL https://ollama.com/install.sh | sh
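# Illustrative runtime usage once `ollama serve` is up (the model tag mirrors the
# default used in the Gradio panel below):
#   ollama pull llama3.2
#   ollama run llama3.2 "Hello"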
# Install hf_transfer first
RUN pip install hf_transfer
# Install core ML packages
RUN pip install \
    accelerate \
    transformers \
    datasets \
    peft \
    bitsandbytes \
    safetensors \
    sentencepiece \
    protobuf \
    scipy \
    einops \
    wandb \
    tensorboard \
    gradio \
    streamlit
# Install vLLM
RUN pip install vllm
# Install Flash Attention 2
RUN pip install ninja packaging && \
    pip install flash-attn --no-build-isolation
# Install Triton for 5090 support
RUN pip install triton
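# Illustrative runtime launch of vLLM's OpenAI-compatible server (the same entrypoint
# the Gradio control panel below uses; the model name is only an example):
#   python -m vllm.entrypoints.openai.api_server \
#       --model meta-llama/Llama-2-7b-hf --host 0.0.0.0 --port 8000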
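# Compiling flash-attn needs nvcc (from the CUDA toolkit above) and can exhaust RAM;
# capping the number of parallel compile jobs is a common workaround, e.g.:
#   MAX_JOBS=4 pip install flash-attn --no-build-isolation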
# Clone and install Unsloth with patches
RUN git clone https://github.com/unslothai/unsloth.git /tmp/unsloth && \
    cd /tmp/unsloth && \
    pip install -e . && \
    cd / && \
    rm -rf /tmp/unsloth/.git
# Clone and install Axolotl
RUN git clone https://github.com/axolotl-ai-cloud/axolotl /tmp/axolotl && \
    cd /tmp/axolotl && \
    pip install -e . && \
    cd / && \
    rm -rf /tmp/axolotl/.git
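# Typical Axolotl invocation at runtime (the config path is illustrative, not shipped here):
#   accelerate launch -m axolotl.cli.train path/to/config.yml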
# Install Node.js for Open-WebUI
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
    apt-get install -y nodejs && \
    rm -rf /var/lib/apt/lists/*
# Clone and setup Open-WebUI
RUN git clone https://github.com/open-webui/open-webui.git /opt/open-webui && \
    cd /opt/open-webui && \
    npm install && \
    npm run build
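# NOTE: `npm run build` only produces the Open-WebUI frontend bundle. If the Python
# backend is wanted as well, its requirements would also need installing; assuming the
# upstream repo layout, something like:
#   pip install -r /opt/open-webui/backend/requirements.txt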
# Create directories with proper permissions
RUN mkdir -p /home/user/app /home/user/.cache /home/user/.config && \
    chown -R user:user /home/user
# Configure code-server for user
RUN mkdir -p /home/user/.config/code-server && \
    printf 'bind-addr: 0.0.0.0:8080\nauth: none\ncert: false\n' > /home/user/.config/code-server/config.yaml && \
    chown -R user:user /home/user/.config
# Setup SSH
RUN mkdir /var/run/sshd && \
    echo 'user:spaces' | chpasswd && \
    sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin no/' /etc/ssh/sshd_config
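# sshd is configured above but not managed by supervisor; if needed it can be started
# manually from a terminal at runtime:
#   sudo /usr/sbin/sshd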
# Create supervisor config
RUN mkdir -p /etc/supervisor/conf.d
RUN cat > /etc/supervisor/conf.d/services.conf << 'EOF'
[supervisord]
nodaemon=true
user=root

[program:code-server]
command=sudo -u user code-server --bind-addr 0.0.0.0:8080 --auth none
autostart=true
autorestart=true
stderr_logfile=/var/log/code-server.err.log
stdout_logfile=/var/log/code-server.out.log

[program:ollama]
command=ollama serve
autostart=true
autorestart=true
environment=OLLAMA_HOST="0.0.0.0",HOME="/home/user"
stderr_logfile=/var/log/ollama.err.log
stdout_logfile=/var/log/ollama.out.log
[program:open-webui]
; supervisord does not run commands through a shell, so use directory= instead of "cd &&"
directory=/opt/open-webui
command=npm start
autostart=true
autorestart=true
environment=PORT="3000",OLLAMA_BASE_URL="http://localhost:11434"
stderr_logfile=/var/log/open-webui.err.log
stdout_logfile=/var/log/open-webui.out.log
user=user
EOF
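# The managed services can be inspected at runtime with supervisorctl, e.g.:
#   supervisorctl -c /etc/supervisor/supervisord.conf status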
# Create Gradio app for HF Spaces
RUN cat > /home/user/app/app.py << 'EOF'
import gradio as gr
import subprocess
import os

def get_services_status():
    services = {
        "VSCode": "http://localhost:8080",
        "Ollama API": "http://localhost:11434",
        "Open-WebUI": "http://localhost:3000",
        "vLLM": "http://localhost:8000"
    }
    status = "# 🚀 ML Stack Services Status\n\n"
    for service, url in services.items():
        status += f"- **{service}**: {url}\n"
    # Get GPU info
    try:
        gpu_info = subprocess.check_output(['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader'], text=True)
        status += f"\n## 🎮 GPU Status\n```\n{gpu_info}```"
    except Exception:
        status += "\n## ⚠️ No GPUs detected"
    return status

def launch_vllm(model_name):
    try:
        cmd = f"python -m vllm.entrypoints.openai.api_server --model {model_name} --host 0.0.0.0 --port 8000"
        subprocess.Popen(cmd, shell=True)
        return f"✅ Launching vLLM with model: {model_name}"
    except Exception as e:
        return f"❌ Error: {str(e)}"

def pull_ollama_model(model_name):
    try:
        result = subprocess.run(['ollama', 'pull', model_name], capture_output=True, text=True)
        return f"✅ {result.stdout}\n{result.stderr}"
    except Exception as e:
        return f"❌ Error: {str(e)}"

# Create Gradio interface
with gr.Blocks(title="ML Stack Control Panel") as demo:
    gr.Markdown("# 🎮 RunPod ML Stack Control Panel")
    with gr.Tab("Status"):
        status_btn = gr.Button("🔄 Refresh Status")
        status_output = gr.Markdown()
        status_btn.click(get_services_status, outputs=status_output)
    with gr.Tab("vLLM"):
        model_input = gr.Textbox(label="Model Name", value="meta-llama/Llama-2-7b-hf")
        vllm_btn = gr.Button("🚀 Launch vLLM")
        vllm_output = gr.Textbox(label="Output")
        vllm_btn.click(launch_vllm, inputs=model_input, outputs=vllm_output)
    with gr.Tab("Ollama"):
        ollama_model = gr.Textbox(label="Model Name", value="llama3.2")
        ollama_btn = gr.Button("📥 Pull Model")
        ollama_output = gr.Textbox(label="Output")
        ollama_btn.click(pull_ollama_model, inputs=ollama_model, outputs=ollama_output)
    # Load initial status
    demo.load(get_services_status, outputs=status_output)

if __name__ == "__main__":
    # Start supervisor in background
    subprocess.Popen(["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"])
    # Launch Gradio
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
EOF
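# After the "Launch vLLM" tab has started a server, it answers on the OpenAI-compatible
# API; an illustrative query (the model must match the one launched):
#   curl http://localhost:8000/v1/completions -H "Content-Type: application/json" \
#     -d '{"model": "meta-llama/Llama-2-7b-hf", "prompt": "Hello", "max_tokens": 16}'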
# Create startup script
RUN cat > /home/user/app/start.sh << 'EOF'
#!/bin/bash
cd /home/user/app
python app.py
EOF
RUN chmod +x /home/user/app/start.sh
# Fix permissions
RUN chown -R user:user /home/user /opt/open-webui
# Expose ports: 22 (SSH), 7860 (Gradio, HF Spaces default), 8080 (code-server),
# 11434 (Ollama), 8000 (vLLM), 3000 (Open-WebUI)
EXPOSE 22 7860 8080 11434 8000 3000
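# Illustrative local run outside HF Spaces (image tag and flags are examples only):
#   docker build -t ml-stack .
#   docker run --gpus all -p 7860:7860 -p 8080:8080 -p 11434:11434 \
#     -p 8000:8000 -p 3000:3000 ml-stack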
# Switch to user
USER user
WORKDIR /home/user/app
# Set the entrypoint for HF Spaces
CMD ["python", "app.py"]