# Web_scrapper / Dockerfile
# Zelyanoth's picture
# Upload 5 files
# 85e594a verified
# Base image: the official Playwright image, pinned to v1.48.0 on Ubuntu jammy.
# It already includes the browsers and their system dependencies.
FROM mcr.microsoft.com/playwright:v1.48.0-jammy
# Build-time only: keeps apt/dpkg from prompting during the RUN steps of this
# build. Declared as ARG instead of ENV so that it is not baked into the
# environment of containers started from the image.
ARG DEBIAN_FRONTEND=noninteractive
# Timezone; kept as ENV because it is also meaningful at runtime.
ENV TZ=UTC
# Configure the system timezone and install Python 3.11 from the deadsnakes PPA.
# - "$TZ" is quoted so the shell never word-splits or globs the value.
# - add-apt-repository gets -y so it can never wait for a confirmation in a
#   non-interactive build.
# - The Python packages are installed with --no-install-recommends to keep
#   optional packages out of the image.
# - The apt package lists are removed in this same layer so they do not add to
#   the image size.
RUN apt-get update && apt-get install -y \
        curl \
        software-properties-common \
        tzdata \
    && ln -fs "/usr/share/zoneinfo/$TZ" /etc/localtime \
    && echo "$TZ" > /etc/timezone \
    && dpkg-reconfigure --frontend noninteractive tzdata \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        python3.11 \
        python3.11-dev \
        python3.11-distutils \
        python3.11-venv \
    && rm -rf /var/lib/apt/lists/*
# Install pip for Python 3.11 using the PyPA bootstrap script.
# The script is downloaded to a file instead of being piped into the
# interpreter: with a pipe under the default /bin/sh, only the exit status of
# the last command counts, so a failed download would go unnoticed.
# curl -f makes HTTP errors fail the step, -L follows redirects.
# The script is removed in the same layer, and --no-cache-dir keeps pip's
# download cache out of the image.
RUN curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py \
    && python3.11 /tmp/get-pip.py --no-cache-dir \
    && rm -f /tmp/get-pip.py
# Point the generic interpreter names `python` and `python3` in /usr/bin at the
# Python 3.11 binary. `set -e` aborts the step as soon as one link fails.
# NOTE(review): /usr/bin/python3 is normally the distribution's own interpreter;
# repointing it may affect system tools that rely on it - confirm this is intended.
RUN set -e; \
    for alias in python python3; do \
        ln -sf /usr/bin/python3.11 "/usr/bin/${alias}"; \
    done
# Relative paths in the following instructions (COPY destinations, the final
# CMD) resolve against /app.
WORKDIR /app
# Bring in only the dependency manifest at this point, so the install layer
# below can be reused from the build cache while requirements.txt and the
# layers above it are unchanged.
COPY requirements.txt ./
# Install the Python dependencies with the 3.11 interpreter; --no-cache-dir
# keeps pip's download cache out of the layer.
RUN python3.11 -m pip install --no-cache-dir --requirement requirements.txt
# Install the MCP Playwright package globally with npm.
# The version is a build argument so that a build can be pinned for
# reproducibility:
#   docker build --build-arg PLAYWRIGHT_MCP_VERSION=<version> .
# The default remains "latest", which preserves the previous behaviour.
# NOTE(review): "latest" may pull a release built against a newer Playwright
# than the v1.48.0 browsers shipped in the base image - consider pinning a
# version known to be compatible.
ARG PLAYWRIGHT_MCP_VERSION=latest
# The npm cache is cleared in the same layer so it does not add to the image size.
RUN npm install -g "@playwright/mcp@${PLAYWRIGHT_MCP_VERSION}" \
    && npm cache clean --force
# Create a non-root user (the system assigns the UID automatically).
RUN useradd -m appuser
# Copy the application code with appuser as owner. Setting ownership at COPY
# time avoids a recursive chown afterwards, which would store a second copy of
# every copied file in an additional layer.
COPY --chown=appuser:appuser . .
# The /app directory itself was created by WORKDIR before the user existed;
# hand the directory over to appuser so the application can create files in it.
RUN chown appuser:appuser /app
# Set mode 755 on everything under /ms-playwright (the Playwright browser
# directory) so that the unprivileged user selected below can read and execute
# the browsers.
RUN chmod --recursive 755 /ms-playwright
# From this point on, build steps and the container process run as appuser.
USER appuser
# Runtime configuration, grouped in a single ENV instruction:
# - Playwright: use the browsers pre-installed under /ms-playwright and skip
#   the browser download.
# - Gradio: server name 0.0.0.0 and server port 7860.
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright \
    PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860
# Document the Gradio port (same value as GRADIO_SERVER_PORT above).
EXPOSE 7860
# Run the application. Exec (JSON-array) form: the interpreter is started
# directly, without a wrapping shell. app.py is a relative path and is
# therefore resolved against the working directory set by WORKDIR (/app).
CMD ["python", "app.py"]