# craw_web / Dockerfile
# euler314's picture
# Update Dockerfile
# ae78d26 verified
# raw
# history blame
# 1.37 kB
# Base image: the slim variant of the official Python 3.9 image.
FROM python:3.9-slim

# Runtime environment, declared in a single instruction:
#   PYTHONDONTWRITEBYTECODE=1       - Python does not write .pyc files.
#   PYTHONUNBUFFERED=1              - stdout is not buffered.
#   SCRAPELY_DISABLE_C_EXTENSIONS=1 - asks Scrapely to skip its C extensions
#                                     (only has an effect if Scrapely supports
#                                     this variable).
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    SCRAPELY_DISABLE_C_EXTENSIONS=1

# Directory that the following relative paths and the default command use.
WORKDIR /home/user/app
# Install the OS-level packages the application needs, in a single layer.
#
# libgl1 is used instead of libgl1-mesa-glx: the latter was only a
# transitional package and is no longer shipped by the Debian releases that
# current python:3.9-slim images are built on, so requesting it aborts the
# build with "has no installation candidate". libgl1 supplies libGL.so.1.
#
# The apt package lists are deleted in the same RUN so they never persist in
# the layer; `git lfs install` then sets up the Git LFS hooks/filters.
RUN apt-get update && apt-get install -y --no-install-recommends \
        cmake \
        ffmpeg \
        git \
        git-lfs \
        libgl1 \
        libsm6 \
        libxext6 \
        rsync \
    && rm -rf /var/lib/apt/lists/* \
    && git lfs install
# Upgrade pip, then install the common Python packages.
# --no-cache-dir is passed to BOTH pip invocations so that no download cache
# is left behind in this layer (the pip upgrade previously ran without it).
# The version constraints below are kept exactly as specified.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir \
        datasets \
        "huggingface-hub>=0.19" \
        "hf-transfer>=0.1.4" \
        "protobuf<4" \
        "click<8.1" \
        "pydantic~=1.0"
# Bring in only the dependency manifest at this point, ahead of the rest of
# the source tree, so that edits to application code alone do not invalidate
# the dependency-install layer below.
COPY requirements.txt ./

# NOTE: requirements.txt is expected to pin Scrapely to a compatible (or
# pure-Python) release, for example with a line such as:
#
#     scrapely==0.13.2
#
# Install the project's Python dependencies without keeping pip's cache.
RUN pip install --no-cache-dir --requirement requirements.txt
# Add the remaining build-context files to the working directory.
COPY . ./

# Document the port the app is expected to listen on (8501 is the port
# Streamlit uses by default). EXPOSE is informational; it does not publish
# the port on its own.
# NOTE(review): the command below starts app.py with plain `python`; if the
# app is a Streamlit script, confirm it actually serves on this port.
EXPOSE 8501/tcp

# Default command (adjust as needed): run app.py with the image's Python.
# NOTE(review): no USER instruction is set in this file, so the process runs
# as the base image's default user - confirm whether a non-root user is wanted.
CMD ["python", "app.py"]