# Base image: slim Python 3.9 on an explicitly named Debian release.
# Naming the suite (bookworm) keeps the apt package set below stable instead of
# silently changing whenever the floating `3.9-slim` tag is rebased.
FROM python:3.9-slim-bookworm
# Python runtime settings:
#   PYTHONDONTWRITEBYTECODE=1  -> do not write .pyc files
#   PYTHONUNBUFFERED=1         -> unbuffered stdout/stderr so logs appear immediately
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Ask Scrapely to skip its C extensions.
# NOTE(review): nothing in this file shows that Scrapely honors this variable;
# confirm against the Scrapely version pinned in requirements.txt.
ENV SCRAPELY_DISABLE_C_EXTENSIONS=1
# Application directory; WORKDIR creates it if it does not exist, and all
# relative paths in later COPY/RUN/CMD instructions resolve against it.
WORKDIR /home/user/app
# Install system dependencies (sorted alphabetically for easy diffing).
# - `libgl1` replaces `libgl1-mesa-glx`, a transitional package that is no
#   longer shipped by current Debian releases, where requesting it aborts the build.
# - The apt lists are removed in the same layer so they do not bloat the image.
# - `git lfs install --system` writes the LFS filters to the system-wide git
#   config so they apply to every user, not only to root's ~/.gitconfig.
RUN apt-get update && apt-get install -y --no-install-recommends \
        cmake \
        ffmpeg \
        git \
        git-lfs \
        libgl1 \
        libsm6 \
        libxext6 \
        rsync \
    && rm -rf /var/lib/apt/lists/* \
    && git lfs install --system
# Upgrade pip, then install the common packages with their version constraints.
# `--no-cache-dir` is passed to both invocations so no pip download cache is
# left behind in the layer.
# NOTE(review): `datasets` is unconstrained; consider pinning it for
# reproducible builds.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir \
        datasets \
        "huggingface-hub>=0.19" \
        "hf-transfer>=0.1.4" \
        "protobuf<4" \
        "click<8.1" \
        "pydantic~=1.0"
# Copy only the requirements file first so the dependency layer below stays
# cached until requirements.txt itself changes.
COPY requirements.txt .

# IMPORTANT:
# Ensure requirements.txt pins Scrapely to a compatible (or pure-Python) version.
# For example, include a line like:
#
#     scrapely==0.13.2
#
# Install the application's Python dependencies.
RUN pip install --no-cache-dir -r requirements.txt
# Copy the rest of the application code into the working directory.
# Keep a .dockerignore next to this file (.git, caches, .env, ...) so the build
# context stays small and no secrets are copied into the image.
COPY . .
# Document the port the app listens on (8501 is Streamlit's default).
# EXPOSE does not publish the port; use `docker run -p` for that.
EXPOSE 8501

# Default command in exec form, so the Python process receives signals directly
# (adjust as needed).
# NOTE(review): no USER instruction is set, so the container runs as root;
# consider creating a dedicated unprivileged user for production use.
CMD ["python", "app.py"]