# Global build arguments.
# ARGs declared before FROM live in the global scope: they can be used in
# FROM lines, but a stage must redeclare them before its own instructions
# can read them.

# Version tag of the official python base image selected in FROM.
ARG PYTHON_VERSION=3.10

# Automatic platform arguments supplied by the builder.
ARG TARGETPLATFORM
ARG BUILDPLATFORM

# Single build stage, named "base", on the slim Python image chosen by
# PYTHON_VERSION. The AS keyword is uppercase to match FROM (mixed casing
# triggers the BuildKit FromAsCasing check).
FROM python:${PYTHON_VERSION}-slim AS base

# Stage-scoped build arguments.
#   INSTALL_TYPE - which optional extras to install; the RUN steps below
#                  recognise "all", "torch" and "transformer", and treat any
#                  other value (including the default) as a plain install.
#   ENABLE_GPU   - "true" requests the NVIDIA CUDA toolkit (linux/amd64 only).
ARG INSTALL_TYPE=basic
ARG ENABLE_GPU=false

# An ARG declared before FROM is not visible inside the stage. Redeclare the
# automatic platform argument (without a value) so the RUN steps that test
# "$TARGETPLATFORM" see the real platform instead of an empty string.
ARG TARGETPLATFORM

# Image metadata, set in a single LABEL instruction.
LABEL maintainer="unclecode" \
      description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper" \
      version="1.0"

# Python and pip settings that persist in the image environment
# (they apply both to the build steps below and to running containers).
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_DEFAULT_TIMEOUT=100

# Build-time only: declared as ARG rather than ENV so the apt-get steps run
# non-interactively without leaving DEBIAN_FRONTEND set in the runtime
# environment of containers started from this image.
ARG DEBIAN_FRONTEND=noninteractive

# Build toolchain, download utilities and development headers.
# Packages are listed alphabetically; the apt lists are removed in the same
# layer so they do not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
        gnupg \
        libjpeg-dev \
        libpng-dev \
        pkg-config \
        python3-dev \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Shared runtime libraries (X11, GTK accessibility, audio, graphics, NSS).
# NOTE(review): presumably the system dependencies of the Chromium browser
# installed through Playwright later in this file - confirm.
# Packages are listed alphabetically; apt lists are cleaned in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcairo2 \
        libcups2 \
        libdbus-1-3 \
        libdrm2 \
        libgbm1 \
        libglib2.0-0 \
        libnspr4 \
        libnss3 \
        libpango-1.0-0 \
        libx11-6 \
        libxcb1 \
        libxcomposite1 \
        libxdamage1 \
        libxext6 \
        libxfixes3 \
        libxkbcommon0 \
        libxrandr2 \
    && rm -rf /var/lib/apt/lists/*

# Optional NVIDIA CUDA toolkit.
# Installed only when ENABLE_GPU is "true" AND the target platform is
# linux/amd64; every other combination just logs that the step was skipped.
# TARGETPLATFORM has to be declared with ARG inside this stage for the
# comparison to see a non-empty value.
RUN if [ "$ENABLE_GPU" != "true" ] || [ "$TARGETPLATFORM" != "linux/amd64" ]; then \
        echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
    else \
        apt-get update \
        && apt-get install -y --no-install-recommends nvidia-cuda-toolkit \
        && rm -rf /var/lib/apt/lists/*; \
    fi

# All following paths are relative to /app.
WORKDIR /app

# Copy only the requirements manifest first: the dependency layers below are
# then rebuilt only when requirements.txt changes, not on every source edit.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Additional packages installed on top of requirements.txt
# (uvicorn is the server started by CMD).
RUN pip install --no-cache-dir fastapi uvicorn psutil

# Bring in the rest of the build context (project source, docs, packaging).
COPY . .

# Machine-learning stack, installed only for INSTALL_TYPE=all.
# After the packages are installed, the NLTK "punkt" and "stopwords" data
# sets are downloaded. Any other INSTALL_TYPE makes this step a no-op.
RUN case "$INSTALL_TYPE" in \
        all) \
            pip install --no-cache-dir \
                torch torchvision torchaudio \
                scikit-learn nltk \
                transformers tokenizers \
            && python -m nltk.downloader punkt stopwords ;; \
    esac

# Install the project from the current directory with the extras selected by
# INSTALL_TYPE:
#   all, transformer -> matching extra, then run crawl4ai.model_loader
#   torch            -> "torch" extra only
#   anything else    -> plain install without extras
RUN case "$INSTALL_TYPE" in \
        all|transformer) \
            pip install ".[$INSTALL_TYPE]" \
            && python -m crawl4ai.model_loader ;; \
        torch) \
            pip install ".[torch]" ;; \
        *) \
            pip install "." ;; \
    esac

# Documentation tooling: MkDocs, two themes and the pymdown extensions.
RUN pip install --no-cache-dir mkdocs mkdocs-material mkdocs-terminal pymdown-extensions

# Build the documentation site from the project copied into the working directory.
RUN mkdocs build

# Download the Chromium build managed by Playwright.
# Runs only for linux/amd64 and linux/arm64; any other (or empty)
# TARGETPLATFORM value makes this step a no-op. TARGETPLATFORM has to be
# declared with ARG inside this stage for the match to see a value.
RUN case "$TARGETPLATFORM" in \
        linux/amd64|linux/arm64) playwright install chromium ;; \
    esac

# Ports documented for this image. EXPOSE does not publish them; 11235 is the
# port uvicorn listens on (see CMD).
EXPOSE 8000 \
       8080 \
       9222 \
       11235

# Start the ASGI application object "app" from module "main" with uvicorn,
# listening on all interfaces. Exec (JSON-array) form is used, so no shell
# wraps the process.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "11235"]