amaye15 commited on
Commit
cb60f9c
·
1 Parent(s): 878ae55
Files changed (1) hide show
  1. Dockerfile +140 -3
Dockerfile CHANGED
@@ -1,3 +1,140 @@
1
- # Basic version (recommended)
2
- docker pull unclecode/crawl4ai:basic-amd64
3
- docker run -p 7860:7860 unclecode/crawl4ai:basic-amd64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# syntax=docker/dockerfile:1.4

# The syntax directive has to be the first line of the file: once any other
# comment or instruction has been seen, "# syntax=..." is read as a plain
# comment and has no effect.

# Prebuilt image, basic version (recommended):
#   docker pull unclecode/crawl4ai:basic-amd64
#   docker run -p 7860:7860 unclecode/crawl4ai:basic-amd64

# Platform arguments. Declared before FROM they live in the global scope
# only; a build stage must redeclare them before its RUN steps can read them.
ARG TARGETPLATFORM
ARG BUILDPLATFORM

# Python version used to pick the base image tag.
ARG PYTHON_VERSION=3.10

# Base stage: slim Python image whose tag is selected by PYTHON_VERSION.
# The AS keyword is upper-cased to match FROM.
FROM python:${PYTHON_VERSION}-slim AS base

# Build arguments for this stage. An ARG is only visible to RUN steps when it
# is declared after FROM, so everything the steps below read is declared here.
ARG INSTALL_TYPE=all
ARG ENABLE_GPU=false
# Redeclared without a value so the platform checks in later RUN steps see the
# real target platform; the pre-FROM declaration alone leaves it empty inside
# the stage and those checks would never match.
ARG TARGETPLATFORM

# Image metadata, grouped into a single LABEL instruction.
LABEL maintainer="unclecode" \
      description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper" \
      version="1.0"

# Python and pip settings that persist into the running container.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_DEFAULT_TIMEOUT=100

# Build-time only: keeps apt-get non-interactive for the RUN steps in this
# stage without baking the setting into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Build toolchain and general-purpose system packages (sorted alphabetically).
# The apt package lists are removed in the same layer that downloads them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
        gnupg \
        libjpeg-dev \
        libpng-dev \
        pkg-config \
        python3-dev \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Shared libraries Playwright's browser needs on Linux (sorted alphabetically).
# The apt package lists are removed in the same layer that downloads them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcairo2 \
        libcups2 \
        libdbus-1-3 \
        libdrm2 \
        libgbm1 \
        libglib2.0-0 \
        libnspr4 \
        libnss3 \
        libpango-1.0-0 \
        libx11-6 \
        libxcb1 \
        libxcomposite1 \
        libxdamage1 \
        libxext6 \
        libxfixes3 \
        libxkbcommon0 \
        libxrandr2 \
    && rm -rf /var/lib/apt/lists/*

# Optional CUDA toolkit: installed only when ENABLE_GPU is "true" and the
# target platform is linux/amd64; every other combination logs and moves on.
RUN if [ "$ENABLE_GPU" != "true" ] || [ "$TARGETPLATFORM" != "linux/amd64" ]; then \
        echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
    else \
        apt-get update \
        && apt-get install -y --no-install-recommends nvidia-cuda-toolkit \
        && rm -rf /var/lib/apt/lists/*; \
    fi

# Application directory; WORKDIR creates it when it does not exist.
WORKDIR /app

# Install the base requirements from the manifest alone, before the rest of
# the project is copied, so this layer is reused until requirements.txt
# itself changes instead of being rebuilt on every source edit.
# NOTE(review): assumes requirements.txt does not reference other local
# files (e.g. "-r other.txt" or "-e .") - confirm.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Copy the entire project.
COPY . .

# Install FastAPI, the uvicorn server that CMD launches, and psutil.
# --no-cache-dir is passed explicitly, matching the other pip steps.
RUN pip install --no-cache-dir \
        fastapi \
        psutil \
        uvicorn

# ML stack, only for INSTALL_TYPE=all. Kept in its own layer ahead of the
# package install for better layer caching; after the libraries are in place
# the NLTK "punkt" and "stopwords" data sets are downloaded.
RUN if [ "$INSTALL_TYPE" = "all" ]; then \
        pip install --no-cache-dir \
            nltk \
            scikit-learn \
            tokenizers \
            torch \
            torchaudio \
            torchvision \
            transformers \
        && python -m nltk.downloader punkt stopwords; \
    fi

# Install the project itself with the extras selected by INSTALL_TYPE.
# "all" and "transformer" additionally run crawl4ai.model_loader after the
# install; any unrecognised value falls back to the plain package.
RUN case "$INSTALL_TYPE" in \
        all) \
            pip install ".[all]" && python -m crawl4ai.model_loader ;; \
        torch) \
            pip install ".[torch]" ;; \
        transformer) \
            pip install ".[transformer]" && python -m crawl4ai.model_loader ;; \
        *) \
            pip install "." ;; \
    esac

# Documentation toolchain: MkDocs with its theme and extension packages.
RUN pip install --no-cache-dir mkdocs mkdocs-material mkdocs-terminal pymdown-extensions

# Build the MkDocs documentation from the current working directory.
RUN mkdocs build

# Download the Chromium build that Playwright drives.
# TARGETPLATFORM is redeclared here because an ARG declared before FROM is
# not visible to RUN steps; without it the check below compares against an
# empty string and the browser download is silently skipped.
ARG TARGETPLATFORM
# linux/amd64 and linux/arm64 take the same install command, so they share a
# branch. Any other platform is skipped, as before, but now says so in the
# build log.
RUN case "$TARGETPLATFORM" in \
        linux/amd64|linux/arm64) \
            playwright install chromium ;; \
        *) \
            echo "Skipping Playwright Chromium install (unsupported platform: '${TARGETPLATFORM}')" ;; \
    esac

# Ports declared for the container, listed in ascending order. The default
# command below binds 7860.
EXPOSE 7860 8000 8080 9222 11235

# Default command: serve the `app` object from module `main` with uvicorn,
# listening on all interfaces.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]