# selflengthen / Dockerfile — maintained by Sergidev (last update: commit da440bd, verified)
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04
# Install the system toolchain and VCS dependencies in a single layer so the
# apt index never goes stale, and drop the lists in the same layer (a later
# `rm` would not shrink the image). `--no-install-recommends` keeps the image
# small; because that skips python3-pip's recommended build headers,
# python3-dev is listed explicitly (the flash-attn build later compiles
# against the Python headers), as is ca-certificates for the HTTPS clones.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    g++ \
    gcc \
    git \
    git-lfs \
    ninja-build \
    python-is-python3 \
    python3-dev \
    python3-pip \
    python3.10 \
    wget \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Create an unprivileged account (uid 1000) and hand it ownership of /app;
# the build drops to this user before the runtime entrypoint.
RUN useradd --create-home --uid 1000 user && \
    chown -R user:user /app
# Build tooling plus the pinned torch wheel go in first, so the later layers
# (flash-attn, vllm) can compile/resolve against an already-present torch
# without re-pulling it on every source change.
RUN pip3 install --no-cache-dir \
    numpy \
    packaging \
    setuptools \
    torch==2.4.0 \
    wheel
# Point the toolchain at the CUDA installation already shipped in the base
# image (nothing is installed here, only environment variables are set).
# CUDA_HOME must be its own ENV instruction: keys in a single ENV cannot
# reference siblings defined in the same instruction.
ENV CUDA_HOME=/usr/local/cuda
ENV PATH=${CUDA_HOME}/bin:${PATH} \
    LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
# Clone the Self-Lengthen repository into /app.
# Fix: shallow clone (--depth 1) — the full history was baked into the layer
# for no benefit.
# NOTE(review): this clones an unpinned default branch; pin a commit/tag for
# reproducible builds. Also `mv Self-Lengthen/*` skips dotfiles — confirm the
# repo has no top-level dotfiles the scripts need.
RUN git clone --depth 1 https://github.com/QwenLM/Self-Lengthen.git && \
    mv Self-Lengthen/* . && \
    rm -rf Self-Lengthen
# Overlay a local requirements file on top of the cloned repository.
# NOTE(review): no subsequent instruction runs `pip install -r
# requirements.txt` — the Python deps are listed explicitly in the next RUN.
# Confirm whether this COPY is still needed (it may be consumed at runtime by
# run.sh) or is dead weight.
COPY requirements.txt .
# Install the application's Python dependencies.
# BUG FIX: `cn2an>=0.5.22` and `langdetect>=1.0.9` were unquoted, so the
# shell parsed `>=X` as an output redirection (creating junk files named
# "=0.5.22" and "=1.0.9" in /app) and pip received only the bare package
# names — the version floors were silently ignored. Quoting the specifiers
# restores the intended minimum versions.
# flash-attn is installed with --no-build-isolation so its build sees the
# torch already installed above; vllm is pinned to a torch-2.4-compatible
# release.
RUN pip3 install --no-cache-dir \
    transformers==4.43.2 \
    accelerate \
    peft \
    datasets \
    sentencepiece \
    protobuf \
    tiktoken \
    scipy \
    gradio \
    "cn2an>=0.5.22" \
    "langdetect>=1.0.9" \
    openai \
    tqdm \
    && pip3 install --no-cache-dir flash-attn --no-build-isolation \
    && pip3 install --no-cache-dir vllm==0.5.5 vllm-flash-attn
# Install the Self-Lengthen fork of FastChat (branch `self-lengthen`).
# Fixes: shallow-clone the single branch (full history bloated the layer) and
# pass --no-cache-dir so the pip wheel cache is not baked into the image.
RUN git clone --depth 1 -b self-lengthen https://github.com/quanshr/FastChat.git && \
    cd FastChat && \
    pip3 install --no-cache-dir ".[model_worker,webui]"
# Install LLaMA Factory (presumably used by the Self-Lengthen training
# scripts invoked from run.sh — confirm against the cloned repo).
# NOTE(review): unpinned — a new llamafactory release can silently change
# behavior; consider pinning a version for reproducible builds.
RUN pip3 install --no-cache-dir llamafactory
# Writable locations for downloaded models and run artifacts; hand the whole
# tree back to the unprivileged user before dropping root below.
RUN mkdir -p /app/models /app/results && chown -R user:user /app
# Drop root privileges for the remaining build steps and for the runtime.
USER user
# Register the git-lfs hooks for this user — needed by start.sh, which
# downloads the base model by git-cloning it from Hugging Face.
RUN git lfs install
# Distributed-training defaults: a single GPU on a single node, with the
# rendezvous endpoint on localhost.
ENV CUDA_VISIBLE_DEVICES=0 \
    WORLD_SIZE=1 \
    RANK=0 \
    MASTER_ADDR=localhost \
    MASTER_PORT=29500
# Pipeline configuration consumed by start.sh / run.sh.
ENV MODEL_PATH=/app/models/base_model \
    INSTRUCT_COUNT=5000 \
    MAX_ITER=3
# -----------------------------------------------------------------------------
# Generate /app/start.sh. The single-quoted echo string relies on two fragile
# mechanisms, so do not reformat it casually:
#   * Lines ending in `\n\` emit a literal `\n` that the RUN shell's echo is
#     expected to expand into a newline (Ubuntu's /bin/sh is dash, whose
#     builtin echo interprets backslash escapes — verify if the base image
#     changes), while the trailing `\` is a Dockerfile line continuation.
#   * Lines ending in a bare `\` (the long `python -m fastchat...` commands)
#     are joined by the Dockerfile parser into one script line with NO newline.
# NOTE(review): the `#` comment lines embedded in this RUN may be stripped by
# the Dockerfile parser before the shell sees them — confirm the generated
# script (`docker run --entrypoint cat <img> /app/start.sh`) looks as intended.
# The script: defines wait_for_service (nc-based port poll, 30 retries),
# git-clones the Qwen2-7B-Instruct model if $MODEL_PATH is absent, starts the
# FastChat controller (21001), OpenAI API server (8000) and vLLM worker
# (8080) in the background — waiting on each port — then runs the
# Self-Lengthen training loop and finally the web UI.
RUN echo '#!/bin/bash\n\
\n\
# Function to wait for service\n\
wait_for_service() {\n\
local host="$1"\n\
local port="$2"\n\
local retries=30\n\
while ! nc -z "$host" "$port" > /dev/null 2>&1; do\n\
retries=$((retries-1))\n\
if [ "$retries" -eq 0 ]; then\n\
echo "Service $host:$port is not available after maximum retries"\n\
exit 1\n\
fi\n\
echo "Waiting for service $host:$port..."\n\
sleep 2\n\
done\n\
}\n\
\n\
# Download model if needed\n\
if [ ! -d "$MODEL_PATH" ]; then\n\
echo "Downloading model..."\n\
mkdir -p "$MODEL_PATH"\n\
git clone https://huggingface.co/Qwen/Qwen2-7B-Instruct "$MODEL_PATH"\n\
fi\n\
\n\
# Start FastChat services\n\
python -m fastchat.serve.controller \
--host 0.0.0.0 \
--port 21001 > controller.log 2>&1 &\n\
\n\
# Wait for controller\n\
wait_for_service localhost 21001\n\
\n\
python -m fastchat.serve.openai_api_server \
--controller-address http://localhost:21001 \
--host 0.0.0.0 \
--port 8000 > api_server.log 2>&1 &\n\
\n\
# Wait for API server\n\
wait_for_service localhost 8000\n\
\n\
# Start model worker\n\
python -m fastchat.serve.vllm_worker \
--model-names Qwen/Qwen2-7B-Instruct \
--model-path "$MODEL_PATH" \
--controller-address http://localhost:21001 \
--host localhost \
--port 8080 \
--worker-address http://localhost:8080 > worker.log 2>&1 &\n\
\n\
# Wait for model worker\n\
wait_for_service localhost 8080\n\
\n\
# Run the training process\n\
cd /app/qwen\n\
bash run.sh --base_model="$MODEL_PATH" --instruct_count="$INSTRUCT_COUNT" --max_iter="$MAX_ITER"\n\
\n\
# Start the web interface\n\
python app.py\n' > /app/start.sh && \
chmod +x /app/start.sh
# netcat supplies the `nc` port probe used by wait_for_service in start.sh.
# Fix: --no-install-recommends keeps this late (cache-fragile) layer minimal;
# apt lists are removed in the same layer. Root is needed for apt, then we
# immediately drop back to the unprivileged user.
USER root
RUN apt-get update && apt-get install -y --no-install-recommends netcat-openbsd && rm -rf /var/lib/apt/lists/*
USER user
# Web-interface entry point, owned by the runtime user.
COPY --chown=user:user app.py .
# Documentation only (EXPOSE does not publish ports): 7860 gradio UI,
# 8000 OpenAI-compatible API, 21001 FastChat controller, 8080 vLLM worker.
EXPOSE 7860 8000 21001 8080
# Exec-form entrypoint: start.sh orchestrates the services, runs the
# training loop, then launches app.py.
ENTRYPOINT ["/app/start.sh"]