# Source: Hugging Face Spaces - MatrixStudio / Qwen-7B-gguf-API
# Space status: Sleeping
# Original file size: 1,291 Bytes

# Image that builds llama.cpp from source, downloads a GGUF model from
# Hugging Face at build time, and serves it over HTTP on port 8080 as an
# unprivileged user (uid 1000).

# Using a standard Ubuntu base image
FROM ubuntu:22.04

# Model coordinates. Kept as ENV (not ARG) so they remain visible at runtime;
# MODEL_FILE in particular is read by the CMD below.
ENV MODEL_NAME="Qwen-7B-gguf" \
    MODEL_FILE="qwen7b-q4_0.gguf" \
    MODEL_USER="MatrixStudio" \
    DEFAULT_MODEL_BRANCH="main"
# Must be a separate ENV instruction: variable substitution inside one ENV
# instruction only sees values set by *earlier* instructions.
ENV MODEL_URL="https://huggingface.co/${MODEL_USER}/${MODEL_NAME}/resolve/${DEFAULT_MODEL_BRANCH}/${MODEL_FILE}"

# Installing necessary packages.
# - No blanket `apt-get upgrade`: bump the base image tag instead so builds
#   stay reproducible.
# - ca-certificates is listed explicitly because --no-install-recommends does
#   not pull it in for git/wget/curl, and every download below uses HTTPS.
# - DEBIAN_FRONTEND is set inline so it does not leak into the runtime env.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        python3 \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Create the non-root user and hand it an empty /app *before* anything large
# is written there. Every later step runs as this user, so files are created
# with the right owner and no recursive chown/chmod over the multi-GB model
# (which would duplicate it into extra layers) is needed.
RUN useradd -m -u 1000 user \
    && mkdir -p /app \
    && chown user:user /app
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Setting up the working directory
WORKDIR /app

# llama.cpp revision to build: a branch or tag name (not a commit SHA, since
# it is passed to `git clone --branch`). Declared here rather than at the top
# so changing it does not invalidate the package-install layer.
# NOTE(review): the default tracks upstream HEAD, as the original file did.
# Newer upstream revisions are believed to have renamed the `server` binary
# and changed the build system - pin this to a tag where `make` still
# produces ./server, and confirm before relying on the default.
ARG LLAMA_CPP_REF=master

# Cloning the llama.cpp repository (shallow: history is not needed to build)
RUN git clone --depth 1 --branch "${LLAMA_CPP_REF}" \
        https://github.com/ggerganov/llama.cpp.git

# Moving to the llama.cpp directory and building the project
WORKDIR /app/llama.cpp
RUN make -j"$(nproc)"

# Model download process. dot:giga keeps the build log readable for a
# multi-GB file; wget exits non-zero on HTTP errors, failing the build.
RUN mkdir -p models/7B \
    && wget --progress=dot:giga -O "models/7B/${MODEL_FILE}" "${MODEL_URL}"

# Setting up the application
EXPOSE 8080

# Start the server via its absolute path.
# - Wrapped in `sh -c` so ${MODEL_FILE} is expanded from the environment
#   instead of duplicating the file name; `exec` replaces the shell so the
#   server is PID 1 and receives SIGTERM from `docker stop`.
# - --host 0.0.0.0 is required inside a container: a server bound only to
#   loopback cannot be reached through the exposed port.
CMD ["/bin/sh", "-c", "exec /app/llama.cpp/server -m \"/app/llama.cpp/models/7B/${MODEL_FILE}\" -c 16000 --host 0.0.0.0 --port 8080"]