Spaces:
Sleeping
Sleeping
Joash
committed on
Commit
·
5f0bb6b
1
Parent(s):
1f37a6a
Optimize memory usage with 4-bit quantization and Docker settings
Browse files
- Dockerfile +7 -4
- src/model_manager.py +11 -3
Dockerfile
CHANGED
@@ -4,10 +4,11 @@ FROM python:3.11-slim
|
|
4 |
# Set working directory
|
5 |
WORKDIR /app
|
6 |
|
7 |
-
# Install system dependencies
|
8 |
RUN apt-get update && apt-get install -y \
|
9 |
build-essential \
|
10 |
curl \
|
|
|
11 |
&& rm -rf /var/lib/apt/lists/*
|
12 |
|
13 |
# Create necessary directories with proper permissions
|
@@ -28,6 +29,8 @@ ENV HF_HOME=/home/user/.cache/huggingface
|
|
28 |
ENV TRANSFORMERS_CACHE=/home/user/.cache/huggingface
|
29 |
# Set logging to stdout
|
30 |
ENV LOG_FILE=/dev/stdout
|
|
|
|
|
31 |
|
32 |
# Switch to non-root user
|
33 |
USER user
|
@@ -39,7 +42,7 @@ RUN pip install --user --no-cache-dir "numpy<2.0.0"
|
|
39 |
# Copy requirements first to leverage Docker cache
|
40 |
COPY --chown=user:user requirements.txt .
|
41 |
|
42 |
-
# Install Python dependencies
|
43 |
RUN pip install --user --no-cache-dir -r requirements.txt
|
44 |
|
45 |
# Copy application code
|
@@ -48,5 +51,5 @@ COPY --chown=user:user . .
|
|
48 |
# Expose port for Hugging Face Spaces
|
49 |
EXPOSE 7860
|
50 |
|
51 |
-
# Run the application with
|
52 |
-
CMD ["python", "-u", "-m", "uvicorn", "src.api:app", "--host", "0.0.0.0", "--port", "7860", "--log-level", "debug"]
|
|
|
4 |
# Set working directory
|
5 |
WORKDIR /app
|
6 |
|
7 |
+
# Install system dependencies (build-essential, curl, git)
|
8 |
RUN apt-get update && apt-get install -y \
|
9 |
build-essential \
|
10 |
curl \
|
11 |
+
git \
|
12 |
&& rm -rf /var/lib/apt/lists/*
|
13 |
|
14 |
# Create necessary directories with proper permissions
|
|
|
29 |
ENV TRANSFORMERS_CACHE=/home/user/.cache/huggingface
|
30 |
# Set logging to stdout
|
31 |
ENV LOG_FILE=/dev/stdout
|
32 |
+
# Reduce memory usage
|
33 |
+
ENV MALLOC_ARENA_MAX=2
|
34 |
|
35 |
# Switch to non-root user
|
36 |
USER user
|
|
|
42 |
# Copy requirements first to leverage Docker cache
|
43 |
COPY --chown=user:user requirements.txt .
|
44 |
|
45 |
+
# Install Python dependencies with reduced memory usage
|
46 |
RUN pip install --user --no-cache-dir -r requirements.txt
|
47 |
|
48 |
# Copy application code
|
|
|
51 |
# Expose port for Hugging Face Spaces
|
52 |
EXPOSE 7860
|
53 |
|
54 |
+
# Run the application with a single worker to limit memory usage
|
55 |
+
CMD ["python", "-u", "-m", "uvicorn", "src.api:app", "--host", "0.0.0.0", "--port", "7860", "--log-level", "debug", "--workers", "1"]
|
src/model_manager.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import logging
|
2 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM,
|
3 |
import torch
|
4 |
from huggingface_hub import login
|
5 |
from .config import Config
|
@@ -54,11 +54,19 @@ class ModelManager:
|
|
54 |
logger.info(f"Loading model: {self.model_name}")
|
55 |
logger.info(f"Using device: {self.device}")
|
56 |
|
57 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
self.model = AutoModelForCausalLM.from_pretrained(
|
59 |
self.model_name,
|
60 |
device_map={"": self.device},
|
61 |
-
|
62 |
token=Config.HUGGING_FACE_TOKEN,
|
63 |
low_cpu_mem_usage=True
|
64 |
)
|
|
|
1 |
import logging
|
2 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
|
3 |
import torch
|
4 |
from huggingface_hub import login
|
5 |
from .config import Config
|
|
|
54 |
logger.info(f"Loading model: {self.model_name}")
|
55 |
logger.info(f"Using device: {self.device}")
|
56 |
|
57 |
+
# Configure 4-bit quantization
|
58 |
+
quantization_config = BitsAndBytesConfig(
|
59 |
+
load_in_4bit=True,
|
60 |
+
bnb_4bit_compute_dtype=torch.float16,
|
61 |
+
bnb_4bit_use_double_quant=True,
|
62 |
+
bnb_4bit_quant_type="nf4"
|
63 |
+
)
|
64 |
+
|
65 |
+
# Load model with 4-bit quantization
|
66 |
self.model = AutoModelForCausalLM.from_pretrained(
|
67 |
self.model_name,
|
68 |
device_map={"": self.device},
|
69 |
+
quantization_config=quantization_config,
|
70 |
token=Config.HUGGING_FACE_TOKEN,
|
71 |
low_cpu_mem_usage=True
|
72 |
)
|