Joash committed on
Commit
69455b9
·
1 Parent(s): b4ae3b7

Fix offline mode and improve model loading

Browse files
Files changed (2) hide show
  1. Dockerfile +3 -7
  2. src/model_manager.py +9 -9
Dockerfile CHANGED
@@ -26,21 +26,17 @@ ENV PYTHONDONTWRITEBYTECODE=1
26
  ENV PORT=7860
27
  ENV PATH="/home/user/.local/bin:${PATH}"
28
  ENV HF_HOME=/home/user/.cache/huggingface
29
- ENV TRANSFORMERS_CACHE=/home/user/.cache/huggingface
30
- # Set logging to stdout
31
- ENV LOG_FILE=/dev/stdout
32
  # Memory optimizations
33
  ENV MALLOC_ARENA_MAX=2
34
  ENV MALLOC_TRIM_THRESHOLD_=100000
35
  ENV MALLOC_MMAP_THRESHOLD_=100000
36
- # Transformers optimizations
37
- ENV TRANSFORMERS_OFFLINE=1
38
- ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
39
- ENV CUDA_LAUNCH_BLOCKING=1
40
  # Model optimizations
41
  ENV OMP_NUM_THREADS=1
42
  ENV MKL_NUM_THREADS=1
43
  ENV NUMEXPR_NUM_THREADS=1
 
 
 
44
 
45
  # Switch to non-root user
46
  USER user
 
26
  ENV PORT=7860
27
  ENV PATH="/home/user/.local/bin:${PATH}"
28
  ENV HF_HOME=/home/user/.cache/huggingface
 
 
 
29
  # Memory optimizations
30
  ENV MALLOC_ARENA_MAX=2
31
  ENV MALLOC_TRIM_THRESHOLD_=100000
32
  ENV MALLOC_MMAP_THRESHOLD_=100000
 
 
 
 
33
  # Model optimizations
34
  ENV OMP_NUM_THREADS=1
35
  ENV MKL_NUM_THREADS=1
36
  ENV NUMEXPR_NUM_THREADS=1
37
+ # Ensure offline mode is disabled
38
+ ENV HF_HUB_OFFLINE=0
39
+ ENV TRANSFORMERS_OFFLINE=0
40
 
41
  # Switch to non-root user
42
  USER user
src/model_manager.py CHANGED
@@ -3,6 +3,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
3
  import torch
4
  from huggingface_hub import login
5
  from .config import Config
 
6
 
7
  logger = logging.getLogger(__name__)
8
 
@@ -13,11 +14,15 @@ class ModelManager:
13
  self.model = None
14
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
15
 
 
 
 
 
16
  # Login to Hugging Face Hub
17
  if Config.HUGGING_FACE_TOKEN:
18
  logger.info("Logging in to Hugging Face Hub")
19
  try:
20
- login(token=Config.HUGGING_FACE_TOKEN)
21
  logger.info("Successfully logged in to Hugging Face Hub")
22
  except Exception as e:
23
  logger.error(f"Failed to login to Hugging Face Hub: {str(e)}")
@@ -34,7 +39,8 @@ class ModelManager:
34
  self.tokenizer = AutoTokenizer.from_pretrained(
35
  self.model_name,
36
  token=Config.HUGGING_FACE_TOKEN,
37
- model_max_length=1024 # Limit max length to save memory
 
38
  )
39
  # Ensure we have the necessary special tokens
40
  special_tokens = {
@@ -71,14 +77,8 @@ class ModelManager:
71
  token=Config.HUGGING_FACE_TOKEN,
72
  low_cpu_mem_usage=True,
73
  torch_dtype=torch.float16, # Use fp16 for additional memory savings
74
- max_memory={0: "4GB"}, # Limit memory usage
75
- offload_folder="offload", # Enable CPU offloading
76
- use_cache=False # Disable KV cache to save memory
77
  )
78
-
79
- # Enable gradient checkpointing
80
- self.model.gradient_checkpointing_enable()
81
-
82
  # Resize embeddings to match tokenizer
83
  self.model.resize_token_embeddings(len(self.tokenizer))
84
  logger.info("Model loaded successfully")
 
3
  import torch
4
  from huggingface_hub import login
5
  from .config import Config
6
+ import os
7
 
8
  logger = logging.getLogger(__name__)
9
 
 
14
  self.model = None
15
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
16
 
17
+ # Ensure offline mode is disabled
18
+ os.environ['HF_HUB_OFFLINE'] = '0'
19
+ os.environ['TRANSFORMERS_OFFLINE'] = '0'
20
+
21
  # Login to Hugging Face Hub
22
  if Config.HUGGING_FACE_TOKEN:
23
  logger.info("Logging in to Hugging Face Hub")
24
  try:
25
+ login(token=Config.HUGGING_FACE_TOKEN, add_to_git_credential=False)
26
  logger.info("Successfully logged in to Hugging Face Hub")
27
  except Exception as e:
28
  logger.error(f"Failed to login to Hugging Face Hub: {str(e)}")
 
39
  self.tokenizer = AutoTokenizer.from_pretrained(
40
  self.model_name,
41
  token=Config.HUGGING_FACE_TOKEN,
42
+ model_max_length=1024, # Limit max length to save memory
43
+ trust_remote_code=True
44
  )
45
  # Ensure we have the necessary special tokens
46
  special_tokens = {
 
77
  token=Config.HUGGING_FACE_TOKEN,
78
  low_cpu_mem_usage=True,
79
  torch_dtype=torch.float16, # Use fp16 for additional memory savings
80
+ trust_remote_code=True
 
 
81
  )
 
 
 
 
82
  # Resize embeddings to match tokenizer
83
  self.model.resize_token_embeddings(len(self.tokenizer))
84
  logger.info("Model loaded successfully")