CamiloVega committed on
Commit
c092bf7
·
verified ·
1 Parent(s): 41d13d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -2
app.py CHANGED
@@ -6,6 +6,7 @@ import logging
6
  import sys
7
  import os
8
  from accelerate import infer_auto_device_map, init_empty_weights
 
9
 
10
  # Configure logging
11
  logging.basicConfig(
@@ -45,7 +46,7 @@ try:
45
  tokenizer.pad_token = tokenizer.eos_token
46
  logger.info("Tokenizer loaded successfully")
47
 
48
- # Load model with device map
49
  logger.info("Loading model...")
50
  model = AutoModelForCausalLM.from_pretrained(
51
  model_name,
@@ -53,7 +54,12 @@ try:
53
  trust_remote_code=True,
54
  token=hf_token,
55
  device_map="auto",
56
- load_in_8bit=True
 
 
 
 
 
57
  )
58
  logger.info("Model loaded successfully")
59
 
 
6
  import sys
7
  import os
8
  from accelerate import infer_auto_device_map, init_empty_weights
9
+ import bitsandbytes as bnb
10
 
11
  # Configure logging
12
  logging.basicConfig(
 
46
  tokenizer.pad_token = tokenizer.eos_token
47
  logger.info("Tokenizer loaded successfully")
48
 
49
+ # Load model with quantization and device map
50
  logger.info("Loading model...")
51
  model = AutoModelForCausalLM.from_pretrained(
52
  model_name,
 
54
  trust_remote_code=True,
55
  token=hf_token,
56
  device_map="auto",
57
+ quantization_config={
58
+ "load_in_4bit": True,
59
+ "bnb_4bit_compute_dtype": torch.float16,
60
+ "bnb_4bit_use_double_quant": True,
61
+ "bnb_4bit_quant_type": "nf4"
62
+ }
63
  )
64
  logger.info("Model loaded successfully")
65