drmasad committed on
Commit
216f23b
·
verified ·
1 Parent(s): 5a3a00b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
@@ -46,17 +46,22 @@ def load_model(selected_model_name):
46
  st.info("Loading the model, please wait...")
47
  model_name = model_links[selected_model_name]
48
 
49
- # Ensure the device is properly set for CUDA availability
50
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
 
 
 
 
51
 
52
- # Load the model with quantization settings
53
  model = AutoModelForCausalLM.from_pretrained(
54
  model_name,
55
  quantization_config=bnb_config,
56
  trust_remote_code=True,
57
  )
58
 
59
-
60
  # Additional configurations and training enhancements
61
  model.config.use_cache = False
62
  model = prepare_model_for_kbit_training(model)
@@ -82,6 +87,7 @@ def load_model(selected_model_name):
82
 
83
 
84
 
 
85
  # Load model and tokenizer
86
  model, tokenizer = load_model(selected_model)
87
 
 
46
  st.info("Loading the model, please wait...")
47
  model_name = model_links[selected_model_name]
48
 
49
+ # Define the BitsAndBytesConfig for quantization
50
+ bnb_config = BitsAndBytesConfig(
51
+ load_in_4bit=True,
52
+ bnb_4bit_quant_type="nf4",
53
+ bnb_4bit_compute_dtype=torch.bfloat16,
54
+ bnb_4bit_use_double_quant=False,
55
+ llm_int8_enable_fp32_cpu_offload=True,
56
+ )
57
 
58
+ # Load the model with quantization settings directly applied
59
  model = AutoModelForCausalLM.from_pretrained(
60
  model_name,
61
  quantization_config=bnb_config,
62
  trust_remote_code=True,
63
  )
64
 
 
65
  # Additional configurations and training enhancements
66
  model.config.use_cache = False
67
  model = prepare_model_for_kbit_training(model)
 
87
 
88
 
89
 
90
+
91
  # Load model and tokenizer
92
  model, tokenizer = load_model(selected_model)
93