Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -46,17 +46,22 @@ def load_model(selected_model_name):
|
|
46 |
st.info("Loading the model, please wait...")
|
47 |
model_name = model_links[selected_model_name]
|
48 |
|
49 |
-
#
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
-
# Load the model with quantization settings
|
53 |
model = AutoModelForCausalLM.from_pretrained(
|
54 |
model_name,
|
55 |
quantization_config=bnb_config,
|
56 |
trust_remote_code=True,
|
57 |
)
|
58 |
|
59 |
-
|
60 |
# Additional configurations and training enhancements
|
61 |
model.config.use_cache = False
|
62 |
model = prepare_model_for_kbit_training(model)
|
@@ -82,6 +87,7 @@ def load_model(selected_model_name):
|
|
82 |
|
83 |
|
84 |
|
|
|
85 |
# Load model and tokenizer
|
86 |
model, tokenizer = load_model(selected_model)
|
87 |
|
|
|
46 |
st.info("Loading the model, please wait...")
|
47 |
model_name = model_links[selected_model_name]
|
48 |
|
49 |
+
# Define the BitsAndBytesConfig for quantization
|
50 |
+
bnb_config = BitsAndBytesConfig(
|
51 |
+
load_in_4bit=True,
|
52 |
+
bnb_4bit_quant_type="nf4",
|
53 |
+
bnb_4bit_compute_dtype=torch.bfloat16,
|
54 |
+
bnb_4bit_use_double_quant=False,
|
55 |
+
llm_int8_enable_fp32_cpu_offload=True,
|
56 |
+
)
|
57 |
|
58 |
+
# Load the model with quantization settings directly applied
|
59 |
model = AutoModelForCausalLM.from_pretrained(
|
60 |
model_name,
|
61 |
quantization_config=bnb_config,
|
62 |
trust_remote_code=True,
|
63 |
)
|
64 |
|
|
|
65 |
# Additional configurations and training enhancements
|
66 |
model.config.use_cache = False
|
67 |
model = prepare_model_for_kbit_training(model)
|
|
|
87 |
|
88 |
|
89 |
|
90 |
+
|
91 |
# Load model and tokenizer
|
92 |
model, tokenizer = load_model(selected_model)
|
93 |
|