Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -48,7 +48,6 @@ def load_model(selected_model_name):
     # Set a specific device
     device = "cuda" if torch.cuda.is_available() else "cpu"

-    # Load model with device mapping
     bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type="nf4",
@@ -57,16 +56,19 @@
         llm_int8_enable_fp32_cpu_offload=True,
     )

-    device_map = {
-
-
+    device_map = {
+        'encoder.layer.0': 'cuda', # Keep specific parts on GPU
+        'decoder': 'cpu', # Offload others to CPU
+    }
+
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         quantization_config=bnb_config,
-        device_map=device_map,
+        device_map=device_map,
         trust_remote_code=True,
     )

+
     model.config.use_cache = False
     model = prepare_model_for_kbit_training(model)

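For reference, a minimal, self-contained sketch of load_model as it stands after this commit. The imports, the resolution of selected_model_name to model_name, and whatever follows the shown lines sit outside the diff, so those parts below are assumptions; the device_map keys ('encoder.layer.0', 'decoder') are copied verbatim from the commit and have to match the submodule names of the actual checkpoint.

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training


def load_model(selected_model_name):
    # Assumption: the real app resolves the selected name to a checkpoint id
    # somewhere above the diffed region; a pass-through stands in for it here.
    model_name = selected_model_name

    # Set a specific device (kept as in the source; used further down in the app)
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # 4-bit NF4 quantization with fp32 CPU offload enabled, as in the diff
    # (any other BitsAndBytesConfig arguments fall outside the shown hunks).
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        llm_int8_enable_fp32_cpu_offload=True,
    )

    # Per-module placement: keep selected submodules on the GPU and offload
    # the rest to CPU. The keys must match names reported by
    # model.named_modules() for the chosen checkpoint.
    device_map = {
        'encoder.layer.0': 'cuda',  # Keep specific parts on GPU
        'decoder': 'cpu',           # Offload others to CPU
    }

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map=device_map,
        trust_remote_code=True,
    )

    model.config.use_cache = False
    model = prepare_model_for_kbit_training(model)
    # Assumption: the real function likely continues (tokenizer setup, etc.);
    # returning the model keeps this sketch self-contained.
    return model

Note that when a dict is passed as device_map, accelerate expects every parameter of the model to be covered by one of the keys (an "" entry can serve as a catch-all), so keys that do not correspond to real submodules of the selected checkpoint will raise an error inside from_pretrained, which is one plausible source of this Space's runtime-error status.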