Spaces:
Runtime error
Runtime error
Commit
·
bee6408
1
Parent(s):
9e13414
Update app.py
Browse files
app.py
CHANGED
@@ -14,14 +14,16 @@ model_id = "meta-llama/Llama-2-7b-chat-hf"
|
|
14 |
|
15 |
quantization_config = BitsAndBytesConfig(llm_int8_enable_fp32_cpu_offload=True)
|
16 |
device_map = {
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
22 |
}
|
23 |
|
24 |
-
|
25 |
model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device_map,quantization_config=quantization_config)
|
26 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
27 |
|
|
|
14 |
|
15 |
quantization_config = BitsAndBytesConfig(llm_int8_enable_fp32_cpu_offload=True)
|
16 |
device_map = {
|
17 |
+
"transformer.word_embeddings": "cpu",
|
18 |
+
"transformer.word_embeddings_layernorm": "cpu",
|
19 |
+
"lm_head": "cpu",
|
20 |
+
"transformer.h": "cpu",
|
21 |
+
"transformer.ln_f": "cpu",
|
22 |
+
"model.embed_tokens": "cpu",
|
23 |
+
"model.layers":"cpu",
|
24 |
+
"model.norm":"cpu"
|
25 |
}
|
26 |
|
|
|
27 |
model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device_map,quantization_config=quantization_config)
|
28 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
29 |
|