Update config.json
Add `pad_head_dim_to_multiple_of` field to allow the use of [memory efficient attention](https://pytorch.org/blog/accelerated-pytorch-2/#:~:text=The%20head%20dimension%20must%20be%20a%20multiple%20of%208%20for%2016%2Dbit%20floating%20point%20numbers%20and%20a%20multiple%20of%204%20for%2032%2Dbit%20floating%20point%20numbers.%20At%20present%2C%20the%20maximum%20head_dim%20support%20for%20the%20Flash%20Attention%20custom%20kernel%20is%20128).
- config.json: +2 -1

```diff
@@ -28,5 +28,6 @@
   "torch_dtype": "float32",
   "transformers_version": "4.48.0.dev0",
   "use_cache": true,
-  "vocab_size": 32768
+  "vocab_size": 32768,
+  "pad_head_dim_to_multiple_of": 8
 }
```
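The linked PyTorch post notes that the fused SDPA kernels require `head_dim` to be a multiple of 8 for 16-bit floats. Below is a minimal sketch of the padding this field enables, assuming the attention module zero-pads the per-head dimension before calling `torch.nn.functional.scaled_dot_product_attention` and trims the output afterwards; the helper name `sdpa_with_padded_head_dim` and the exact call site are hypothetical, not the model's actual code.

```python
import torch
import torch.nn.functional as F

def sdpa_with_padded_head_dim(q, k, v, multiple_of=8):
    """Pad head_dim up to a multiple of `multiple_of`, run SDPA, then trim.

    Hypothetical helper for illustration; the real logic lives in the
    model's attention module.
    """
    head_dim = q.shape[-1]
    pad = (-head_dim) % multiple_of
    if pad:
        # Zero-padding q/k leaves the attention scores unchanged, and
        # zero-padding v only adds output channels that are trimmed below.
        q, k, v = (F.pad(t, (0, pad)) for t in (q, k, v))
    # Pass the scale explicitly so the padded head_dim does not alter
    # the 1/sqrt(head_dim) softmax scaling.
    out = F.scaled_dot_product_attention(q, k, v, scale=head_dim ** -0.5)
    return out[..., :head_dim]

# Example: head_dim=100 is padded to 104 (a multiple of 8) and trimmed back.
q = k = v = torch.randn(1, 4, 16, 100)
print(sdpa_with_padded_head_dim(q, k, v).shape)  # torch.Size([1, 4, 16, 100])
```

Zero-padding keeps the result numerically identical to the unpadded case while letting PyTorch select the memory-efficient or flash kernel instead of the math fallback.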