Update configuration_Llamoe.py
configuration_Llamoe.py  +9 -9
@@ -16,21 +16,21 @@ class LlamoeConfig(PretrainedConfig):
     def __init__(
         self,
         vocab_size=32000,
-        hidden_size=
-        intermediate_size=
-        num_hidden_layers=
-        num_attention_heads=
-        num_key_value_heads=
+        hidden_size=3072,
+        intermediate_size=24576,
+        num_hidden_layers=28,
+        num_attention_heads=16,
+        num_key_value_heads=16,
         head_dim=256,
-        hidden_act="
-        max_position_embeddings=
+        hidden_act="gelu",
+        max_position_embeddings=8192,
         initializer_range=0.02,
-        rms_norm_eps=1e-
+        rms_norm_eps=1e-6,
         use_cache=True,
         pad_token_id=0,
         eos_token_id=1,
         bos_token_id=2,
-        tie_word_embeddings=
+        tie_word_embeddings=True,
         rope_theta=10000.0,
         attention_bias=False,
         attention_dropout=0.0,
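For reference, below is a minimal sketch of LlamoeConfig as it stands after this change, built from the new defaults shown in the diff. Only the __init__ signature appears in the commit, so the model_type value, the attribute assignments, and the super().__init__() call follow the usual transformers PretrainedConfig pattern and are assumptions rather than part of the change.

# Minimal sketch of the updated config; only the __init__ defaults come from
# the diff, the rest follows the standard PretrainedConfig pattern.
from transformers import PretrainedConfig

class LlamoeConfig(PretrainedConfig):
    model_type = "llamoe"  # assumed name, not shown in the diff

    def __init__(
        self,
        vocab_size=32000,
        hidden_size=3072,
        intermediate_size=24576,
        num_hidden_layers=28,
        num_attention_heads=16,
        num_key_value_heads=16,
        head_dim=256,
        hidden_act="gelu",
        max_position_embeddings=8192,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        pad_token_id=0,
        eos_token_id=1,
        bos_token_id=2,
        tie_word_embeddings=True,
        rope_theta=10000.0,
        attention_bias=False,
        attention_dropout=0.0,
        **kwargs,
    ):
        # Store the model hyperparameters on the config instance (assumed body).
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads
        self.head_dim = head_dim
        self.hidden_act = hidden_act
        self.max_position_embeddings = max_position_embeddings
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.attention_bias = attention_bias
        self.attention_dropout = attention_dropout
        # Special-token ids and embedding tying are handled by the base class.
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )

# With the new defaults, an unparameterized LlamoeConfig() now yields
# hidden_size=3072, hidden_act="gelu", an 8192-token context, and tied embeddings.
config = LlamoeConfig()
print(config.hidden_size, config.hidden_act, config.max_position_embeddings)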