update model to step 127090

Files changed:
- .ipynb_checkpoints/README-checkpoint.md  +14 -12
- README.md  +14 -12
- config.json  +1 -1
- pytorch_model.bin  +1 -1
.ipynb_checkpoints/README-checkpoint.md CHANGED

@@ -19,19 +19,21 @@ More precise versions will be published shortly.<br/>
 <br/>
 Trained on my server; I studied and adapted the model starting from the repository https://github.com/karpathy/llama2.c<br/>
 <br/>
-
-
-
-
-
-
+- LLaMA model parameters (7B reference values):
+- max_seq_len: (7B = 2048) the maximum sequence length for input data
+- dim: (7B = 4096) the model (embedding) dimensionality
+- n_layers: (7B = 32) the number of transformer layers
+- n_heads: (7B = 32) the number of attention heads
+- n_kv_heads: (7B = 32) the number of key and value heads
+- multiple_of: (7B = 256) rounds the SwiGLU hidden layer size up to a multiple of a large power of 2
 <br/>
-
-
-
-
-
-
+- This model's parameters:
+- max_seq_len = 1024
+- dim = 768
+- n_layers = 32
+- n_heads = 32
+- n_kv_heads = 32
+- multiple_of = 32
 <br/>
 num decayed parameter tensors: 225, with 251,068,416 parameters<br/>
 num non-decayed parameter tensors: 65, with 49,920 parameters<br/>
README.md CHANGED

@@ -19,19 +19,21 @@ More precise versions will be published shortly.<br/>
 <br/>
 Trained on my server; I studied and adapted the model starting from the repository https://github.com/karpathy/llama2.c<br/>
 <br/>
-
-
-
-
-
-
+- LLaMA model parameters (7B reference values):
+- max_seq_len: (7B = 2048) the maximum sequence length for input data
+- dim: (7B = 4096) the model (embedding) dimensionality
+- n_layers: (7B = 32) the number of transformer layers
+- n_heads: (7B = 32) the number of attention heads
+- n_kv_heads: (7B = 32) the number of key and value heads
+- multiple_of: (7B = 256) rounds the SwiGLU hidden layer size up to a multiple of a large power of 2
 <br/>
-
-
-
-
-
-
+- This model's parameters:
+- max_seq_len = 1024
+- dim = 768
+- n_layers = 32
+- n_heads = 32
+- n_kv_heads = 32
+- multiple_of = 32
 <br/>
 num decayed parameter tensors: 225, with 251,068,416 parameters<br/>
 num non-decayed parameter tensors: 65, with 49,920 parameters<br/>
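The parameter counts quoted at the end of the README (225 decayed tensors with 251,068,416 parameters, 65 non-decayed tensors with 49,920 parameters) follow directly from the hyperparameters listed above. A minimal sketch that re-derives them, assuming llama2.c conventions (tied input/output embeddings, RMSNorm weights as the only 1-D non-decayed tensors, and a SwiGLU hidden size rounded up to a multiple of multiple_of):

```python
# Re-derive the parameter counts quoted in the README from the listed
# hyperparameters. Assumes llama2.c-style Llama blocks: tied embeddings,
# RMSNorm (1-D, non-decayed) weights, and a SwiGLU FFN whose hidden size
# is rounded up to a multiple of `multiple_of`.
dim, n_layers, n_heads, n_kv_heads = 768, 32, 32, 32
multiple_of, vocab_size = 32, 32000

# SwiGLU hidden size: 2/3 of 4*dim, rounded up to a multiple of `multiple_of`.
hidden = 4 * dim
hidden = int(2 * hidden / 3)
hidden = multiple_of * ((hidden + multiple_of - 1) // multiple_of)  # -> 2048

head_dim = dim // n_heads
# 2-D (weight-decayed) tensors: token embedding plus per-layer attention and FFN.
decayed = {
    "tok_embeddings": vocab_size * dim,  # output head is tied, so counted once
    "attention (wq, wk, wv, wo)": n_layers * (dim * dim                      # wq
                                              + dim * n_kv_heads * head_dim  # wk
                                              + dim * n_kv_heads * head_dim  # wv
                                              + dim * dim),                  # wo
    "ffn (w1, w2, w3)": n_layers * 3 * dim * hidden,
}
decayed_tensors = 1 + n_layers * (4 + 3)

# 1-D (non-decayed) tensors: two RMSNorm weights per layer plus the final norm.
non_decayed_tensors = 2 * n_layers + 1
non_decayed = non_decayed_tensors * dim

print(decayed_tensors, sum(decayed.values()))  # 225 251068416
print(non_decayed_tensors, non_decayed)        # 65 49920
```

With dim = 768 and multiple_of = 32, the SwiGLU hidden size works out to 2048, and the 65 non-decayed tensors are the two RMSNorm weights in each of the 32 layers plus the final norm.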
config.json CHANGED

@@ -20,7 +20,7 @@
 "rope_scaling": null,
 "rope_theta": 10000.0,
 "tie_word_embeddings": true,
-"transformers_version": "4.
+"transformers_version": "4.39.3",
 "use_cache": true,
 "vocab_size": 32000
 }
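The config.json carries Llama-style settings (rope_theta = 10000, tied word embeddings, vocab_size = 32000) and was saved with transformers 4.39.3, so the checkpoint should load through the standard transformers API. A minimal sketch; the repository id below is a placeholder for this model's Hub repo, not something stated in the commit:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "your-username/your-model"  # placeholder: replace with this model's Hub repo id
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id)

# Quick generation smoke test with the 1024-token context model.
inputs = tokenizer("Once upon a time", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```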
pytorch_model.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:92be5db673d53c20628c7aab33078d4c7c4c44db04ddda6412317e58b14b7346
 size 1004567442
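pytorch_model.bin is stored through Git LFS, so the file tracked in git is only a pointer; the oid line gives the SHA-256 of the actual ~1.0 GB weights file. A small sketch for checking a downloaded copy against that digest (the local path is an assumption):

```python
import hashlib

# SHA-256 from the LFS pointer above.
expected = "92be5db673d53c20628c7aab33078d4c7c4c44db04ddda6412317e58b14b7346"

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so the ~1 GB checkpoint is never loaded into memory at once."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

actual = sha256_of("pytorch_model.bin")  # assumed local download path
print("match" if actual == expected else f"mismatch: {actual}")
```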