quickstep3621 committed
Commit 1b911aa · Parent: eb5aba2

Update Model

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "barchetta/barchetta-naso-22191733",
+  "_name_or_path": "workspace/trained-models/quickstep3621/nigmqxw",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -18,6 +18,7 @@
   "num_attention_heads": 32,
   "num_hidden_layers": 32,
   "num_key_value_heads": 8,
+  "pad_token_id": 128004,
   "pretraining_tp": 1,
   "rms_norm_eps": 1.0e-5,
   "rope_scaling": {
@@ -31,6 +32,7 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.48.1",
+  "unsloth_version": "2025.1.6",
   "use_cache": false,
   "vocab_size": 128258
-}
+}
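
The new `pad_token_id` of 128004 matches the `<|finetune_right_pad_id|>` pad token set in `special_tokens_map.json` and `tokenizer_config.json` further down. A minimal sketch, assuming the `transformers` Python package and a local checkout of this repository, of how the updated fields can be checked after the change:

```python
from transformers import AutoConfig

# "./" is an assumption: point it at a local checkout of this repository.
config = AutoConfig.from_pretrained("./")

# Fields touched by this commit.
print(config.pad_token_id)  # expected: 128004
print(config.vocab_size)    # expected: 128258
print(config.torch_dtype)   # expected: torch.bfloat16
```
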
generation_config.json CHANGED
@@ -2,6 +2,8 @@
   "_from_model_config": true,
   "bos_token_id": 128000,
   "eos_token_id": 128001,
+  "max_length": 131072,
+  "pad_token_id": 128004,
   "transformers_version": "4.48.1",
   "use_cache": false
 }
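
A similar sketch, again assuming `transformers` and a local checkout, for confirming the new generation defaults:

```python
from transformers import GenerationConfig

# "./" is an assumption: point it at a local checkout of this repository.
gen_config = GenerationConfig.from_pretrained("./")

print(gen_config.max_length)    # expected: 131072
print(gen_config.pad_token_id)  # expected: 128004
```
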
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1e6b51d89426425533cf7c961d6b7298ff9f43416631cda80ff85a115333479
-size 4976715104
+oid sha256:e5fe143515e69d9be77969a732c7b9a835583edca0c13359d0ecc158b436160d
+size 4976715096
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20f770b1e4c06439a3a4f8c61ca2733ca686e304695dc7c0525a24996832d6bf
-size 4999802768
+oid sha256:dc7eb066e73dce3242dfa415da682613419202bfef5e9dcb6ddf087716f9b7c9
+size 4999802760
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17d05434e57b5388741584702dce70027389091c6be5c4e89d469ea14f61faac
-size 4915916232
+oid sha256:508baa46b1aa276aaacf8e83a41fd140fa0f2431bfeb36b9e7221fa3c25acde3
+size 4915916216
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4f9d828e32d2bad0b5559cfd086b6eb2ca19bceb8b7e848dcad7f8c55284a6b6
-size 1168155240
+oid sha256:99e3e55db49458431710eab2d81ed874cf96747be0d4e6a5f097ed08fac657af
+size 1168155232
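
The four `model-0000*-of-00004.safetensors` entries are Git LFS pointer files, so the diff only records each shard's new `oid` (SHA-256) and `size` rather than the weights themselves. A small sketch of how a downloaded shard could be checked against such a pointer; the helper name `check_lfs_object` and the chunk size are assumptions, not part of this repository:

```python
import hashlib
from pathlib import Path

def check_lfs_object(pointer_path: str, blob_path: str) -> bool:
    """Compare a downloaded blob against the oid/size recorded in a Git LFS pointer file."""
    # Pointer files are "key value" lines: version, oid sha256:<hex>, size <bytes>.
    fields = dict(
        line.split(" ", 1)
        for line in Path(pointer_path).read_text().splitlines()
        if " " in line
    )
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    digest = hashlib.sha256()
    actual_size = 0
    with open(blob_path, "rb") as blob:
        for chunk in iter(lambda: blob.read(1 << 20), b""):  # read in 1 MiB chunks
            digest.update(chunk)
            actual_size += len(chunk)
    return digest.hexdigest() == expected_oid and actual_size == expected_size
```
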
model.safetensors.index.json CHANGED
@@ -1,6 +1,7 @@
 {
   "metadata": {
-    "total_size": 16060555264
+    "total_size": 16060555264,
+    "path": "workspace/trained-models/quickstep3621/nigmqxw"
   },
   "weight_map": {
     "lm_head.weight": "model-00004-of-00004.safetensors",
special_tokens_map.json CHANGED
@@ -17,11 +17,5 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": {
-    "content": "<|im_end|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
+  "pad_token": "<|finetune_right_pad_id|>"
 }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:510d12ec255f4cb0304aa5428d699c354c1a49696b427a2748a7b03bb7bbb575
-size 17210296
+oid sha256:eda94f467b2242e7aefa3816ab89a430a7df755e340b598814c87fb7c92633a4
+size 17210395
tokenizer_config.json CHANGED
@@ -2079,6 +2079,7 @@
     "attention_mask"
   ],
   "model_max_length": 131072,
-  "pad_token": "<|im_end|>",
+  "pad_token": "<|finetune_right_pad_id|>",
+  "padding_side": "left",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }
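
Taken together, the tokenizer changes replace the `<|im_end|>` pad token with `<|finetune_right_pad_id|>` and switch to left padding. A short sketch, assuming `transformers` and a local checkout, of how these settings surface when the tokenizer is loaded and used on a padded batch:

```python
from transformers import AutoTokenizer

# "./" is an assumption: point it at a local checkout of this repository.
tokenizer = AutoTokenizer.from_pretrained("./")

print(tokenizer.pad_token)     # expected: <|finetune_right_pad_id|>
print(tokenizer.pad_token_id)  # expected: 128004
print(tokenizer.padding_side)  # expected: left

# With a pad token defined, prompts of unequal length can be batched;
# left padding keeps the generated continuations aligned at the sequence end.
batch = tokenizer(["short prompt", "a somewhat longer prompt"],
                  padding=True, return_tensors="pt")
print(batch["input_ids"].shape)
```
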