bluuebunny committed on
Commit
506704f
·
verified ·
1 Parent(s): 25edd26

fixed padding token for finetuning

Browse files
config.json CHANGED
@@ -19,7 +19,7 @@
19
  "num_attention_heads": 32,
20
  "num_hidden_layers": 40,
21
  "num_key_value_heads": 8,
22
- "pad_token_id": 0,
23
  "residual_multiplier": 0.22,
24
  "rms_norm_eps": 1e-05,
25
  "rope_scaling": null,
 
19
  "num_attention_heads": 32,
20
  "num_hidden_layers": 40,
21
  "num_key_value_heads": 8,
22
+ "pad_token_id": 49152,
23
  "residual_multiplier": 0.22,
24
  "rms_norm_eps": 1e-05,
25
  "rope_scaling": null,
generation_config.json CHANGED
@@ -2,6 +2,6 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
  "eos_token_id": 0,
5
- "pad_token_id": 0,
6
  "transformers_version": "4.46.0"
7
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
  "eos_token_id": 0,
5
+ "pad_token_id": 49152,
6
  "transformers_version": "4.46.0"
7
  }
special_tokens_map.json CHANGED
@@ -18,7 +18,8 @@
18
  "<commit_before>",
19
  "<commit_msg>",
20
  "<commit_after>",
21
- "<reponame>"
 
22
  ],
23
  "bos_token": {
24
  "content": "<|endoftext|>",
@@ -35,7 +36,7 @@
35
  "single_word": false
36
  },
37
  "pad_token": {
38
- "content": "<|endoftext|>",
39
  "lstrip": false,
40
  "normalized": false,
41
  "rstrip": false,
 
18
  "<commit_before>",
19
  "<commit_msg>",
20
  "<commit_after>",
21
+ "<reponame>",
22
+ "<|finetune_left_pad_id|>"
23
  ],
24
  "bos_token": {
25
  "content": "<|endoftext|>",
 
36
  "single_word": false
37
  },
38
  "pad_token": {
39
+ "content": "<|finetune_left_pad_id|>",
40
  "lstrip": false,
41
  "normalized": false,
42
  "rstrip": false,
tokenizer.json CHANGED
@@ -173,6 +173,15 @@
173
  "rstrip": false,
174
  "normalized": false,
175
  "special": true
 
 
 
 
 
 
 
 
 
176
  }
177
  ],
178
  "normalizer": null,
@@ -49359,7 +49368,8 @@
49359
  "loot": 49148,
49360
  "mpath": 49149,
49361
  "ĠSIP": 49150,
49362
- "getOptions": 49151
 
49363
  },
49364
  "merges": [
49365
  "Ġ Ġ",
 
173
  "rstrip": false,
174
  "normalized": false,
175
  "special": true
176
+ },
177
+ {
178
+ "id": 49152,
179
+ "content": "<|finetune_left_pad_id|>",
180
+ "single_word": false,
181
+ "lstrip": false,
182
+ "rstrip": false,
183
+ "normalized": false,
184
+ "special": true
185
  }
186
  ],
187
  "normalizer": null,
 
49368
  "loot": 49148,
49369
  "mpath": 49149,
49370
  "ĠSIP": 49150,
49371
+ "getOptions": 49151,
49372
+ "<|finetune_left_pad_id|>":49152
49373
  },
49374
  "merges": [
49375
  "Ġ Ġ",
tokenizer_config.json CHANGED
@@ -152,6 +152,14 @@
152
  "rstrip": false,
153
  "single_word": false,
154
  "special": true
 
 
 
 
 
 
 
 
155
  }
156
  },
157
  "additional_special_tokens": [
@@ -173,15 +181,16 @@
173
  "<commit_before>",
174
  "<commit_msg>",
175
  "<commit_after>",
176
- "<reponame>"
 
177
  ],
178
  "bos_token": "<|endoftext|>",
179
  "clean_up_tokenization_spaces": true,
180
  "eos_token": "<|endoftext|>",
181
  "model_max_length": 9223372036854775807,
182
- "pad_token": "<|endoftext|>",
183
  "padding_side": "left",
184
  "tokenizer_class": "GPT2Tokenizer",
185
  "unk_token": "<|endoftext|>",
186
- "vocab_size": 49152
187
  }
 
152
  "rstrip": false,
153
  "single_word": false,
154
  "special": true
155
+ },
156
+ "49152": {
157
+ "content": "<|finetune_left_pad_id|>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
  }
164
  },
165
  "additional_special_tokens": [
 
181
  "<commit_before>",
182
  "<commit_msg>",
183
  "<commit_after>",
184
+ "<reponame>",
185
+ "<|finetune_left_pad_id|>"
186
  ],
187
  "bos_token": "<|endoftext|>",
188
  "clean_up_tokenization_spaces": true,
189
  "eos_token": "<|endoftext|>",
190
  "model_max_length": 9223372036854775807,
191
+ "pad_token": "<|finetune_left_pad_id|>",
192
  "padding_side": "left",
193
  "tokenizer_class": "GPT2Tokenizer",
194
  "unk_token": "<|endoftext|>",
195
+ "vocab_size": 49153
196
  }