damienbenveniste committed on
Commit
ef01153
1 Parent(s): 28c4623

Training in progress, step 125

Browse files
config.json CHANGED
@@ -3,8 +3,10 @@
3
  "architectures": [
4
  "MistralForCausalLM"
5
  ],
 
6
  "bos_token_id": 1,
7
  "eos_token_id": 2,
 
8
  "hidden_act": "silu",
9
  "hidden_size": 768,
10
  "initializer_range": 0.02,
@@ -19,7 +21,7 @@
19
  "sliding_window": 768,
20
  "tie_word_embeddings": false,
21
  "torch_dtype": "float32",
22
- "transformers_version": "4.35.2",
23
  "use_cache": true,
24
  "vocab_size": 32000
25
  }
 
3
  "architectures": [
4
  "MistralForCausalLM"
5
  ],
6
+ "attention_dropout": 0.0,
7
  "bos_token_id": 1,
8
  "eos_token_id": 2,
9
+ "head_dim": 48,
10
  "hidden_act": "silu",
11
  "hidden_size": 768,
12
  "initializer_range": 0.02,
 
21
  "sliding_window": 768,
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "float32",
24
+ "transformers_version": "4.44.2",
25
  "use_cache": true,
26
  "vocab_size": 32000
27
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8286eb4edb5f3926f382a3897d1f8eb47fab7695a7e389a2a47d4618657a74c4
3
  size 338197712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebb33abf93ee40404c3303f7c7de4b1a4f76f60ea0a4cd7bed57cb2ecb391a07
3
  size 338197712
tokenizer.json CHANGED
@@ -36,23 +36,13 @@
36
  "special": true
37
  }
38
  ],
39
- "normalizer": {
40
- "type": "Sequence",
41
- "normalizers": [
42
- {
43
- "type": "Prepend",
44
- "prepend": "▁"
45
- },
46
- {
47
- "type": "Replace",
48
- "pattern": {
49
- "String": " "
50
- },
51
- "content": "▁"
52
- }
53
- ]
54
  },
55
- "pre_tokenizer": null,
56
  "post_processor": {
57
  "type": "TemplateProcessing",
58
  "single": [
@@ -139,6 +129,7 @@
139
  "end_of_word_suffix": null,
140
  "fuse_unk": true,
141
  "byte_fallback": true,
 
142
  "vocab": {
143
  "<unk>": 0,
144
  "<s>": 1,
 
36
  "special": true
37
  }
38
  ],
39
+ "normalizer": null,
40
+ "pre_tokenizer": {
41
+ "type": "Metaspace",
42
+ "replacement": "▁",
43
+ "prepend_scheme": "first",
44
+ "split": false
 
 
 
 
 
 
 
 
 
45
  },
 
46
  "post_processor": {
47
  "type": "TemplateProcessing",
48
  "single": [
 
129
  "end_of_word_suffix": null,
130
  "fuse_unk": true,
131
  "byte_fallback": true,
132
+ "ignore_merges": false,
133
  "vocab": {
134
  "<unk>": 0,
135
  "<s>": 1,
tokenizer_config.json CHANGED
@@ -1,4 +1,7 @@
1
  {
 
 
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<unk>",
@@ -29,8 +32,8 @@
29
  "bos_token": "<s>",
30
  "clean_up_tokenization_spaces": false,
31
  "eos_token": "</s>",
32
- "legacy": true,
33
- "max_length": 30000,
34
  "model_max_length": 1000000000000000019884624838656,
35
  "pad_to_multiple_of": null,
36
  "pad_token": "</s>",
@@ -38,7 +41,10 @@
38
  "padding_side": "left",
39
  "sp_model_kwargs": {},
40
  "spaces_between_special_tokens": false,
 
41
  "tokenizer_class": "LlamaTokenizer",
 
 
42
  "unk_token": "<unk>",
43
  "use_default_system_prompt": false
44
  }
 
1
  {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",
 
32
  "bos_token": "<s>",
33
  "clean_up_tokenization_spaces": false,
34
  "eos_token": "</s>",
35
+ "legacy": false,
36
+ "max_length": 512,
37
  "model_max_length": 1000000000000000019884624838656,
38
  "pad_to_multiple_of": null,
39
  "pad_token": "</s>",
 
41
  "padding_side": "left",
42
  "sp_model_kwargs": {},
43
  "spaces_between_special_tokens": false,
44
+ "stride": 0,
45
  "tokenizer_class": "LlamaTokenizer",
46
+ "truncation_side": "right",
47
+ "truncation_strategy": "longest_first",
48
  "unk_token": "<unk>",
49
  "use_default_system_prompt": false
50
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed223478ce6e7fdecdb7ca00270a13bf737d4cb94f118b7238740f15ae576b6e
3
- size 4536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:715ac413a238a890a767d1af2d51cb6076befa8e0ae7024302e13aeaa1dd1341
3
+ size 5432